# PRÁCTICA NOSQL - SHAKESPEARE - PLACES_2


CASO DE PRUEBA:  EMPRESA VISITAS TEATRALIZADAS

In [1]:
# Instalar librerías pprintpp y py2neo
# pprintpp -  (pretty-print) mejora la legibilidad al mostrar estructuras de datos complejas.
# py2neo - facilita la creación y ejecución de consultas en Neo4j usando Python

!pip install pprintpp
!pip install py2neo



In [2]:
# Importación de clases de py2neo:
    # Graph, para representar la conexión con la base de datos.
    # Relationship, para modelar relaciones entre nodos en la base de datos.
    # Node, para representar nodos en la base de datos.

from py2neo import Graph, Relationship, Node

graph = Graph("http://neo4j:1234@neo4j:7474/db/data")

In [3]:
# Resetear notebook tras ejecuciones anteriores borrando los nodos y realaciones existentes en la base de datos.

graph.run("MATCH (n) DETACH DELETE n").evaluate()

In [8]:
# Comprobar ÍNDICES Y CONSTRAINTS creados:
graph.run("""
CALL db.indexes()
""").to_table()

#otra opción: graph.run("CALL db.indexes YIELD *").to_table()

id,name,state,populationPercent,uniqueness,type,entityType,labelsOrTypes,properties,provider
3,constraint_13f3da85,ONLINE,100.0,UNIQUE,BTREE,NODE,['Location'],['name'],native-btree-1.0
6,constraint_15c9d1c1,ONLINE,100.0,UNIQUE,BTREE,NODE,['Play'],['name'],native-btree-1.0
11,constraint_36fbcdfc,ONLINE,100.0,UNIQUE,BTREE,NODE,['Character'],['name'],native-btree-1.0
14,constraint_82051fbd,ONLINE,100.0,UNIQUE,BTREE,NODE,['City'],['name'],native-btree-1.0


In [9]:
import json
import requests
from py2neo import Graph, Node, Relationship

# URL del conjunto de datos
url = "https://raw.githubusercontent.com/rafaelgarrote/datahack-nosql/nosql-especial/workespecial/practica/data/shakespeare.json"

# Ruta del archivo local
local_file = "db_shakespeare.json"

# Realizar la solicitud HTTP
response = requests.get(url)

# Verificar si la solicitud fue exitosa (código de estado 200)
if response.status_code == 200:
    # Guardar los datos en un archivo local en modo "escritura"
    with open(local_file, "w") as file:
        file.write(response.text)
    
    print("Datos descargados exitosamente.")

    # Dividir las líneas y cargar cada línea como un objeto JSON
    data_list = [json.loads(line) for line in response.text.split('\n') if line]

    # Verificar si los índices ya existen antes de intentar crearlos
    if not graph.schema.get_indexes("City"):
        graph.run("CREATE INDEX ON :City(name)")

    if not graph.schema.get_indexes("Location"):
        graph.run("CREATE INDEX ON :Location(name)")

    if not graph.schema.get_indexes("Play"):
        graph.run("CREATE INDEX ON :Play(name)")
        
    if not graph.schema.get_indexes("Character"):
        graph.run("CREATE INDEX ON :Character(name)")
        
    # Agrega restricciones de unicidad si no existen
    if not graph.schema.get_uniqueness_constraints("City"):
        graph.run("DROP INDEX ON :City(name)")  # Elimina el índice existente
        graph.run("CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE")
    if not graph.schema.get_uniqueness_constraints("Location"):
        graph.run("DROP INDEX ON :Location(name)")  # Elimina el índice existente
        graph.run("CREATE CONSTRAINT ON (location:Location) ASSERT location.name IS UNIQUE")
    if not graph.schema.get_uniqueness_constraints("Play"):
        graph.run("DROP INDEX ON :Play(name)")  # Elimina el índice existente
        graph.run("CREATE CONSTRAINT ON (play:Play) ASSERT play.name IS UNIQUE")
    if not graph.schema.get_uniqueness_constraints("Character"):
        graph.run("DROP INDEX ON :Character(name)")  # Elimina el índice existente
        graph.run("CREATE CONSTRAINT ON (character:Character) ASSERT character.name IS UNIQUE")

    print("Índices y restricciones de unicidad creados exitosamente.")

    # Crea el grafo con los datos proporcionados
    for entry in data_list:
        if entry["type"] == "scene":
            location_split = entry["text_entry"].split(". ", 1)
            if len(location_split) == 2:
                city_and_location = location_split[1].split(". ", 1)
                if len(city_and_location) == 2:
                    city, location = city_and_location
                    #print(f"City: {city}, Location: {location}")

                city_node = Node("City", name=city)
                location_node = Node("Location", name=location)
                play_node = Node("Play", name=entry["play_name"])
                character_node = Node("Character", name=entry["speaker"])

                graph.merge(city_node, "City", "name")
                graph.merge(location_node, "Location", "name")
                graph.merge(play_node, "Play", "name")
                graph.merge(character_node, "Character", "name")
                
                play_city_rel = Relationship(play_node, "REPRESENTED_IN", city_node)
                city_location_rel = Relationship(city_node, "HAS_LOCATION", location_node)
                location_play_rel = Relationship(location_node, "LOCATION_OF", play_node)
                character_play_rel = Relationship(character_node, "APPEARS_IN", play_node)
                character_location_rel =Relationship(character_node, "IS_IN", location_node)
                
                graph.merge(play_city_rel)
                graph.merge(city_location_rel)
                graph.merge(location_play_rel)
                graph.merge(character_location_rel)
                graph.merge(character_play_rel)

    print("Grafo creado exitosamente.")
    
    # Imprimir los primeros 5 documentos JSON
    print("\nPrimeros 5 documentos JSON:")
    for i, entry in enumerate(data_list[:3], 1):
        print(f"\nDocumento {i}:\n{json.dumps(entry, indent=2)}")

else:
    print(f"Error al obtener datos. Código de estado: {response.status_code}")
    


Datos descargados exitosamente.
Índices y restricciones de unicidad creados exitosamente.
Grafo creado exitosamente.

Primeros 5 documentos JSON:

Documento 1:
{
  "type": "act",
  "line_id": 1,
  "play_name": "Henry IV",
  "speech_number": "",
  "line_number": "",
  "speaker": "",
  "text_entry": "ACT I"
}

Documento 2:
{
  "type": "scene",
  "line_id": 2,
  "play_name": "Henry IV",
  "speech_number": "",
  "line_number": "",
  "speaker": "",
  "text_entry": "SCENE I. London. The palace."
}

Documento 3:
{
  "type": "line",
  "line_id": 3,
  "play_name": "Henry IV",
  "speech_number": "",
  "line_number": "",
  "speaker": "",
  "text_entry": "Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR WALTER BLUNT, and others"
}


In [10]:
# Relaciones existentes entre los nodos:

graph.run("""
    MATCH (n)-[r]-(m)
    RETURN DISTINCT TYPE(r) AS RelationshipType
""").to_table()

RelationshipType
REPRESENTED_IN
HAS_LOCATION
IS_IN
LOCATION_OF
APPEARS_IN


In [None]:
# Nodos y relaciones existentes
graph.run("""
MATCH (n)
RETURN DISTINCT labels(n) AS Labels, keys(n) AS Keys, n
""").to_table()

In [None]:
# Consulta: Listado con las ciudades que aparecen en la obra de Shakespeare

graph.run("""
    MATCH (play:Play)-[:REPRESENTED_IN]->(city:City)
    RETURN play.name AS Play, COLLECT(city.name) AS Cities
    ORDER BY play.name
""").to_table()


In [None]:
# Consulta: Listado con los lugares que tiene cada una de las ciudades

graph.run("""
    MATCH (city:City)-[:HAS_LOCATION]->(location:Location)
    RETURN city.name AS City, COLLECT(location.name) AS Locations
    ORDER BY city.name
""").to_table()


#### ¡¡¡¡LIMPIAR DATOS PARA MEJORAR RESULTADOS: 'The same' no es una ciudad!!!!

In [None]:
# Consulta: Obras de Shakespeare en una ciudad específica (por ejemplo, Londres)
graph.run("""
    MATCH (city:City {name: 'London'})<-[:REPRESENTED_IN]-(play:Play)
    RETURN DISTINCT play.name AS Play
""").to_table()

In [None]:
# Consulta: Personajes que actuan en una obra concreta
graph.run("""
    MATCH (play:Play {name: 'Hamlet'})<-[:APPEARS_IN]-(character:Character)
    RETURN play.name AS Play, character.name AS Character
""").to_table()


In [None]:
# Consulta: Personajes que actuan en cada obra de Shakespeare
graph.run("""
    MATCH (play:Play)<-[:APPEARS_IN]-(character:Character)
    RETURN play.name AS Play, COLLECT(character.name) AS Character
    ORDER BY play.name
""").to_table()


In [None]:
# Consulta: Cada personaje de cada obra de Shakespeare y en qué localización aparece:
graph.run("""
    MATCH (character:Character)-[:APPEARS_IN]->(play:Play)
    OPTIONAL MATCH (play)<-[:REPRESENTED_IN]-(city:City)
    OPTIONAL MATCH (city)-[:HAS_LOCATION]->(location:Location)
    RETURN play.name AS Play, character.name AS Personaje, location.name AS Localizacion
    ORDER BY Play, Personaje, Localizacion
""").to_table()


In [None]:
graph.run("""
    MATCH (character:Character)-[:APPEARS_IN]->(play:Play)<-[:REPRESENTED_IN]-(city:City)-[:HAS_LOCATION]->(location:Location)
    WITH play, character, COLLECT(DISTINCT city.name + "/" + location.name) AS cities_locations
    RETURN play.name AS Play, character.name AS Personaje, cities_locations AS Cities_Localizacions
    ORDER BY Play, Personaje

""").to_table()

In [None]:
graph.run("MATCH (character:Character) RETURN character LIMIT 1")


In [None]:
graph.run("MATCH (play:Play) RETURN play LIMIT 1").to_table()

In [None]:
graph.run("MATCH (city:City) RETURN city LIMIT 1").to_table()

In [None]:
graph.run("MATCH (location:Location) RETURN location LIMIT 1").to_table()

In [None]:
graph.run("MATCH (character:Character)-[:APPEARS_IN]->(play:Play) RETURN character, play LIMIT 1").to_table()

In [None]:
graph.run("MATCH (play:Play)<-[:REPRESENTED_IN]-(city:City) RETURN play, city LIMIT 1").to_table()

In [None]:
graph.run("MATCH (city:City)-[:HAS_LOCATION]->(location:Location) RETURN city, location LIMIT 1").to_table()

In [None]:
graph.run("MATCH (:Play)<-[:REPRESENTED_IN]-(:City) RETURN COUNT(*) AS count").to_table()

In [None]:
graph.run("MATCH (p:Play) RETURN p LIMIT 1").to_table()

In [None]:
graph.run("MATCH (c:City) RETURN c LIMIT 1").to_table()

In [16]:
graph.run("""
    MATCH (character:Character)-[:APPEARS_IN]->(play:Play)
    WITH play, character
    MATCH (play)<-[:REPRESENTED_IN]-(city:City)-[:HAS_LOCATION]->(location:Location)
    WITH play, character, city, location
    RETURN play.name AS Play, character.name AS Personaje, COLLECT(DISTINCT city.name + "/" + location.name) AS Cities_Localizacions
    ORDER BY Play, Personaje;

""").to_table()
