In [None]:
import ast

import pandas as pd
import weaviate as wev
import weaviate.classes.config as wc
from neo4j import GraphDatabase as gdb

In [None]:
# Load the raw movie dataset into a DataFrame using pandas' Python parser engine.
# NOTE(review): later cells read 'movies.csv' rather than 'movie_data.csv' —
# confirm which file is the intended source.
movie_data = pd.read_csv('movie_data.csv', engine='python')


In [None]:
# Preview the first rows to sanity-check the load.
movie_data.head()

In [None]:
# Count missing values per column.
movie_data.isnull().sum()

In [None]:
# Count rows that are exact duplicates of an earlier row.
# NOTE(review): duplicates are counted here but never dropped in this notebook.
movie_data.duplicated().sum()

In [None]:
# Drop every row that has a missing value in any column. The name is rebound
# instead of using inplace=True so the frame displayed by earlier cells is not
# mutated behind the reader's back.
# NOTE(review): this runs before the column selection further down, so rows are
# also dropped for NaNs in columns that are later discarded — confirm intended.
movie_data = movie_data.dropna()

In [None]:
# Re-check missing values per column after dropping incomplete rows.
movie_data.isnull().sum()

In [None]:
# Display the cleaned DataFrame.
movie_data

In [None]:
# Keep only these six columns; every other column is discarded.
movie_data = movie_data[['title', 'overview', 'genres', 'cast', 'poster_url', 'release_date']]

## Load CSV data and populate Neo4j

### Function to create a Neo4j driver

In [None]:
def create_driver(uri, username, password):
    """Create a Neo4j driver for ``uri`` authenticated with a (username, password) pair.

    Args:
        uri: Connection URI handed to the driver.
        username: Account name used for authentication.
        password: Password used for authentication.

    Returns:
        The driver object produced by ``gdb.driver``. This function does not
        close it; that is left to the caller.
    """
    # NOTE(review): a function with the same name and signature is defined again
    # later in this notebook and shadows this one once that cell runs.
    return gdb.driver(uri, auth=(username, password))

### Function to insert movies into the Neo4j database

In [None]:
def insert_movie(tx, movie):
    """Create or update the ``Movie`` node identified by the movie's title.

    Args:
        tx: Transaction-like object exposing ``run(query, parameters)``.
        movie: Mapping passed as the query parameters; the query reads the keys
            ``title``, ``overview``, ``release_date`` and ``poster_url``.
    """
    # Cypher: MERGE matches on title (creating the node if absent), then SET
    # writes the remaining properties onto it.
    cypher = """
    MERGE (m:Movie {title: $title})
    SET m.overview = $overview,
        m.release_date = $release_date,
        m.poster_url = $poster_url
    """
    tx.run(cypher, movie)

### Function to insert genres and their relationships with movies

In [None]:
def insert_genres(tx, title, genres):
    """Link a movie to each of its genres, creating ``Genre`` nodes as needed.

    Runs one query per genre: the ``Genre`` node is merged by name, the movie is
    matched by title, and a ``BELONGS_TO`` relationship is merged from the movie
    to the genre.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        title: Title used to look up the ``Movie`` node.
        genres: Iterable of genre names.
    """
    # The statement text is the same for every genre, so it is built once.
    link_genre_cypher = """
            MERGE (g:Genre {name: $genre})
            WITH g
            MATCH (m:Movie {title: $title})
            MERGE (m)-[:BELONGS_TO]->(g)
        """
    for genre_name in genres:
        tx.run(link_genre_cypher, genre=genre_name, title=title)

### Function to insert cast members and their relationships with movies

In [None]:
def insert_cast(tx, title, cast):
    """Link each cast member to a movie, creating ``Person`` nodes as needed.

    Runs one query per actor: the ``Person`` node is merged by name, the movie
    is matched by title, and an ``ACTED_IN`` relationship is merged from the
    person to the movie.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        title: Title used to look up the ``Movie`` node.
        cast: Iterable of actor names.
    """
    # The statement text is the same for every actor, so it is built once.
    link_actor_cypher = """
            MERGE (p:Person {name: $actor})
            WITH p
            MATCH (m:Movie {title: $title})
            MERGE (p)-[:ACTED_IN]->(m)
        """
    for actor_name in cast:
        tx.run(link_actor_cypher, actor=actor_name, title=title)

### Function to populate the Neo4j database with movie data from a CSV

In [None]:
def _parse_list_cell(value):
    """Parse a CSV cell that is expected to hold a Python list literal.

    Non-string cells (e.g. NaN for a missing value) yield an empty list.
    ``ast.literal_eval`` only evaluates literals, so — unlike ``eval`` — the
    contents of the CSV cannot execute arbitrary code. A string that is not a
    valid literal raises ``ValueError`` or ``SyntaxError``.
    """
    return ast.literal_eval(value) if isinstance(value, str) else []


def _write_movie_graph(tx, movie, genres, cast):
    """Write one movie node plus its genre and cast links within a single transaction."""
    insert_movie(tx, movie)
    insert_genres(tx, movie['title'], genres)
    insert_cast(tx, movie['title'], cast)


def populate_neo4j(uri, username, password, csv_file):
    """Load movies from a CSV file and write them into Neo4j.

    For every CSV row a ``Movie`` node is merged, then linked to its genres and
    cast members. All writes for one movie happen in one managed transaction,
    so a movie is never left without its relationships if a write fails midway.

    Args:
        uri: Neo4j connection URI.
        username: Neo4j user name.
        password: Neo4j password.
        csv_file: Path of a CSV file with the columns ``title``, ``overview``,
            ``release_date``, ``poster_url``, ``genres`` and ``cast``. The last
            two are parsed as Python list literals.

    Raises:
        ValueError, SyntaxError: If a ``genres``/``cast`` cell is a string that
            is not a valid Python literal.
    """
    df = pd.read_csv(csv_file, engine='python')

    driver = create_driver(uri, username, password)
    try:
        with driver.session() as session:
            for _, row in df.iterrows():
                movie = {
                    'title': row['title'],
                    'overview': row['overview'],
                    'release_date': row['release_date'],
                    'poster_url': row['poster_url']
                }
                genres = _parse_list_cell(row['genres'])
                cast = _parse_list_cell(row['cast'])

                # execute_write replaces the deprecated write_transaction
                # (neo4j driver 5.x); extra positional args are forwarded to
                # the transaction function.
                session.execute_write(_write_movie_graph, movie, genres, cast)
    finally:
        # Always release the driver, even when a row fails to import.
        driver.close()

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Neo4j connection settings; os.getenv returns None for any variable that is unset.
uri = os.getenv('NEO4J_URI')
username = os.getenv('NEO4J_USERNAME')
password = os.getenv('NEO4J_PASSWORD')
# NOTE(review): the exploration cells at the top read 'movie_data.csv' — confirm
# that 'movies.csv' is the intended file for the Neo4j import.
csv_file = 'movies.csv'

### Testing the connection to the Neo4j database

In [None]:

def test_connection(uri, username, password):
    """Smoke-test the Neo4j connection by running a trivial query.

    Prints each returned record on success, or ``Error: ...`` on failure.
    Driver construction happens inside the ``try`` so that a malformed or
    missing URI (e.g. an unset environment variable) is reported the same way
    as a failed query instead of raising.

    Args:
        uri: Neo4j connection URI.
        username: Neo4j user name.
        password: Neo4j password.

    Returns:
        True if the query ran without raising, False otherwise.
    """
    driver = None
    try:
        driver = gdb.driver(uri, auth=(username, password))
        with driver.session() as session:
            for record in session.run("RETURN 'Connection successful!'"):
                print(record)
        return True
    except Exception as e:  # deliberately broad: report any failure, don't raise
        print(f"Error: {e}")
        return False
    finally:
        # The driver may not exist if construction itself failed.
        if driver is not None:
            driver.close()
# Run the connection smoke test with the settings read from the environment.
test_connection(uri, username, password)

### Calling the populate_neo4j function

In [None]:
# populate_neo4j(uri, username, password, csv_file)

### Querying for Movie Recommendations

In [None]:
from neo4j import GraphDatabase

def create_driver(uri, username, password):
    """Return a Neo4j driver for ``uri`` authenticated with username and password.

    The driver is not closed here; the caller is expected to close it.
    """
    # NOTE(review): this redefines the ``create_driver`` declared earlier in the
    # notebook with equivalent behaviour — consider keeping a single definition.
    credentials = (username, password)
    return GraphDatabase.driver(uri, auth=credentials)

def close_driver(driver):
    """Close ``driver`` by calling its ``close()`` method; returns nothing."""
    driver.close()

In [None]:
def get_movie_recommendations(tx, title, limit=5):
    """Retrieve titles of movies similar to a given movie, based on shared genres.

    Candidates are other ``Movie`` nodes that share at least one ``Genre`` with
    the movie named ``title``; they are ordered by the number of shared genres,
    highest first.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        title: Title of the movie to find recommendations for.
        limit: Maximum number of titles to return (default 5).

    Returns:
        A list of recommended movie titles, in the order the query returns them.
    """
    cypher = """
    MATCH (m:Movie {title: $title})-[:BELONGS_TO]->(g:Genre)<-[:BELONGS_TO]-(rec:Movie)
    WHERE m <> rec
    RETURN rec.title AS title, COUNT(g) AS genre_overlap
    ORDER BY genre_overlap DESC
    LIMIT $limit
    """
    titles = []
    for record in tx.run(cypher, title=title, limit=limit):
        titles.append(record['title'])
    return titles


In [None]:
def recommend_movies(uri, username, password, movie_title, limit=5):
    """Return up to ``limit`` movie titles that share genres with ``movie_title``.

    Opens a Neo4j driver, runs ``get_movie_recommendations`` in a managed read
    transaction, and always closes the driver afterwards — including when the
    query raises.

    Args:
        uri: Neo4j connection URI.
        username: Neo4j user name.
        password: Neo4j password.
        movie_title: Title of the movie to base recommendations on.
        limit: Maximum number of recommendations (default 5).

    Returns:
        A list of recommended movie titles.
    """
    driver = create_driver(uri, username, password)
    try:
        with driver.session() as session:
            # execute_read replaces the deprecated read_transaction (neo4j driver 5.x).
            return session.execute_read(get_movie_recommendations, movie_title, limit)
    finally:
        close_driver(driver)

In [None]:
from dotenv import load_dotenv
import os

# Reload the Neo4j connection settings from the environment / .env file.
# NOTE(review): this repeats the configuration cell earlier in the notebook —
# it is only needed if this section is run on its own.
load_dotenv()

uri = os.getenv('NEO4J_URI')
username = os.getenv('NEO4J_USERNAME')
password = os.getenv('NEO4J_PASSWORD')

In [None]:
# Ask for the five movies sharing the most genres with the chosen title.
movie_title = "Inside Out 2"
recommendations = recommend_movies(uri, username, password, movie_title, limit=5)

In [None]:
# Print the recommendations as a numbered list starting at 1.
for i, movie in enumerate(recommendations, start=1):
    print(f'{i}. {movie}')

### Weaviate Database

In [None]:
# Weaviate / HuggingFace settings read from the environment; each is None if
# the corresponding variable is unset.
weaviate_url = os.getenv('WCD_URL')
weaviate_api_key = os.getenv('API_KEY')
huggingFace_access_token = os.getenv('HuggingFace_AccessToken')

In [None]:
# Connect to the Weaviate cloud cluster with API-key authentication; the
# HuggingFace token is sent along as an extra request header.
# NOTE(review): newer weaviate-client releases provide connect_to_weaviate_cloud
# as the replacement for connect_to_wcs — confirm against the installed version.
# NOTE(review): the client is never closed in this notebook — consider calling
# client.close() once the queries are done.
client = wev.connect_to_wcs(
    cluster_url=weaviate_url,
    auth_credentials=wev.auth.AuthApiKey(weaviate_api_key),
    headers={'X-Huggingface-Api-Key': huggingFace_access_token},
    skip_init_checks=True
    )

# Last expression of the cell: shows whether the cluster reports itself ready.
client.is_ready()

### Create a collection for movies

In [None]:
# Define the movie properties and configurations
# Two text properties are stored per movie: its title and its overview.
movie_properties = [
    wc.Property(name="title", data_type=wc.DataType.TEXT),
    wc.Property(name="overview", data_type=wc.DataType.TEXT)
]

# Vectorize objects with the HuggingFace text2vec module.
vectorizer_config = wc.Configure.Vectorizer.text2vec_huggingface()

# HNSW vector index using cosine distance.
vector_index_config = wc.Configure.VectorIndex.hnsw(
    distance_metric=wc.VectorDistances.COSINE
)

# Generative module backed by OpenAI.
# NOTE(review): the client connection in this notebook only sends a HuggingFace
# key header; generative queries presumably also need an OpenAI key — confirm.
generative_config = wc.Configure.Generative.openai()

# Cross-reference to a "Review" collection. Currently unused: the `references=`
# argument in the create() call below is commented out.
references = [
    wc.ReferenceProperty(name="hasReview", target_collection="Review")
]

# Create the collection only if it does not exist yet, so re-running the cell
# does not attempt a second create.
if not client.collections.exists('Movie'):
    client.collections.create(
        name='Movie',
        properties=movie_properties,
        vectorizer_config=vectorizer_config,
        vector_index_config=vector_index_config,
        generative_config=generative_config,
        # references=references
    )
    print('Collection for Movie class created successfully.')
else:
    print('Collection for Movie class already exists.')



### Add data to the Movie collection

In [None]:
# Handle to the 'Movie' collection, plus the CSV rows to be inserted into it.
# NOTE(review): this reads 'movies.csv', while the exploration cells at the top
# read 'movie_data.csv' — confirm which file is intended.
movies = client.collections.get('Movie')
data = pd.read_csv('movies.csv', engine='python')

#### 1. Single Insert

In [None]:
# movies.data.insert(
#     properties={'title': 'Twisters', 'overview': 'As storm season intensifies, the paths of form...'},
    #   references={'hasReview': 'jim_uuid'}
# )

#### 2. Insert many by passing them as a list

In [None]:
# movie_list = [{'title': row['title'], 'overview': row['overview']} for _, row in movie_data.iterrows()]
# movies.data.insert_many(movie_list)

# from weaviate.classes.data import DataObject

# movie_objs = [
#     DataObject(
#         properties={
#             'title': row['title'], 
#             'overview': row['overview']
#         },
#         references={
#             'hasReview': 'jim_uuid'
#         }
#     ) for _, row in movie_data.iterrows()
# ]

# movies.data.insert_many(movie_objs)

#### 3. Using Batch functions for large datasets

In [None]:
# use a context manager
# with movies.batch.dynamic() as batch:
#     for _, row in data.iterrows():
#         batch.add_object(
#             properties={
#                 'title': row['title'],
#                 'overview': row['overview']
#             }
#         )
#         if batch.number_errors > 1000:
#             print("More than 1000 errors encountered, stopping the batch process.")
#             break

# if len(movies.batch.failed_objects) > 0 or len(movies.batch.failed_references) > 0:
#     print("Some objects or references failed to insert.")
# else:
#     print("All objects were inserted successfully.")

## Querying data from the vector database

In [None]:
import weaviate.classes.query as wq


movies = client.collections.get('Movie')

# Near text search: the two objects whose vectors are closest to the query text.
response = movies.query.near_text(
    query='Twisters',
    limit=2
    # return_references=[wq.QueryReference(link_on='hasReview', return_properties=['username'])],
    # return_properties=['title', 'tagline'],
    # filters=(wq.Filter.by_property('runtime').less_than(100) & wq.Filter.by_property('runtime').greater_than(85))
)

# Generative search over the four nearest objects:
#   - single_prompt is run once per object ({title} is filled from the object),
#   - grouped_task is run once over the whole result set.
# NOTE(review): the collection is configured with the OpenAI generative module,
# so this call presumably needs an OpenAI API key header on the client — confirm.
response2 = movies.generate.near_text(
    query='Twisters',
    limit=4,
    single_prompt="Translate this into Portuguese: {title}",
    grouped_task="What do these movies have in common?"
)

# Result of the grouped task.
print(response2.generated)

# Results of the per-object single prompt (previously generated but never shown).
for obj in response2.objects:
    print(obj.generated)

for obj in response.objects:
    print(obj.properties, obj.uuid)
    # print(obj.references['hasReview'].objects[0].properties)
    print(obj.properties['title'])