In [10]:
from neo4j import GraphDatabase
import pandas as pd

In [27]:
# Define the connection
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [28]:
conn = Neo4jConnection(uri="bolt://localhost:7687", user="neo4j", pwd="password")

In [144]:
# prepare the data
# We will be working with the IMDB Top 1000 Dataset from Kaggle: https://www.kaggle.com/datasets/bansodesandeep/imdb-top-1000-movies

data = pd.read_csv('imdb_top_1000.csv')
data.head(10)

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000
5,https://m.media-amazon.com/images/M/MV5BNzA5ZD...,The Lord of the Rings: The Return of the King,2003,U,201 min,"Action, Adventure, Drama",8.9,Gandalf and Aragorn lead the World of Men agai...,94.0,Peter Jackson,Elijah Wood,Viggo Mortensen,Ian McKellen,Orlando Bloom,1642758,377845905
6,https://m.media-amazon.com/images/M/MV5BNGNhMD...,Pulp Fiction,1994,A,154 min,"Crime, Drama",8.9,"The lives of two mob hitmen, a boxer, a gangst...",94.0,Quentin Tarantino,John Travolta,Uma Thurman,Samuel L. Jackson,Bruce Willis,1826188,107928762
7,https://m.media-amazon.com/images/M/MV5BNDE4OT...,Schindler's List,1993,A,195 min,"Biography, Drama, History",8.9,"In German-occupied Poland during World War II,...",94.0,Steven Spielberg,Liam Neeson,Ralph Fiennes,Ben Kingsley,Caroline Goodall,1213505,96898818
8,https://m.media-amazon.com/images/M/MV5BMjAxMz...,Inception,2010,UA,148 min,"Action, Adventure, Sci-Fi",8.8,A thief who steals corporate secrets through t...,74.0,Christopher Nolan,Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot Page,Ken Watanabe,2067042,292576195
9,https://m.media-amazon.com/images/M/MV5BMmEzNT...,Fight Club,1999,A,139 min,Drama,8.8,An insomniac office worker and a devil-may-car...,66.0,David Fincher,Brad Pitt,Edward Norton,Meat Loaf,Zach Grenier,1854740,37030102


In [145]:
# first we delete all existing data in the database
conn.query('MATCH (n) DETACH DELETE n')

[]

In [146]:
# create a list of unique person names and then create a node for each person
directors = data.Director.to_list()
star1 = data.Star1.to_list()
star2 = data.Star2.to_list()
star3 = data.Star3.to_list()
star4 = data.Star4.to_list()

person_unique = []

for person in (directors + star1 + star2 + star3 + star3):
    if person not in person_unique:
        person_unique.append(person.replace("'",""))

for person in person_unique:
    conn.query('CREATE (p:Person{{name:{0}}})'.format(repr(person)))

In [147]:
# create a node for each movie
for i,v in data.iterrows():
    conn.query('CREATE (p:Movie{{name:{0},year:{1},rating:{2}}})'.format(repr(v['Series_Title'].replace("'","")), repr(v['Released_Year']), repr(v['IMDB_Rating'])))

In [148]:
# create list of unique genres and create a node for each
genres_unique = []
for v in data.Genre.to_list():
    for w in v.split(','):
        if w.strip() not in genres_unique:
            genres_unique.append(w.strip())

for genre in genres_unique:
    conn.query('CREATE (p:Genre{{name:{0}}})'.format(repr(genre)))

In [149]:
# create relationships between movies and genres
for i,v in data.iterrows():
    for genre in v['Genre'].split(','):
        conn.query(f'''
        MATCH (m:Movie), (g:Genre)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and g.name = '{genre.strip()}'
        CREATE (m)-[t:IS_TYPE]->(g)
    ''')

In [150]:
# create relationships between movies and directors
for i, v in data.iterrows():
    conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and p.name = '{v['Director'].replace("'","")}'
        CREATE (m)-[t:DIRECTED]->(p)
    ''')

In [151]:
# create relationships between movies and actors
for i, v in data.iterrows():
    conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and p.name = '{v['Star1'].replace("'","")}'
        CREATE (m)-[t:STARED]->(p)
    ''')
    conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and p.name = '{v['Star2'].replace("'","")}'
        CREATE (m)-[t:STARED]->(p)
    ''')
    conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and p.name = '{v['Star3'].replace("'","")}'
        CREATE (m)-[t:STARED]->(p)
    ''')
    conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{v['Series_Title'].replace("'","")}' and p.name = '{v['Star4'].replace("'","")}'
        CREATE (m)-[t:STARED]->(p)
    ''')

# EXAMPLE QUERIES

In [164]:
# getting all actors from the movie "Schindlers List"

query = """
    MATCH (m:Movie)-[d:STARED]->(p:Person)
    WHERE m.name = 'Schindlers List'
    RETURN m.name,p.name
"""

pd.DataFrame([dict(_) for _ in conn.query(query)])

Unnamed: 0,m.name,p.name
0,Schindlers List,Ben Kingsley
1,Schindlers List,Ralph Fiennes
2,Schindlers List,Liam Neeson


In [167]:
# getting all movies of genre "Drama"

query = """
    MATCH (g:Genre)<-[d:IS_TYPE]-(m:Movie)
    WHERE g.name = 'Drama'
    RETURN m.name
"""

pd.DataFrame([dict(_) for _ in conn.query(query)])

Unnamed: 0,m.name
0,Taare Zameen Par
1,Lifeboat
2,From Here to Eternity
3,Giant
4,Breakfast at Tiffanys
...,...
721,Kimi no na wa.
722,Dangal
723,Avengers: Endgame
724,Django Unchained


In [None]:
conn.close()