In [10]:
import os

import pandas as pd
from neo4j import GraphDatabase

In [27]:
# Define the connection
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created in ``__init__``; if creation fails the error is
    printed and the instance is left without a driver, in which case
    ``query`` raises ``AssertionError``.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name used for authentication.
            pwd: Password used for authentication.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the problem and keep the instance without a driver.
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run a Cypher statement and return its records as a list.

        Args:
            query: Cypher text to execute.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.
            parameters: Optional mapping of Cypher parameters (referenced as
                ``$name`` in ``query``). Prefer this over formatting values
                into the query text, which requires manual quoting/escaping.

        Returns:
            A list with the records produced by the statement, or ``None``
            when the statement failed (the error is printed, not raised).

        Raises:
            AssertionError: If the driver could not be created in ``__init__``.
        """
        # Explicit raise instead of `assert` so the guard survives `python -O`.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Only forward `parameters` when given, so parameter-less calls
            # reach the driver exactly as before.
            run_args = (query,) if parameters is None else (query, parameters)
            # Materialise the result while the session is still open.
            response = list(session.run(*run_args))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [28]:
# Connection settings are read from the environment so real credentials do not
# have to be stored in the notebook; the fallbacks are the previous
# local-development values.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "password"),
)

In [38]:
# Prepare the data.
# We work with the IMDB Top 1000 dataset from Kaggle:
# https://www.kaggle.com/datasets/bansodesandeep/imdb-top-1000-movies
# The CSV is referenced by a relative path, so it must be present in the
# notebook's working directory.

data = pd.read_csv('imdb_top_1000.csv')

In [39]:
# Display the loaded DataFrame.
# NOTE(review): consider data.head() to keep the rendered output small.
data

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,https://m.media-amazon.com/images/M/MV5BNGEwMT...,Breakfast at Tiffany's,1961,A,115 min,"Comedy, Drama, Romance",7.6,A young New York socialite becomes interested ...,76.0,Blake Edwards,Audrey Hepburn,George Peppard,Patricia Neal,Buddy Ebsen,166544,
996,https://m.media-amazon.com/images/M/MV5BODk3Yj...,Giant,1956,G,201 min,"Drama, Western",7.6,Sprawling epic covering the life of a Texas ca...,84.0,George Stevens,Elizabeth Taylor,Rock Hudson,James Dean,Carroll Baker,34075,
997,https://m.media-amazon.com/images/M/MV5BM2U3Yz...,From Here to Eternity,1953,Passed,118 min,"Drama, Romance, War",7.6,"In Hawaii in 1941, a private is cruelly punish...",85.0,Fred Zinnemann,Burt Lancaster,Montgomery Clift,Deborah Kerr,Donna Reed,43374,30500000
998,https://m.media-amazon.com/images/M/MV5BZTBmMj...,Lifeboat,1944,,97 min,"Drama, War",7.6,Several survivors of a torpedoed merchant ship...,78.0,Alfred Hitchcock,Tallulah Bankhead,John Hodiak,Walter Slezak,William Bendix,26471,


In [137]:
# Build the list of unique person names (directors plus all four billed stars)
# and create one Person node per name.
#
# Apostrophes are stripped from the names because the Cypher statements in this
# notebook are assembled as single-quoted string literals; the relationship
# cells strip them the same way when matching on p.name.

directors = data.Director.to_list()
star1 = data.Star1.to_list()
star2 = data.Star2.to_list()
star3 = data.Star3.to_list()
star4 = data.Star4.to_list()

# De-duplicate AFTER normalising the name: checking the raw name against a list
# of stripped names would append every name containing an apostrophe again on
# each occurrence. dict.fromkeys keeps first-seen order.
person_unique = list(dict.fromkeys(
    person.replace("'", "")
    for person in (directors + star1 + star2 + star3 + star4)
))

for person in person_unique:
    conn.query('CREATE (p:Person{{name:{0}}})'.format(repr(person)))

In [139]:
# Inspect the list of person names built above.
person_unique

['Frank Darabont',
 'Francis Ford Coppola',
 'Christopher Nolan',
 'Sidney Lumet',
 'Peter Jackson',
 'Quentin Tarantino',
 'Steven Spielberg',
 'David Fincher',
 'Robert Zemeckis',
 'Sergio Leone',
 'Lana Wachowski',
 'Martin Scorsese',
 'Irvin Kershner',
 'Milos Forman',
 'Thomas Kail',
 'Bong Joon Ho',
 'Sudha Kongara',
 'Fernando Meirelles',
 'Hayao Miyazaki',
 'Roberto Benigni',
 'Jonathan Demme',
 'George Lucas',
 'Masaki Kobayashi',
 'Akira Kurosawa',
 'Frank Capra',
 'Todd Phillips',
 'Damien Chazelle',
 'Olivier Nakache',
 'Roman Polanski',
 'Ridley Scott',
 'Tony Kaye',
 'Bryan Singer',
 'Luc Besson',
 'Roger Allers',
 'James Cameron',
 'Giuseppe Tornatore',
 'Isao Takahata',
 'Alfred Hitchcock',
 'Michael Curtiz',
 'Charles Chaplin',
 'Nadine Labaki',
 'Can Ulkay',
 'Gayatri',
 'Makoto Shinkai',
 'Nitesh Tiwari',
 'Bob Persichetti',
 'Anthony Russo',
 'Lee Unkrich',
 'Rajkumar Hirani',
 'Aamir Khan',
 'Andrew Stanton',
 'Florian Henckel von Donnersmarck',
 'Chan-wook Park',


In [132]:
# Create one Movie node per dataset row, carrying the title (apostrophes
# removed), the release year and the IMDB rating as properties.
for _idx, movie in data.iterrows():
    title = movie['Series_Title'].replace("'", "")
    year = movie['Released_Year']
    rating = movie['IMDB_Rating']
    conn.query(f'CREATE (p:Movie{{name:{title!r},year:{year!r},rating:{rating!r}}})')

In [133]:
# Collect the individual genres (the Genre column holds comma-separated values)
# in first-seen order, then create one Genre node for each of them.
genres_unique = list(dict.fromkeys(
    part.strip()
    for genre_field in data.Genre.to_list()
    for part in genre_field.split(',')
))

for genre_name in genres_unique:
    conn.query(f'CREATE (p:Genre{{name:{genre_name!r}}})')

In [85]:
# Create a Genre node for every distinct raw value of the Genre column.
# NOTE(review): the raw values are the comma-joined strings (e.g. 'Crime, Drama'),
# not individual genres, so this adds combined-genre nodes next to the ones
# created from the split values; it looks like a leftover of an earlier
# approach - confirm whether this cell should still be run.
for raw_genre in data.Genre.unique():
    conn.query(f'CREATE (p:Genre{{name:{raw_genre!r}}})')

In [135]:
# Link every movie to each of its genres with an IS_TYPE relationship.
# Movie names are matched with apostrophes removed, as they were stored.
for _idx, movie in data.iterrows():
    for raw_genre in movie['Genre'].split(','):
        movie_name = movie['Series_Title'].replace("'","")
        genre_name = raw_genre.strip()
        conn.query(f'''
        MATCH (m:Movie), (g:Genre)
        WHERE m.name = '{movie_name}' and g.name = '{genre_name}'
        CREATE (m)-[t:IS_TYPE]->(g)
    ''')

In [140]:
# Link every movie to its director with a DIRECTED relationship
# (pointing from the Movie node to the Person node).
for _idx, movie in data.iterrows():
    movie_name = movie['Series_Title'].replace("'","")
    director_name = movie['Director'].replace("'","")
    director_query = f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{movie_name}' and p.name = '{director_name}'
        CREATE (m)-[t:DIRECTED]->(p)
    '''
    conn.query(director_query)

In [141]:
# Link every movie to each of its four billed stars with a STARED relationship
# (pointing from the Movie node to the Person node). The same query is issued
# once per star column, in billing order.
for _idx, movie in data.iterrows():
    for star_column in ('Star1', 'Star2', 'Star3', 'Star4'):
        movie_name = movie['Series_Title'].replace("'","")
        actor_name = movie[star_column].replace("'","")
        conn.query(f'''
        MATCH (m:Movie), (p:Person)
        WHERE m.name = '{movie_name}' and p.name = '{actor_name}'
        CREATE (m)-[t:STARED]->(p)
    ''')

In [29]:
# Fetch every node currently stored in the graph (unfiltered match).
conn.query("match(n) RETURN n")

[<Record n=<Node element_id='0' labels=frozenset({'Person'}) properties={}>>]

In [40]:
# Turn each returned record into a dict and tabulate the result as a DataFrame.
pd.DataFrame([dict(record) for record in conn.query('match(n) RETURN n')])

Unnamed: 0,n
0,()


In [30]:
# Keep the list of returned records for inspection in the next cell.
res = conn.query("match(n) RETURN n")

In [37]:
# Values of the first returned record (requires at least one record in res).
res[0].values()

[<Node element_id='0' labels=frozenset({'Person'}) properties={}>]

In [None]:
# Close the driver now that the notebook no longer needs the connection.
conn.close()