In [1]:
import pandas as pd

In [9]:
df = pd.read_csv(r"C:\Users\shiva\OneDrive\Desktop\Projects\LLM based Recommendation System\dataset\netflix_titles.csv")
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [10]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [32]:
def create_textual_representation(row):
    textual_representation = f"""
    Type: {row['type']},
    Title: {row['title']},
    Director: {row['director']},
    Cast: {row['cast']},
    Released: {row['release_year']},
    Genres: {row['listed_in']}
    Description: {row['description']}"""
    return textual_representation

In [33]:
df['textual_representation'] = df.apply(create_textual_representation, axis=1)

In [34]:
print(df['textual_representation'].values[0])


    Type: Movie,
    Title: Dick Johnson Is Dead,
    Director: Kirsten Johnson,
    Cast: nan,
    Released: 2020,
    Genres: Documentaries
    Description: As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable.


In [36]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [41]:
import faiss
import numpy as np

dim  = 384
index = faiss.IndexFlatL2(dim)

X = np.zeros((len(df['textual_representation']), dim), dtype='float32')


In [42]:
for i, representation in enumerate(df['textual_representation']):
    if i % 50 == 0:
        print('Processed', str(i), 'instances')
    
    embedding = model.encode(representation)
    X[i] = np.array(embedding)
    
index.add(X)

Processed 0 instances
Processed 50 instances
Processed 100 instances
Processed 150 instances
Processed 200 instances
Processed 250 instances
Processed 300 instances
Processed 350 instances
Processed 400 instances
Processed 450 instances
Processed 500 instances
Processed 550 instances
Processed 600 instances
Processed 650 instances
Processed 700 instances
Processed 750 instances
Processed 800 instances
Processed 850 instances
Processed 900 instances
Processed 950 instances
Processed 1000 instances
Processed 1050 instances
Processed 1100 instances
Processed 1150 instances
Processed 1200 instances
Processed 1250 instances
Processed 1300 instances
Processed 1350 instances
Processed 1400 instances
Processed 1450 instances
Processed 1500 instances
Processed 1550 instances
Processed 1600 instances
Processed 1650 instances
Processed 1700 instances
Processed 1750 instances
Processed 1800 instances
Processed 1850 instances
Processed 1900 instances
Processed 1950 instances
Processed 2000 instance

In [43]:
faiss.write_index(index, 'index')

In [44]:
index = faiss.read_index('index')

In [46]:
favorite_movie = df.iloc[1358]
print(favorite_movie['textual_representation'])


    Type: Movie,
    Title: Shutter Island,
    Director: Martin Scorsese,
    Cast: Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow, Michelle Williams, Emily Mortimer, Patricia Clarkson, Jackie Earle Haley, Ted Levine, John Carroll Lynch, Elias Koteas,
    Released: 2010,
    Genres: Thrillers
    Description: A U.S. marshal's troubling visions compromise his investigation into the disappearance of a patient from a hospital for the criminally insane.


In [51]:
favorite_movie_embedding = model.encode(favorite_movie['textual_representation'])
favorite_movie_embedding = np.array([favorite_movie_embedding], dtype='float32')

In [52]:
D, I = index.search(favorite_movie_embedding, 5)

In [55]:
for match in df['textual_representation'][I.flatten()]:
    print(match)


    Type: Movie,
    Title: Shutter Island,
    Director: Martin Scorsese,
    Cast: Leonardo DiCaprio, Mark Ruffalo, Ben Kingsley, Max von Sydow, Michelle Williams, Emily Mortimer, Patricia Clarkson, Jackie Earle Haley, Ted Levine, John Carroll Lynch, Elias Koteas,
    Released: 2010,
    Genres: Thrillers
    Description: A U.S. marshal's troubling visions compromise his investigation into the disappearance of a patient from a hospital for the criminally insane.

    Type: Movie,
    Title: Black Island,
    Director: Miguel Alexandre,
    Cast: Philip Froissant, Alice Dwyer, Hanns Zischler, Mercedes Müller, Sammy Scheuritzel, Lieselotte Voß, Katharina Schütz, Jodie Leslie Ahlborn, Marco Wittorf, Ilknur Boyraz,
    Released: 2021,
    Genres: International Movies, Thrillers
    Description: The dark secrets of a seemingly peaceful island threaten to swallow up an orphaned student when he grows close to a mysterious new teacher.

    Type: Movie,
    Title: Inception,
    Director: C