In [5]:
##Import Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


In [6]:
# Toy movie catalogue used throughout the notebook. The three lists are
# parallel: position i of each list describes the same movie. Genres are
# stored as a single comma-separated string per movie.
data = dict(
    movie_id=[1, 2, 3, 4, 5],
    title=[
        'The Matrix',
        'john wick',
        'The Godfather',
        'Pulp Fiction',
        'Avatar',
    ],
    genre=[
        'action,sci-fi',
        'action',
        'crime,drama',
        'Thriller',
        'action,thriller,sci-fi',
    ],
)

In [7]:
# Build a DataFrame from the column-oriented dict: each key becomes a column
# and the rows get the default 0..n-1 integer index.
df = pd.DataFrame(data=data)
# Bare expression on the last line so the notebook renders the table.
df

Unnamed: 0,movie_id,title,genre
0,1,The Matrix,"action,sci-fi"
1,2,john wick,action
2,3,The Godfather,"crime,drama"
3,4,Pulp Fiction,Thriller
4,5,Avatar,"action,thriller,sci-fi"


In [8]:
## Define a TF-IDF vectorizer to transform the genre text into vectors.
# Treat every comma/whitespace-delimited genre label as ONE token. The default
# token_pattern (r"(?u)\b\w\w+\b") breaks on the hyphen, so "sci-fi" would be
# split into the two features "sci" and "fi" and that single genre would be
# counted twice when measuring similarity. Lowercasing stays on (the default),
# so 'Thriller' and 'thriller' still map to the same feature.
tfidf=TfidfVectorizer(stop_words='english',token_pattern=r"[^,\s]+")

In [10]:
## Learn the vocabulary/IDF weights from the genre strings and encode them.
# The result has one row per row of df (same order) and one column per
# learned feature.
tfidf_matrix = tfidf.fit_transform(raw_documents=df['genre'])

In [12]:
# Pairwise cosine similarity between every pair of rows of tfidf_matrix.
# With a single argument the rows are compared against themselves, so
# cosine_sim[i][j] is the similarity between movie i and movie j.
cosine_sim = cosine_similarity(tfidf_matrix)

In [14]:
# Function to recommend movies based on cosine similarity
def get_recommendations(title,cosine_sim=cosine_sim,top_n=2):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``df['title']``. If several rows share the
        title, the first one (in row order) is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix whose row/column ``i`` corresponds to the
        ``i``-th row (by position) of ``df``. Defaults to the module-level
        ``cosine_sim`` as it existed when this function was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 2). Values
        below zero are treated as zero.

    Returns
    -------
    pandas.Series
        Titles of up to ``top_n`` *other* movies, most similar first. Ties
        keep the row order of ``df``.

    Raises
    ------
    IndexError
        If no row of ``df`` has the given title.
    """
    # Locate the movie by row POSITION rather than by index label: both
    # cosine_sim and .iloc below are positional, so a label would point at
    # the wrong row whenever df does not carry the default 0..n-1 index.
    idx=next((pos for pos,name in enumerate(df['title']) if name==title),None)
    if idx is None:
        raise IndexError(f"No movie titled {title!r} found in df['title']")

    # Pair every other movie with its similarity to the query movie. The
    # query row is dropped explicitly instead of assuming it sorts first:
    # a movie with an identical genre vector ties at the top, and with a
    # stable sort it could otherwise push the query movie into the results.
    sim_scores=[(pos,score) for pos,score in enumerate(cosine_sim[idx]) if pos!=idx]

    # Most similar first; list.sort is stable, so ties keep row order.
    sim_scores.sort(key=lambda pair:pair[1],reverse=True)

    # Keep the positions of the top_n best matches.
    movie_indices=[pos for pos,_ in sim_scores[:max(top_n,0)]]

    # Return the corresponding titles.
    return df['title'].iloc[movie_indices]

In [18]:
## Smoke-test the recommender with one known title and print each suggestion
## on its own line under a short header.
movie_title = "Avatar"
recommend_movies = get_recommendations(title=movie_title)
print(f"Recommendations for '{movie_title}' :")
for movie in recommend_movies:
    print(movie)

Recommendations for 'Avatar' :
The Matrix
Pulp Fiction
