In [1]:
!pip install  scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [98]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [4]:
data=pd.read_csv('./imdb_top_1000.csv') # read the csv file
data.head()

Unnamed: 0,Poster_Link,Series_Title,Released_Year,Certificate,Runtime,Genre,IMDB_Rating,Overview,Meta_score,Director,Star1,Star2,Star3,Star4,No_of_Votes,Gross
0,https://m.media-amazon.com/images/M/MV5BMDFkYT...,The Shawshank Redemption,1994,A,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,80.0,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2343110,28341469
1,https://m.media-amazon.com/images/M/MV5BM2MyNj...,The Godfather,1972,A,175 min,"Crime, Drama",9.2,An organized crime dynasty's aging patriarch t...,100.0,Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1620367,134966411
2,https://m.media-amazon.com/images/M/MV5BMTMxNT...,The Dark Knight,2008,UA,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,84.0,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2303232,534858444
3,https://m.media-amazon.com/images/M/MV5BMWMwMG...,The Godfather: Part II,1974,A,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,90.0,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1129952,57300000
4,https://m.media-amazon.com/images/M/MV5BMWU4N2...,12 Angry Men,1957,U,96 min,"Crime, Drama",9.0,A jury holdout attempts to prevent a miscarria...,96.0,Sidney Lumet,Henry Fonda,Lee J. Cobb,Martin Balsam,John Fiedler,689845,4360000


In [79]:
# filter to select only the required columns
df=data[['Overview','Series_Title']]
df

Unnamed: 0,Overview,Series_Title
0,Two imprisoned men bond over a number of years...,The Shawshank Redemption
1,An organized crime dynasty's aging patriarch t...,The Godfather
2,When the menace known as the Joker wreaks havo...,The Dark Knight
3,The early life and career of Vito Corleone in ...,The Godfather: Part II
4,A jury holdout attempts to prevent a miscarria...,12 Angry Men
...,...,...
995,A young New York socialite becomes interested ...,Breakfast at Tiffany's
996,Sprawling epic covering the life of a Texas ca...,Giant
997,"In Hawaii in 1941, a private is cruelly punish...",From Here to Eternity
998,Several survivors of a torpedoed merchant ship...,Lifeboat


In [89]:
import nltk

# download the corpus
nltk.download('wordnet')

# import the lemmatizer
from nltk.stem import WordNetLemmatizer


def lemmatize_text(text):

  # initialize the lemmatizer
  wn_lemmatizer = WordNetLemmatizer()

  # split the text into words and lemmatize them
  lemmatized_text=[wn_lemmatizer.lemmatize(word) for word in text.split()]

  # join all the lemmatized words as a single text and return
  return ' '.join(lemmatized_text)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [93]:
#replace any empty descriptions with ''.
df['Overview']=df['Overview'].fillna('')

# convert the descriptions into lower case
df['processed_overview']=df['Overview'].apply(lambda x:x.lower())

# lemmatize the processed descriptions
df['lemmatized_overview']=df['processed_overview'].apply(lambda x:lemmatize_text(x))

df

Unnamed: 0,Overview,Series_Title,processed_overview,lemmatized_overview
0,Two imprisoned men bond over a number of years...,The Shawshank Redemption,two imprisoned men bond over a number of years...,two imprisoned men bond over a number of years...
1,An organized crime dynasty's aging patriarch t...,The Godfather,an organized crime dynasty's aging patriarch t...,an organized crime dynasty's aging patriarch t...
2,When the menace known as the Joker wreaks havo...,The Dark Knight,when the menace known as the joker wreaks havo...,when the menace known a the joker wreaks havoc...
3,The early life and career of Vito Corleone in ...,The Godfather: Part II,the early life and career of vito corleone in ...,the early life and career of vito corleone in ...
4,A jury holdout attempts to prevent a miscarria...,12 Angry Men,a jury holdout attempts to prevent a miscarria...,a jury holdout attempt to prevent a miscarriag...
...,...,...,...,...
995,A young New York socialite becomes interested ...,Breakfast at Tiffany's,a young new york socialite becomes interested ...,a young new york socialite becomes interested ...
996,Sprawling epic covering the life of a Texas ca...,Giant,sprawling epic covering the life of a texas ca...,sprawling epic covering the life of a texas ca...
997,"In Hawaii in 1941, a private is cruelly punish...",From Here to Eternity,"in hawaii in 1941, a private is cruelly punish...","in hawaii in 1941, a private is cruelly punish..."
998,Several survivors of a torpedoed merchant ship...,Lifeboat,several survivors of a torpedoed merchant ship...,several survivor of a torpedoed merchant ship ...


In [95]:
def process_user_input(user_input):

    # convert to lower case
    user_input=user_input.lower()


    # lemmatize the user input
    lemmatized_user_input=lemmatize_text(user_input)

    return lemmatized_user_input

In [96]:
def get_recommendations(data, item_column, description_column, user_input, top_n=5):

    # initialize the TFIDFVectorizer
    tfidf = TfidfVectorizer(stop_words='english')

    #convert into numeric data using TFiDF vectorizer
    tfidf_matrix = tfidf.fit_transform(data['lemmatized_overview'])

    #process the user input
    processed_user_input= process_user_input(user_input)

    # vectorize the processed user input to numeric data
    user_tfidf = tfidf.transform([processed_user_input])

    #calculate the cosine similarity between user and descriptions in the dataset.
    cosine_sim = cosine_similarity(user_tfidf, tfidf_matrix)

    # pair the scores with their indices
    sim_scores = list(enumerate(cosine_sim[0]))

    # sort the scores based on similarity in descending order.
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # only return top n scores so that we can have n recommendations
    sim_scores = sim_scores[:top_n]

    # create indices for the recommendations
    item_indices = [i[0] for i in sim_scores]

    #create a dataframe to return the recommendations
    recommendations = pd.DataFrame(data.iloc[item_indices][[item_column, description_column]])
    return recommendations

In [100]:
def run():

  # prompt the user to give input
  user_input=input("Enter the description :\n\n")

  # call the recommendation system
  results=get_recommendations(df,'Series_Title','Overview','A masked vigilante protects a city infested with criminals', 5)

  return results

run()

Enter the description :

A masked vigilante fights to eradicate the crime in a city 


Unnamed: 0,Series_Title,Overview
648,The Boondock Saints,Two Irish Catholic brothers become vigilantes ...
125,M - Eine Stadt sucht einen Mörder,When the police in a German city are unable to...
665,Batman: Mask of the Phantasm,Batman is wrongly implicated in a series of mu...
126,Metropolis,In a futuristic city sharply divided between t...
930,Watchmen,"In 1985 where former superheroes exist, the mu..."
