# Recommender systems
- The objective is to recommend the top 5 movies most similar to the one we are considering

In [1]:
# Importing the basic libraries
"""
Cut-off year: 2020
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Importing the data
# Reads `imdb_top_1000.csv` using a path relative to the current working directory.

data = pd.read_csv('imdb_top_1000.csv')

In [3]:
# Lower-case every title; recommendations() lower-cases its input as well,
# which makes the title lookup case-insensitive.
data['Series_Title'] = data['Series_Title'].str.lower()

In [4]:
# Basic inspection of the data

#data.head(3)

In [5]:
#100*data.isnull().sum()/data.shape[0]

In [6]:
# Selecting the most relevant fields based on my understanding:
# the title (used as the lookup key), plus genre, rating and plot overview.

cols = ['Series_Title', 'Genre', 'IMDB_Rating', 'Overview']
filtered_data = data[cols]

In [7]:
#filtered_data.head()

In [8]:
# Genre
# Turn the free-text Genre column into a table of token counts (one column per token).
# NOTE(review): CountVectorizer's default token pattern treats punctuation as a
# separator, so a hyphenated genre (e.g. "Sci-Fi", if present) becomes two tokens.

vectorize = CountVectorizer()
genre_counts = vectorize.fit_transform(filtered_data['Genre'])

# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old name only on releases that predate it.
if hasattr(vectorize, 'get_feature_names_out'):
    genre_tokens = list(vectorize.get_feature_names_out())
else:
    genre_tokens = list(vectorize.get_feature_names())

genre = pd.DataFrame(genre_counts.toarray(), columns=genre_tokens)

In [9]:
#genre

In [10]:
# Overview
# Encode each plot overview into an embedding vector (one row per movie).

text_bert = SentenceTransformer('distilbert-base-nli-mean-tokens')

# encode() is documented to take a string or a list of strings, so hand it a
# plain list instead of a pandas Series (which only works through label-based
# indexing on a default RangeIndex). Missing overviews are encoded as "".
overview_texts = filtered_data['Overview'].fillna('').tolist()
embeddings = text_bert.encode(overview_texts, show_progress_bar=True)

2023-01-31 17:57:10.104 INFO    sentence_transformers.SentenceTransformer: Load pretrained SentenceTransformer: distilbert-base-nli-mean-tokens
2023-01-31 17:57:11.576 INFO    sentence_transformers.SentenceTransformer: Use pytorch device: cpu


Batches:   0%|          | 0/32 [00:00<?, ?it/s]

In [11]:
# Final data
# Side-by-side table: the title column first, then the overview embeddings,
# then the genre token counts.

feature_blocks = [
    filtered_data['Series_Title'],
    pd.DataFrame(embeddings),
    genre,
]
final_data = pd.concat(feature_blocks, axis=1)

In [12]:
#final_data.head()

In [13]:
# Pairwise cosine similarity between all rows of final_data, using every
# column except the first one (the title).
sim = cosine_similarity(final_data.iloc[:, 1:])

In [14]:
#sim

In [15]:
# Wrap the similarity matrix in a DataFrame whose columns are labelled with the movie titles.
sim = pd.DataFrame(sim, columns = list(final_data['Series_Title']))

In [16]:
#sim

In [17]:
# Label the rows with the same titles so that a movie's scores can be
# fetched by title with sim.loc[...].
sim.index = list(final_data['Series_Title'])

In [18]:
#sim.head()

In [19]:
#sim.loc['The Shawshank Redemption'].sort_values(ascending=False)[1:6].reset_index()

In [20]:
def recommendations(movie_name, top_n=5):
    """Return the titles most similar to ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to look up. Surrounding whitespace is ignored and the title is
        lower-cased before the lookup, matching the lower-cased labels of ``sim``.
    top_n : int, default 5
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles named ``'Series_Title'``, ordered from most to least similar,
        with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If the title is not one of the row labels of ``sim``.
    """
    title = movie_name.strip().lower()
    if title not in sim.index:
        raise KeyError("Movie {!r} was not found in the dataset".format(movie_name))

    scores = sim.loc[title]
    # A title that labels more than one row makes .loc return a DataFrame;
    # use the first matching row so the ranking below works on a Series.
    if isinstance(scores, pd.DataFrame):
        scores = scores.iloc[0]

    # Exclude the query movie by label instead of assuming it sorts first,
    # then rank the remaining titles by similarity.
    ranked = scores.drop(labels=title).sort_values(ascending=False)
    return pd.Series(ranked.index[:top_n], name='Series_Title')

In [21]:
#recommendations('The Godfather')

In [22]:
#recommendations('Taare Zameen Par')

In [23]:
#recommendations('The Lion King')

In [24]:
#recommendations('The Dark Knight')

In [25]:
#recommendations('Avatar')

In [26]:
#recommendations('The Terminator')

In [27]:
#recommendations('The Matrix')


In [28]:
def main():
    """Render the Streamlit page: a title banner, a text box for the movie
    name and a button that displays the recommended titles.

    The result banner is only drawn after the button has been pressed, and a
    title that cannot be found is reported as an error message instead of
    raising out of the app.
    """
    st.title("Recommendation System for Movies")
    html_temp = """
    <div style="background-color:tomato;padding:10px">
    <h2 style="color:white;text-align:center;">Recommendation System App</h2>
    </div>
    """

    st.markdown(html_temp, unsafe_allow_html=True)
    text_input = st.text_input("Enter the name of a Movie", "Type here")
    if st.button("Find Movies"):
        try:
            result = recommendations(text_input)
        except KeyError:
            # Raised by the title lookup when the movie is not in the data
            # (this includes the untouched default text of the input box).
            st.error("Movie {!r} was not found in the dataset".format(text_input))
        else:
            st.success("The Movies are {}".format(list(result)))

In [29]:
# Start the Streamlit UI when this code is executed as the main module.
# NOTE(review): the recorded output of this cell shows Streamlit printing a
# `streamlit run ...` command hint, which suggests the app is meant to be
# launched with `streamlit run` rather than from the notebook kernel — confirm.
if __name__ == '__main__':
    main()

2023-01-31 17:58:25.175 
  command:

    streamlit run c:\Users\mirmm\anaconda3_\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
