# **Book Recommendation System 📚**

## Loading...

In [103]:
#packages

import numpy as np
import pandas as pd
import re
import logging

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import ipywidgets as widgets
from IPython.display import display

In [104]:
# Load the movie catalogue; the preview below lists movieId, title and genres columns.
movies = pd.read_csv(filepath_or_buffer="movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


## **Building Search Engine🚂**

### Cleaning the movie title

In [105]:
def clean_text(title: str) -> str:
    '''
    Strip every character that is not an ASCII letter, a digit or a space.

    Args:
        title - str: raw text, e.g. a movie title such as "Toy Story (1995)".
    Return:
        cleaned - str: the input with all other characters removed.
        Non-string input (None, NaN, numbers) is logged and mapped to an
        empty string, so callers always receive a str.
    '''
    if not isinstance(title, str):
        # re.sub raises TypeError on non-strings; report it and fall back to an
        # empty string instead of silently returning None.
        logging.error("clean_text expected a str, got %s: %r",
                      type(title).__name__, title)
        return ""

    # Whitelist pattern: punctuation, symbols and accented letters are all dropped.
    return re.sub("[^a-zA-Z0-9 ]", "", title)

In [106]:
# Store a cleaned copy of every title next to the original one.
movies["cleaned_title"] = movies["title"].map(clean_text)

### Creating a TF-IDF Matrix

In [107]:
# Initialise the vectorizer with ngram_range=(1, 2): single words and adjacent word pairs.
transformer = TfidfVectorizer(ngram_range=(1, 2))

# Fit on the cleaned titles and transform them into the TF-IDF matrix in one step.
tfidf = transformer.fit_transform(movies["cleaned_title"])

### Creating a Search Function

In [108]:
def search(title: str, top_n: int = 5):
    '''Return the movies whose cleaned titles are most similar to ``title``.

    The query is cleaned with ``clean_text``, vectorised with the fitted
    ``transformer`` and compared against every row of ``tfidf`` using cosine
    similarity.

    Args:
        title - str: free-text query.
        top_n - int: maximum number of rows to return (default 5).
    Return:
        results - pd.DataFrame: rows of ``movies`` ordered from most to least
        similar, so ``results.iloc[0]`` is the best match.
    '''
    # clean_text may yield None for bad input; the vectorizer needs a string.
    query = clean_text(title) or ""
    vector = transformer.transform([query])
    similarity = cosine_similarity(vector, tfidf).flatten()

    # Never ask for more rows than exist (argpartition would raise otherwise).
    top_n = min(top_n, similarity.size)
    if top_n <= 0:
        return movies.iloc[[]]

    # argpartition selects the top_n candidates but leaves them unordered,
    # so rank the candidates explicitly by descending similarity.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(-similarity[candidates], kind="stable")]
    return movies.iloc[ranked]

In [113]:
# Live title search: typing in the text box shows the movies that match the text.
movie_input = widgets.Text(
    value='It returns a similar Movie',
    description='Movie Title')

movie_list = widgets.Output()

def on_search_type(data):
    '''Refresh the search results whenever the text box value changes.

    Args: data - change dict delivered by ``observe``; ``data["new"]`` holds
          the current text of the box.
    '''
    with movie_list:
        movie_list.clear_output()
        title = data["new"]
        # Queries of 5 characters or fewer are ignored.
        if len(title) > 5:
            display(search(title))

# The handler has a widget-specific name so that it cannot be shadowed by
# another cell's callback.
movie_input.observe(on_search_type, names='value')
display(movie_input, movie_list)

Text(value='It returns a similar Movie', description='Movie Title')

Output()

### Creating Recommendation Function

In [110]:
# Load the user ratings; the preview below shows userId, movieId, rating and timestamp.
rating = pd.read_csv(filepath_or_buffer="ratings.csv")
rating.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [115]:
def find_similar_user(movieid, min_rating=4, min_share=.10, top_n=10):
    '''Recommend movies liked by the users who also liked ``movieid``.

    Users who rated ``movieid`` above ``min_rating`` are treated as "similar
    users". For every movie they liked, the share of similar users who liked
    it is compared with the share among all users who liked any of those
    candidate movies; the ratio of the two is the recommendation score.

    Args:
        movieid - int: id of the movie to base the recommendations on.
        min_rating - number: a rating must be strictly greater than this to
            count as "liked" (default 4).
        min_share - float: a candidate is kept only if more than this fraction
            of the similar users liked it (default 0.10).
        top_n - int: number of recommendations to return (default 10).
    Return:
        output - pd.DataFrame with columns score, title and genres, sorted by
        score descending; empty when no user liked ``movieid``.
    '''
    # Every step only looks at "liked" ratings, so apply that filter once.
    liked = rating[rating['rating'] > min_rating]

    # users who liked the requested movie
    similar_user = liked.loc[liked['movieId'] == movieid, 'userId'].unique()
    if len(similar_user) == 0:
        # Nobody liked this movie: nothing to score, and avoids dividing by zero.
        return pd.DataFrame(columns=['score', 'title', 'genres'])

    # share of similar users who liked each candidate movie
    similar_recs = liked.loc[liked['userId'].isin(similar_user), 'movieId']
    similar_recs = similar_recs.value_counts() / len(similar_user)
    similar_recs = similar_recs[similar_recs > min_share]

    # share of all users (who liked any candidate) that liked each candidate
    all_user = liked[liked['movieId'].isin(similar_recs.index)]
    all_user_recs = all_user['movieId'].value_counts() / len(all_user['userId'].unique())

    # recommendation score: how much more popular a movie is among similar users
    rec_percent = pd.concat([similar_recs, all_user_recs], axis=1)
    rec_percent.columns = ['similar', 'all']
    rec_percent['score'] = rec_percent['similar'] / rec_percent['all']
    rec_percent = rec_percent.sort_values('score', ascending=False)

    output = rec_percent.head(top_n).merge(
        movies, left_index=True, right_on='movieId')[['score', 'title', 'genres']]
    return output

In [116]:
# Live recommendations: typing a title looks it up and shows recommendations for it.
movie_input_name = widgets.Text(value='Ironman', description='Movie Title:', disabled=False)

recommendation = widgets.Output()

def on_recommend_type(data):
    '''Refresh the recommendations whenever the text box value changes.

    Args: data - change dict delivered by ``observe``; ``data["new"]`` holds
          the current text of the box.
    '''
    with recommendation:
        recommendation.clear_output()
        title = data["new"]
        # Queries of 5 characters or fewer are ignored.
        if len(title) > 5:
            result = search(title)
            if result.empty:
                # No candidate movie, so there is nothing to recommend from.
                return
            # The first row returned by search() is used as the matched movie.
            movieid = result.iloc[0]['movieId']
            display(find_similar_user(movieid))


# The handler has a widget-specific name so that it cannot shadow, or be
# shadowed by, another cell's callback.
movie_input_name.observe(on_recommend_type, names='value')

display(movie_input_name, recommendation)

Text(value='Ironman', description='Movie Title:')

Output()