In [1]:
import ast
import pandas as pd
import numpy as np
from utils import *
from fastai.text import load_learner

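We use a cosine-style similarity to compare the description generated for the user with every movie in the dataset.
We do not use the square root or the power of 2, as every token appears only once; the score for a movie is the number of tokens it shares with the generated description divided by the number of distinct tokens in that movie's description.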

In [2]:
class LMRecommender():
    """Recommend movies by matching language-model output against movie descriptions.

    On construction the processed movie dataset and the user's list of movie
    titles are loaded. ``recommend`` then asks the saved language model to
    continue the user's combined description tokens, scores every movie against
    the generated tokens, and prints movies sampled in proportion to that score.
    When no usable user input is available, every movie is equally likely.
    """

    def __init__(self):
        # Second positional argument of the model's ``predict`` call
        # (presumably the number of words to generate -- confirm against fastai).
        self.no_tokens_to_desc_movie = 25
        self.__user_input = ''
        self.__empty_input = False
        self.__recom_data = []

        self.__read_processed_movie_dataset()
        self.__get_user_data()

    def __read_processed_movie_dataset(self):
        """Load ``processed_movie_details.csv`` and parse its structured columns.

        ``movie_desc`` becomes a list of whitespace-separated tokens and
        ``genres`` is parsed from its Python-literal string form.
        """
        self.__movie_dataset = pd.read_csv('processed_movie_details.csv')
        # A missing description is read by pandas as NaN (a float), which has no
        # ``split``; treat it as an empty token list instead of crashing.
        self.__movie_dataset['movie_desc'] = self.__movie_dataset['movie_desc'].apply(
            lambda desc: desc.split() if isinstance(desc, str) else []
        )
        self.__movie_dataset['genres'] = self.__movie_dataset['genres'].apply(ast.literal_eval)

    def __get_user_data(self):
        """Build the language-model prompt from the movies listed by the user.

        Reads ``input/user_input.csv`` (no header), keeps the string cells as
        movie titles, and joins the de-duplicated description tokens of the
        matching dataset rows into ``self.__user_input``. If the file cannot be
        processed or no tokens are found, ``self.__empty_input`` is set so that
        ``recommend`` falls back to uniform sampling.
        """
        try:
            raw_titles = pd.read_csv('input/user_input.csv', header=None).values.flatten()
            # Non-string cells (e.g. NaN from ragged rows) are ignored.
            movies = [title.strip() for title in raw_titles if isinstance(title, str)]

            matched = self.__movie_dataset[self.__movie_dataset['title'].isin(movies)]
            attrib = []
            for tokens in matched['movie_desc']:
                attrib.extend(tokens)
            # dict.fromkeys removes duplicates while keeping first-seen order.
            self.__user_input = ' '.join(dict.fromkeys(attrib))
        except Exception:
            # Deliberate best-effort: any failure while reading the user's input
            # is treated the same as having no input at all.
            self.__user_input = ''

        self.__empty_input = len(self.__user_input) == 0

    def __get_pred_from_model(self):
        """Generate description tokens for the user with the saved language model.

        Tokens are collected up to (not including) the first ``xxeos`` marker
        and appended to ``self.__recom_data``, which is then de-duplicated in
        order. Note that ``self.__recom_data`` is never cleared, so tokens
        accumulate across repeated calls.
        """
        model = load_learner('models/', 'recom_model')
        generated = model.predict(self.__user_input, self.no_tokens_to_desc_movie, temperature=0.8)
        # Drop the first len(prompt) + 1 characters -- presumably the echoed
        # prompt plus a separating space; confirm against fastai's output format.
        pred = generated[len(self.__user_input) + 1:].split()

        for token in pred:
            if token == 'xxeos':
                break
            self.__recom_data.append(token)

        self.__recom_data = list(dict.fromkeys(self.__recom_data))

    def __get_cosine_similarity(self, movie_data):
        """Score one movie's tokens against the generated tokens.

        The score is the number of distinct tokens shared by ``movie_data`` and
        ``self.__recom_data`` divided by the number of distinct tokens in
        ``movie_data``.

        Args:
            movie_data: iterable of description tokens for one movie.

        Returns:
            A float in [0, 1]; 0.0 when ``movie_data`` has no tokens (instead of
            raising ``ZeroDivisionError``).
        """
        movie_tokens = set(movie_data)
        if not movie_tokens:
            return 0.0

        shared_tokens = movie_tokens.intersection(self.__recom_data)
        return len(shared_tokens) / len(movie_tokens)

    def __compare_recomendation_with_all_movies(self):
        """Store every movie's score in the ``similarity`` column."""
        self.__movie_dataset['similarity'] = self.__movie_dataset['movie_desc'].apply(
            self.__get_cosine_similarity
        )

    def __get_exhaustive_probabilities(self):
        """Rescale the ``similarity`` column so that it sums to 1.

        If the scores sum to zero (no movie shares a token with the generated
        description) every movie receives the same probability, because dividing
        by zero would yield NaN values that ``np.random.choice`` rejects.
        """
        similarity = self.__movie_dataset['similarity']
        total = np.sum(similarity)
        n_movies = len(similarity)

        if total > 0:
            self.__movie_dataset['similarity'] = similarity / total
        elif n_movies:
            self.__movie_dataset['similarity'] = 1.0 / n_movies

    def recommend(self, n_recommendations=5):
        """Sample movies in proportion to their similarity and print them.

        Args:
            n_recommendations: how many movies to draw (default 5). Drawing is
                done with replacement, so the same movie can appear more than
                once.
        """
        if self.__empty_input:
            # No usable user input: give every movie the same weight.
            self.__movie_dataset['similarity'] = np.array(1)
        else:
            self.__get_pred_from_model()
            self.__compare_recomendation_with_all_movies()
        self.__get_exhaustive_probabilities()

        movie_ids_for_recom = np.random.choice(
            range(self.__movie_dataset.shape[0]),
            n_recommendations,
            replace=True,
            p=self.__movie_dataset['similarity'].values,
        )
        self.__recommended_movies = self.__movie_dataset.iloc[movie_ids_for_recom]

        for index, row in self.__recommended_movies.iterrows():
            print('Title -', row['title'])
            print('Overview -', row['overview'])
            print('Genre - ', end='')
            for genre in row['genres']:
                print(genre['name'], end=' ')

            print('\nRating -', row['vote_average'])
            print('Language -', row['original_language'])
            print()

In [3]:
# Build the recommender: the constructor loads the processed movie dataset
# and the user's movie list from disk.
x = LMRecommender()
# Print the movies sampled for this user (the sampling is random, so the
# output differs between runs).
x.recommend()

Title - Dreamer: Inspired By a True Story
Overview - Ben Crane believes that a severely injured racehorse deserves another chance. He and his daughter Cale adopt the horse (in fact is a mare)and save it of being sacrificed by the owner.
Genre - Drama Family 
Rating - 6.3
Language - en

Title - Harry Potter and the Goblet of Fire
Overview - Harry starts his fourth year at Hogwarts, competes in the treacherous Triwizard Tournament and faces the evil Lord Voldemort. Ron and Hermione help Harry manage the pressure – but Voldemort lurks, awaiting his chance to destroy Harry and all that he stands for.
Genre - Adventure Fantasy Family 
Rating - 7.5
Language - en

Title - Food Chains
Overview - nan
Genre - Documentary 
Rating - 7.4
Language - de

Title - Lost in Translation
Overview - Two lost souls visiting Tokyo -- the young, neglected wife of a photographer and a washed-up movie star shooting a TV commercial -- find an odd solace and pensive freedom to be real in each other's company, away