In [1]:
import warnings  # disable python warnings
warnings.filterwarnings("ignore")

from ast import literal_eval  # evaluate strings containing Python code in the current Python environment
from collections import defaultdict
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import surprise
import xgboost as xgb
from nltk.stem.snowball import SnowballStemmer # Removing stem words
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer  # To convert text to numerical data
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.model_selection import train_test_split
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from wordcloud import WordCloud

In [None]:
# Folder holding the CSV exports. Change this single constant to relocate the data.
DATA_DIR = '/Users/mayank garg/Desktop/AZHackathon2023/Dataset'


def _load_csv(file_name):
    """Read one CSV from DATA_DIR with pandas (low_memory=False, as in every original call)."""
    return pd.read_csv(f'{DATA_DIR}/{file_name}', low_memory=False)


# Rating scale handed to Surprise when the rating tables are wrapped in a Dataset.
reader = Reader(rating_scale=(1,5))

ratdf = _load_csv('ratings_small.csv')     # userId / movieId / rating / timestamp
movdf = _load_csv('movies_metadata.csv')   # one row per movie
keydf = _load_csv('keywords.csv')          # keywords per movie id
imdbdf = _load_csv('links_small.csv')      # id links, including tmdbId
castdf = _load_csv('credits.csv')          # cast and crew per movie id


In [None]:
# Column overview of every loaded table, in the order ratings, keywords, credits, links, movies.
for frame in (ratdf, keydf, castdf, imdbdf, movdf):
    print(frame.columns)

# Evaluated but not displayed: only the last expression of a cell is rendered.
movdf.describe()
movdf

In [None]:

# Missing values per column for every loaded table.
# The original last line referenced an undefined name `df`; the ratings table is the only
# loaded frame that was not otherwise inspected, so it is checked here instead.
for frame in (movdf, keydf, imdbdf, castdf, ratdf):
    print(frame.isnull().sum(), '\n\n')

# Observation from the original author: movdf, keydf and imdbdf contain missing values.

## Basic Recommendation
A general (non-personalised) recommendation: every movie is scored with the IMDB weighted-rating formula, and the ranked movies are then presented in two ways:

1. By overall recommendation and 
2. By genre based recommendation

In [None]:
# C: mean vote over the whole catalogue.
# m: number of votes needed to be in the top 5% most-voted movies.
C = movdf['vote_average'].mean()
m = movdf['vote_count'].quantile(0.95)

# The score needs both vote columns, so rows missing either of them are discarded.
movdf = movdf.dropna(subset=['vote_average', 'vote_count'])
print(C, m)


def getIMDB(cnt, av):
    """IMDB weighted rating.

    Parameters
    ----------
    cnt : number of votes for the movie (scalar or pandas Series)
    av  : average vote for the movie (scalar or pandas Series)

    Returns
    -------
    (cnt / (cnt + m)) * av + (m / (cnt + m)) * C, using the module-level m and C.
    """
    return ((cnt/(cnt+m)) * av) + ((m/(m+cnt)) * C)


# Keep only movies with at least m votes and score them in one vectorised call.
# (The original filled an object-dtype column cell by cell through chained indexing,
# which is slow and does not write through under pandas copy-on-write.)
top_mov = movdf.loc[movdf['vote_count'] >= m].copy()
top_mov['score'] = getIMDB(top_mov['vote_count'], top_mov['vote_average'])

# Best score first, with a clean 0..n-1 index so later cells can address rows by position.
# The old index is dropped rather than accumulated as 'index' / 'level_0' columns.
top_mov = top_mov.sort_values(by='score', ascending=False).reset_index(drop=True)

smpl_recom = top_mov[['title', 'score']].head(20)
t1 = top_mov[['title', 'score']].head(20)  # overall recommendation, used by the GUI

In [None]:
# Plot: how the average vote is distributed across the top-voted movies.
fig = px.histogram(data_frame=top_mov, x="vote_average")
fig.show()


### Genre Based Recommendation

In [None]:
def _parse_genres(value):
    """Parse the raw genre string with literal_eval; values that are already parsed pass through.

    Passing parsed values through makes the cell safe to re-run.
    """
    return literal_eval(value) if isinstance(value, str) else value


# After parsing, each cell is a list of {'id': ..., 'name': ...} dictionaries.
top_mov['genres'] = top_mov['genres'].apply(_parse_genres)

# Collect, per genre name, the positions of the movies carrying it.
# top_mov is already sorted by score, so each list stays in best-first order.
genre_rows = defaultdict(list)
for position, genres in enumerate(top_mov['genres']):
    for genre in genres:
        genre_rows[genre['name']].append(position)

# genrelist: every genre name seen; genredict: genre name -> DataFrame of its movies.
# One positional slice per genre replaces the row-by-row DataFrame.append of the original
# (append was removed in pandas 2.0 and copied the whole frame on every call).
genrelist = set(genre_rows)
genredict = {name: top_mov.iloc[rows] for name, rows in genre_rows.items()}

# Compact summary instead of dumping every per-genre frame.
{name: len(frame) for name, frame in genredict.items()}

In [None]:
# Bar chart of how many top movies fall into each genre.
cntmovgenre = [genredict[genre_name].shape[0] for genre_name in genrelist]

genrecnt = pd.DataFrame(
    {'Genre': list(genrelist), 'Movie Counts': cntmovgenre},
    columns=['Genre', 'Movie Counts'],
)

fig = px.bar(data_frame=genrecnt, x='Genre', y='Movie Counts')
fig.show()

In [None]:
def genreBasedRecommend(gnr):
    """Return the first ten (title, score) rows stored for genre `gnr`.

    Returns None when `gnr` is not one of the names in `genrelist`.
    """
    if gnr in genrelist:
        genre_frame = genredict[gnr]
        return genre_frame[['title','score']].head(10)
    return None

print(genreBasedRecommend('Comedy'))

# Content-Based Recommendation

In [None]:
# Pre-processing for the content-based model: attach credits and keywords to the movie table
# and keep only the movies referenced by the links table.

# Cast the id to int. Rows whose id is not numeric are dropped instead of aborting the cell
# with a ValueError.
movdf = (
    movdf
    .assign(id=pd.to_numeric(movdf['id'], errors='coerce'))
    .dropna(subset=['id'])
    .astype({'id': 'int'})
)

# Merge credits and keywords on the movie id. The guards make the cell re-runnable:
# merging a second time would produce suffixed duplicate columns and break the lookups below.
# (Assumes castdf provides 'cast'/'crew' and keydf provides 'keywords' -- they are read under
# those names further down.)
if 'cast' not in movdf.columns:
    movdf = movdf.merge(castdf, on='id')
if 'keywords' not in movdf.columns:
    movdf = movdf.merge(keydf, on='id')

# Ids of the movies present in the links table. `imdbdf` itself is left intact so that the
# earlier inspection cells, and this cell, can be run again.
tmdb_ids = imdbdf.loc[imdbdf['tmdbId'].notnull(), 'tmdbId'].astype('int')
smd = movdf[movdf['id'].isin(tmdb_ids)].reset_index()

# Free text -> list of whitespace-separated words (missing text stays NaN here).
smd['tagline'] = smd['tagline'].str.split()
smd['overview'] = smd['overview'].str.split()

# These columns hold Python literals serialised as strings; turn them back into objects.
for column in ('cast', 'crew', 'keywords', 'genres'):
    smd[column] = smd[column].apply(literal_eval)

def get_director(x):
    """Return the 'name' of the first crew entry whose 'job' is 'Director'.

    `x` is an iterable of dictionaries with 'job' and 'name' keys.
    Returns np.nan when no entry has that job.
    """
    return next((member['name'] for member in x if member['job'] == 'Director'), np.nan)
# One director name (or NaN) per movie, taken from its crew list.
smd['director'] = smd['crew'].map(get_director)


In [None]:

# How many times the director token is repeated, to weigh it above individual cast members.
DIRECTOR_WEIGHT = 3
# Only the first TOP_CAST cast names of each movie are kept.
TOP_CAST = 3


def _names(entries):
    """Return the 'name' of every dictionary in `entries`; anything that is not a list gives []."""
    return [entry['name'] for entry in entries] if isinstance(entries, list) else []


def _squash(tokens):
    """Strip spaces and lower-case each token, so 'Tom Hanks' becomes 'tomhanks'."""
    return [token.replace(" ", "").lower() for token in tokens]


def _as_list(value):
    """Return a copy of `value` when it is a list, otherwise [] (e.g. for a missing text)."""
    return list(value) if isinstance(value, list) else []


def _director_tokens(name):
    """Return the squashed director name repeated DIRECTOR_WEIGHT times.

    A missing director yields []. The original cast NaN to the string 'nan', which gave
    every director-less movie three identical 'nan' tokens and made them look similar.
    """
    if not isinstance(name, str):
        return []
    return [name.replace(" ", "").lower()] * DIRECTOR_WEIGHT


smd['cast'] = smd['cast'].apply(lambda people: _squash(_names(people)[:TOP_CAST]))
smd['keywords'] = smd['keywords'].apply(_names)

smd['tagline'] = smd['tagline'].apply(_as_list)
smd['overview'] = smd['overview'].apply(_as_list)

smd['genres'] = smd['genres'].apply(lambda genres: _squash(_names(genres)))
smd['director'] = smd['director'].apply(_director_tokens)

smd['tagline'][0]

In [None]:
def _repeated_token_counts(token_lists):
    """Count every token across `token_lists` and keep only those whose count is not 1."""
    tally = {}
    for tokens in token_lists:
        for token in tokens:
            tally[token] = tally.get(token, 0) + 1
    return {token: seen for token, seen in tally.items() if seen != 1}


# keyword -> number of occurrences over all movies (keywords seen once are left out)
keywords_count = _repeated_token_counts(smd['keywords'])

# overview word -> number of occurrences over all movies (words seen once are left out)
overview_count = _repeated_token_counts(smd['overview'])

In [None]:
def filter_overviews(x):
    """Keep, in order, the words of `x` that are keys of `overview_count`."""
    return [word for word in x if word in overview_count]

print(smd['overview'].isnull().sum())
print(smd['tagline'].isnull().sum())


def filter_keywords(x):
    """Keep, in order, the keywords of `x` that are keys of `keywords_count`."""
    return [word for word in x if word in keywords_count]

print(smd['keywords'].isnull().sum())
print(smd['tagline'].isnull().sum())


In [None]:
stemmer = SnowballStemmer('english')


def _stem_tokens(tokens):
    """Stem each token, then strip spaces and lower-case the result."""
    return [str.lower(stemmer.stem(token).replace(" ", "")) for token in tokens]


# Keywords and overview words are first restricted to the entries of the frequency tables;
# the tagline is stemmed as it is.
smd['keywords'] = smd['keywords'].apply(filter_keywords).apply(_stem_tokens)
smd['tagline'] = smd['tagline'].apply(_stem_tokens)
smd['overview'] = smd['overview'].apply(filter_overviews).apply(_stem_tokens)

# "Soup": all token lists of a movie concatenated, then joined into one space-separated string.
soup_parts = ['keywords', 'cast', 'director', 'genres', 'tagline', 'overview']
soup_tokens = smd[soup_parts[0]]
for part in soup_parts[1:]:
    soup_tokens = soup_tokens + smd[part]
smd['soup'] = soup_tokens.apply(' '.join)
smd['soup'][0]


In [None]:
# Word cloud of the tokens that occur most often across all movie "soups".

# Join the soups with a space. Plain concatenation (the original `text += i`) glued the last
# token of one movie to the first token of the next, inventing words that are in no soup.
text = ' '.join(smd['soup'])

word_cloud = WordCloud(collocations = False, background_color = 'white').generate(text)

# Render the generated image without axes.
plt.figure(figsize = (8, 8))
plt.imshow(word_cloud)
plt.axis("off")

plt.show()

In [None]:
# Bag of unigrams and bigrams over the soup of every movie.
# min_df=1 keeps every term, which is what the original integer 0 amounted to; recent
# scikit-learn releases reject an integer min_df below 1 during parameter validation.
count = CountVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
count_matrix = count.fit_transform(smd['soup'])

# Pairwise cosine similarity between all movies (dense, n_movies x n_movies).
cosine_sim = cosine_similarity(count_matrix, count_matrix)

titles = smd['title']
# lower-cased title -> row position in smd / cosine_sim
indices = pd.Series(smd.index, index=smd['title'].str.lower())

df_cosine = pd.DataFrame(cosine_sim)
# Show a corner only; the full frame has one row and one column per movie.
df_cosine.iloc[:10, :10]

In [None]:
# Heatmap of the pairwise cosine similarity, limited to the first 100 movies of the matrix.
fig, ax = plt.subplots(figsize=(8, 8))

# Drawn on the axes created above (the current axes, which seaborn would pick by default).
sns.heatmap(cosine_sim[:100, :100], ax=ax)

In [None]:
# Lookup table: lower-cased title -> row position in smd, plus the plain list of those titles.
lowered_titles = smd['title'].str.lower()
indices = pd.Series(data=smd.index, index=lowered_titles)
print(type(indices.index))
print(indices)

movnamelist = indices.index.to_list()

print(type(indices.index))
movnamelist

In [None]:
# Recommendations for a single movie, ranked by the cosine similarity computed above.


def get_recommendations(title):
    """Return the titles of the 30 movies most similar to `title`.

    Parameters
    ----------
    title : str
        Movie title. Matching ignores case and surrounding whitespace, because the
        `indices` lookup is keyed by lower-cased titles.

    Returns
    -------
    pandas.Series
        Up to 30 titles from `titles`, most similar first. The queried movie is excluded.

    Raises
    ------
    KeyError
        If the title is not present in `indices`.
    """
    idx = indices[title.strip().lower()]
    # Several movies can share a title; the lookup then returns a Series of positions.
    # Use the first one rather than failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = cosine_sim[idx]
    # Descending by similarity; the stable sort keeps the original order among ties.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the queried movie explicitly instead of assuming it is ranked first.
    ranked = ranked[ranked != idx][:30]

    return titles.iloc[ranked]

get_recommendations('The Dark Knight').head(10)

# Collaborative Filtering

In [2]:
# Ratings table for collaborative filtering: print it as loaded, then drop the timestamp.
print(ratdf)
data = ratdf.drop(columns='timestamp')
data

NameError: name 'pd' is not defined

In [None]:
# Positional 80/20 split: the first 80% of the rows train the model, the rest evaluate it.
# NOTE(review): the split follows row order, not a shuffle -- confirm that is intended.
split_at = int(data.shape[0]*0.80)
train_data, test_data = data.iloc[:split_at], data.iloc[split_at:]

reader = Reader(rating_scale=(1,5))
rating_columns = ['userId', 'movieId', 'rating']

# Surprise Dataset built from the training rows, and the trainset made from all of it.
train_data_mf = Dataset.load_from_df(train_data[rating_columns], reader)
trainset = train_data_mf.build_full_trainset()

# Same wrapping for the held-out rows. They are also stored as a Surprise trainset object,
# from which the evaluation pairs are produced later with build_testset().
test_data_mf = Dataset.load_from_df(test_data[rating_columns], reader)
testset = test_data_mf.build_full_trainset()

In [None]:
# Biased SVD with 100 latent factors, fitted on the training ratings.
svd = SVD(n_factors=100, biased=True, random_state=15, verbose=True)
svd.fit(trainset)

# Predictions for every rating in the train set, then for every rating in the held-out set.
train_preds = svd.test(trainset.build_testset())
test_preds = svd.test(testset.build_testset())

# Estimated ratings only, as arrays.
train_pred_mf = np.array([prediction.est for prediction in train_preds])
test_pred_mf = np.array([prediction.est for prediction in test_preds])

In [None]:
# RMSE and MAPE of the SVD model on its own training ratings.
# `train_rmse` / `train_mape` were printed without ever being computed; they are derived
# here from the predictions of the previous cell (r_ui is the true rating of a prediction).
train_actual = np.array([pred.r_ui for pred in train_preds])
train_errors = train_actual - train_pred_mf

train_rmse = np.sqrt(np.mean(train_errors ** 2))
# NOTE(review): MAPE divides by the true rating -- assumes no rating equals 0; confirm.
train_mape = np.mean(np.abs(train_errors / train_actual)) * 100

print(train_rmse)
print(train_mape)

### Sparse Matrix Generation 

In [3]:
# User x movie rating matrix in CSR form: row = userId, column = movieId, value = rating.
rating_values = train_data.rating.values
user_rows = train_data.userId.values
movie_cols = train_data.movieId.values
train_sparse_matrix = sparse.csr_matrix((rating_values, (user_rows, movie_cols)))

NameError: name 'sparse' is not defined

## Generating Hand Crafted features for User Movie Interaction Matrix

### Global variables

In [None]:
# Global mean rating: the sum of all stored ratings divided by the number of non-zero entries.
train_global_average = train_sparse_matrix.sum()/train_sparse_matrix.count_nonzero()

# Container for all averages; the per-user and per-movie entries are added below.
train_averages = {'global': train_global_average}

def get_average_ratings(sparse_matrix, of_users):
    """Average non-zero rating per row or per column of `sparse_matrix`.

    Parameters
    ----------
    sparse_matrix : scipy sparse matrix of ratings
    of_users : bool
        True averages along each row, False along each column.

    Returns
    -------
    dict
        index -> mean of the non-zero entries of that row/column.
        Rows/columns without any non-zero entry are left out.
    """
    axis = 1 if of_users else 0

    # Sum of ratings and number of non-zero ratings along the chosen axis, as flat arrays.
    totals = np.asarray(sparse_matrix.sum(axis=axis)).ravel()
    counts = np.asarray((sparse_matrix != 0).sum(axis=axis)).ravel()

    # Only indices that have at least one rating get an entry.
    return {int(i): totals[i] / counts[i] for i in np.flatnonzero(counts)}

# Per-user averages (row-wise), with a spot check for user 10.
user_means = get_average_ratings(train_sparse_matrix, of_users=True)
train_averages['user'] = user_means
print('\nAverage rating of user 10 :', user_means[10])

# Per-movie averages (column-wise), with a spot check for movie 15.
movie_means = get_average_ratings(train_sparse_matrix, of_users=False)
train_averages['movie'] = movie_means
print('\n AVerage rating of movie 15 :', movie_means[15])
train_averages

### For every user–movie pair in the dataset: ratings of that movie by the 5 most similar users, and ratings by that user of the 5 most similar movies



In [None]:
# Hand-crafted features for every (user, movie, rating) triple.

# Column layout of an engineered feature row.
FEATURE_COLUMNS = ['user', 'movie', 'GAvg', 'sur1', 'sur2', 'sur3', 'sur4', 'sur5',
                   'smr1', 'smr2', 'smr3', 'smr4', 'smr5', 'UAvg', 'MAvg', 'rating']
# Number of similar-user / similar-movie ratings kept per row.
N_SIMILAR = 5

# Movie-major view of the training matrix (one row per movie id), built once instead of
# transposing inside the loop for every rating.
train_movie_matrix = train_sparse_matrix.T.tocsr()

# user id -> other user ids ordered from most to least similar (one array per distinct user).
_similar_users_cache = {}


def _similar_users(user):
    """User ids ordered by decreasing cosine similarity to `user`, first entry dropped.

    The first entry of the ordering is assumed to be `user` itself, as in the original code.
    The result is cached because it only depends on the user.
    """
    if user not in _similar_users_cache:
        user_sim = cosine_similarity(train_sparse_matrix[user], train_sparse_matrix).ravel()
        _similar_users_cache[user] = user_sim.argsort()[::-1][1:]
    return _similar_users_cache[user]


def _similar_movies(movie):
    """Movie ids ordered by decreasing cosine similarity to `movie`, first entry dropped."""
    movie_sim = cosine_similarity(train_movie_matrix[movie], train_movie_matrix).ravel()
    return movie_sim.argsort()[::-1][1:]


def _top_ratings(ratings, fill_value):
    """First N_SIMILAR non-zero values of `ratings`, padded with `fill_value` to that length."""
    kept = list(ratings[ratings != 0][:N_SIMILAR])
    kept.extend([fill_value] * (N_SIMILAR - len(kept)))
    return kept


def build_feature_row(user, movie, rating):
    """Feature row for one (user, movie, rating) triple, laid out as FEATURE_COLUMNS.

    All similarities and averages come from the TRAINING matrix. A user or movie without
    training ratings (possible for held-out rows) falls back to the global average, and its
    neighbour ratings are filled with the available average.
    """
    global_avg = train_averages['global']
    user_known = user in train_averages['user']
    movie_known = movie in train_averages['movie']
    user_avg = train_averages['user'][user] if user_known else global_avg
    movie_avg = train_averages['movie'][movie] if movie_known else global_avg

    by_similar_users = np.array([])
    of_similar_movies = np.array([])
    if user_known and movie_known:
        # ratings given to `movie` by the users most similar to `user`
        by_similar_users = train_sparse_matrix[_similar_users(user), movie].toarray().ravel()
        # ratings given by `user` to the movies most similar to `movie`
        of_similar_movies = train_sparse_matrix[user, _similar_movies(movie)].toarray().ravel()

    return [user, movie, global_avg,
            *_top_ratings(by_similar_users, movie_avg),   # sur1..sur5, padded with movie average
            *_top_ratings(of_similar_movies, user_avg),   # smr1..smr5, padded with user average
            user_avg, movie_avg, rating]


def build_feature_frame(users, movies, ratings):
    """DataFrame with one feature row per triple.

    Rows are collected in a list and the frame is built once; DataFrame.append was removed
    in pandas 2.0 and copied the whole frame on every call.
    """
    rows = [build_feature_row(u, mv, r) for u, mv, r in zip(users, movies, ratings)]
    return pd.DataFrame(rows, columns=FEATURE_COLUMNS)


# Training features: every stored rating of the training matrix.
train_users, train_movies, train_ratings = sparse.find(train_sparse_matrix)
final_data = build_feature_frame(train_users, train_movies, train_ratings)

# Held-out features, built the same way from the test split. The modelling cells below
# read `final_test_data`, which no visible cell created.
final_test_data = build_feature_frame(test_data.userId.values,
                                      test_data.movieId.values,
                                      test_data.rating.values)

## Creating XGBoost Model for final prediction

In [None]:
# Column names of the engineered feature frames. The training frame needs them as well:
# the next cell drops 'user', 'movie' and 'rating' from BOTH frames by name.
FEATURE_COLUMNS = ['user', 'movie', 'GAvg', 'sur1', 'sur2', 'sur3', 'sur4', 'sur5',
                   'smr1', 'smr2', 'smr3', 'smr4', 'smr5', 'UAvg', 'MAvg', 'rating']

final_data.columns = FEATURE_COLUMNS
# NOTE(review): `final_test_data` must be produced by the feature-generation step before
# this cell runs -- confirm that step creates it.
final_test_data.columns = FEATURE_COLUMNS

In [None]:
def get_error_metrics(y_true, y_pred):
    """Root-mean-square error and mean absolute percentage error.

    Parameters
    ----------
    y_true, y_pred : array-like of equal length (list, numpy array or pandas Series)
        True and predicted values. Both are converted to float arrays first, so plain
        lists and Series with a non-default index work as well (the original mixed
        positional element access with array arithmetic).

    Returns
    -------
    (rmse, mape) : mape is a percentage; it divides by y_true, so zeros in y_true
    give a non-finite value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    errors = y_true - y_pred
    rmse = np.sqrt(np.mean(errors ** 2))
    mape = np.mean(np.abs(errors / y_true)) * 100
    return rmse, mape

# Identifier and target columns are not model inputs.
non_feature_columns = ['user', 'movie', 'rating']

# Training inputs and target.
x_train = final_data.drop(columns=non_feature_columns)
y_train = final_data['rating']

# Held-out inputs and target.
x_test = final_test_data.drop(columns=non_feature_columns)
y_test = final_test_data['rating']

In [None]:
# XGBoost regressor on the hand-crafted features.
# - `eval_metric` is set on the estimator: passing it to fit() is no longer accepted by
#   current XGBoost releases.
# - the former `silent=False` argument is dropped: it is not a parameter of current releases
#   (logging is controlled through `verbosity`, left at its default).
xgb_model = xgb.XGBRegressor(n_jobs=13, random_state=15, n_estimators=100, eval_metric='rmse')

# Result dictionaries, filled by the evaluation cells.
train_results = dict()
test_results = dict()

# Fit and report the wall-clock training time.
print('Training the model..')
start = datetime.now()
xgb_model.fit(x_train, y_train)
print('Done. Time taken : {}\n'.format(datetime.now()-start))
print('Done \n')

In [None]:
# Predictions of the fitted model on its own training data, with RMSE and MAPE.
# (The unused `start = datetime.now()` of the original was removed.)
print('Evaluating the model with TRAIN data...')
y_train_pred = xgb_model.predict(x_train)
rmse_train, mape_train = get_error_metrics(y_train.values, y_train_pred)

# Keep metrics and raw predictions together.
train_results = {'rmse': rmse_train,
                 'mape': mape_train,
                 'predictions': y_train_pred}
train_results

In [None]:
# Predictions on the held-out features, with RMSE and MAPE.
print('Evaluating Test data')
y_test_pred = xgb_model.predict(x_test)
rmse_test, mape_test = get_error_metrics(y_true=y_test.values, y_pred=y_test_pred)

# Keep metrics and raw predictions together.
test_results = dict(rmse=rmse_test, mape=mape_test, predictions=y_test_pred)
test_results

# Tkinter GUI Implementation

In [None]:
import tkinter as tk
import tkinter.ttk
from tkinter import *
import tkinter.messagebox
from collections import defaultdict

In [None]:
# Window / frame sizes and the shared widget slots used by the button callbacks.
windx, winy = 1000, 400
frmx, frmy = 1000, 400
l = [None] * 10             # result labels currently shown (at most ten)
invalidname = [None]        # slot for the "invalid movie name" message label
invalidcchoice = [None]     # slot for the "invalid choice" message label

# Root window: title and size given as "<width>x<height>".
root = Tk()
root.title("Movie Recommender System")
root.geometry('%dx%d' % (windx, winy))

# Heading label.
l1 = Label(root, text="MOVIE RECOMMENDER SYSTEM", fg="blue")
l1.grid(row=0, column=1)

In [None]:

# Callbacks run by the menu entries and SUBMIT buttons.
def hide_all_frame():
    """Remove all three recommendation frames from the grid."""
    for pane in (basic_rec_frame, content_based_frame, collaborative_frame):
        pane.grid_forget()
# Basic recommendation
def basic_rec():
    """Menu callback: show only the basic-recommendation frame."""
    hide_all_frame()
    basic_rec_frame.grid(column=0, row=0)
    
def basic_based():
    """Button callback: list the first ten titles of `t1` in the basic frame."""
    print(t1)

    # Take every previously shown result or message label off the grid.
    for slots in (l, invalidname, invalidcchoice):
        for widget in slots:
            if widget is not None:
                widget.grid_remove()

    # One label per title, stacked from grid row 2 downwards in column 5.
    shown = min(10, len(t1))
    for i in range(shown):
        l[i] = Label(basic_rec_frame, text=t1.iloc[i]['title'])
        l[i].grid(row=2 + i, column=5, sticky=W, pady=5)
    
def genre_based():
    """Button callback: list the first ten titles stored for the genre chosen in the dropdown.

    Shows "Please select a valid choice" when the dropdown still holds its placeholder or
    when the chosen genre has no entry in `genredict`.
    """
    event = clicked.get()

    # Take every previously shown result or message label off the grid.
    for widget in l + invalidname + invalidcchoice:
        if widget is not None:
            widget.grid_remove()

    if event == "Select Genre" or event not in genredict:
        # Stored in the `invalidcchoice` slot so the next callback can remove it again.
        # The original wrote to `invalidchoice`, a name that does not exist.
        invalidcchoice[0] = Label(basic_rec_frame, text="Please select a valid choice")
        invalidcchoice[0].grid(row=2, column=5, sticky=W, pady=5)
        return

    ll = list(genredict[event]['title'].head(10))
    for i, movie_title in enumerate(ll):
        l[i] = Label(basic_rec_frame, text=movie_title)
        l[i].grid(row=2 + i, column=5, sticky=W, pady=5)
    

    
# Content-based recommendation
def content_rec():
    """Menu callback: show only the content-based frame."""
    hide_all_frame()
    content_based_frame.grid(column=0, row=0)
    
def content_based():
    """Button callback: list ten movies similar to the title typed into the entry box.

    An empty entry, or a title that get_recommendations does not know, shows a message
    label instead of results.
    """
    # Take every previously shown result or message label off the grid.
    for widget in l + invalidname + invalidcchoice:
        if widget is not None:
            widget.grid_remove()

    def show_message(message):
        # Written into slot 0 of the shared list. The original rebound the global
        # `invalidname` to a Label, which broke the clean-up loop of the next callback.
        invalidname[0] = Label(content_based_frame, text=message)
        invalidname[0].grid(row=2, column=5, sticky=W, pady=5)

    movname = movvalue.get()
    if movname == "":
        show_message("Please enter the valid movie name!!")
        return

    movname = movname.lower()
    try:
        res = get_recommendations(movname).head(10)
    except KeyError:
        # The title is not in the lookup table of the content-based model.
        show_message("Please enter the valid movie name!!")
        return

    for i, movie_title in enumerate(res):
        l[i] = Label(content_based_frame, text=movie_title)
        l[i].grid(row=2 + i, column=5, sticky=W, pady=5)
    
    
# Collaborative filtering
def collaborative_rec():
    """Menu callback: show only the collaborative-filtering frame."""
    hide_all_frame()
    collaborative_frame.grid(column=0, row=0)
    

def collaborative_based():
    """Button callback: recommend movies to a new user from the five ratings in the form.

    The five ratings are appended to the ratings table under a synthetic user id, an SVD
    model is trained on the combined data, and the unrated movies with the highest
    estimated rating are listed (at most ten labels).
    """
    # Take every previously shown result or message label off the grid.
    for widget in l + invalidname + invalidcchoice:
        if widget is not None:
            widget.grid_remove()

    new_user_id = 99999
    # Movie ids of the five titles shown in the form, in display order.
    rated_movie_ids = [278, 13, 637, 122, 11]
    try:
        movie_ratings = [int(box.get()) for box in (l11, l12, l13, l14, l15)]
    except ValueError:
        # A spinbox was edited by hand into something that is not a whole number.
        invalidcchoice[0] = Label(collaborative_frame, text="Please select a valid choice")
        invalidcchoice[0].grid(row=2, column=5, sticky=W, pady=5)
        return
    print(movie_ratings)

    # The original referred to undefined `ratings`, `columns` and `model`; they are
    # defined here from the ratings table loaded at the top of the notebook.
    columns = ['userId', 'movieId', 'rating']
    new_ratings = pd.DataFrame({
        'userId': [new_user_id] * len(rated_movie_ids),
        'movieId': rated_movie_ids,
        'rating': movie_ratings,
    })
    result = pd.concat([ratdf[columns], new_ratings], ignore_index=True)

    # Surprise dataset and trainset over ALL ratings, including the new user's.
    data = Dataset.load_from_df(result[columns], reader)
    trainset = data.build_full_trainset()

    model = SVD(n_factors=100, biased=True, random_state=15)
    # Diagnostic only: 5-fold RMSE / MAE printed to the console.
    cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    # Cross-validation leaves the model fitted on a single fold; refit on everything
    # so the new user's ratings are all taken into account.
    model.fit(trainset)
    print('Training Done')

    # Estimate only the movies the new user has not rated, instead of building the
    # anti-testset for every user in the data.
    inner_uid = trainset.to_inner_uid(new_user_id)
    already_rated = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}
    estimates = []
    for inner_iid in trainset.all_items():
        if inner_iid in already_rated:
            continue
        raw_iid = trainset.to_raw_iid(inner_iid)
        estimates.append((raw_iid, model.predict(new_user_id, raw_iid).est))
    print("prediction done")

    # Highest estimate first; keep 30 candidates, because not every id has a title below.
    estimates.sort(key=lambda pair: pair[1], reverse=True)
    top_candidates = estimates[:30]

    # id -> title lookup built once (the original scanned the whole movie table per candidate).
    # NOTE(review): the rating ids are 'movieId' values while movdf['id'] is matched against
    # 'tmdbId' elsewhere in this notebook -- confirm both are the same id space.
    title_by_id = movdf.drop_duplicates(subset='id').set_index('id')['title']
    rcmnd = [[movie_id, title_by_id[movie_id]]
             for movie_id, _ in top_candidates
             if movie_id in title_by_id.index]

    for i in range(min(10, len(rcmnd))):
        l[i] = Label(collaborative_frame, text=rcmnd[i][1])
        l[i].grid(row=2 + i, column=5, sticky=W, pady=5)

In [None]:
    
# One frame per recommendation type; the menu callbacks show one of them at a time.

# ---- Basic recommendation frame ----
basic_rec_frame = LabelFrame(root, width=frmx, height=frmy)
l2 = Label(basic_rec_frame, text="Click on the button below to show the best recomended movies of all time:")
l2.grid(row=1, column=0, sticky=W, pady=2)
button_basic1 = Button(basic_rec_frame, text="SUBMIT", fg="white", bg="green", command=basic_based)
button_basic1.grid(row=2, column=1, sticky=S)

# Genres offered in the dropdown.
options = [
    'Action',
    'Adventure',
    'Animation',
    'Comedy',
    'Crime',
    'Documentary',
    'Drama',
    'Family',
    'Fantasy',
    'History',
    'Horror',
    'Music',
    'Mystery',
    'Romance',
    'Science Fiction',
    'TV Movie',
    'Thriller',
    'War',
    'Western',
]

# Genre prompt. It used to be gridded at row 1 / column 0, the same cell as the label above,
# so the two labels overlapped. Prompt and dropdown now sit below the first button.
l3 = Label(basic_rec_frame, text="Select genre of the movie you want to watch : ")
l3.grid(row=3, column=0, sticky=W, pady=2)

# Variable behind the dropdown, with its placeholder text.
clicked = StringVar()
clicked.set("Select Genre")

drop = OptionMenu(basic_rec_frame, clicked, *options)
drop.grid(row=4, column=0, sticky=W)

button = Button(basic_rec_frame, text="SUBMIT", fg="white", bg="green", command=genre_based)
button.grid(row=9, column=1, sticky=S)


# ---- Content-based frame ----
content_based_frame = LabelFrame(root, width=frmx, height=frmy)
l2 = Label(content_based_frame, text="Write name of the any one of the following movie")
l2.grid(row=1, column=0)
movvalue = StringVar()
moventry = Entry(content_based_frame, textvariable=movvalue)
moventry.grid(row=1, column=1)
button = Button(content_based_frame, text="SUBMIT", fg="white", bg="green", command=content_based)
button.grid(row=2, column=1, sticky=S)


# ---- Collaborative-filtering frame ----
collaborative_frame = LabelFrame(root, width=frmx, height=frmy)

l3 = Label(collaborative_frame, text="Rate the following movies")
l3.grid(row=1, column=2, sticky=W, pady=2)

# Column headers (grid row 2), then one row per movie to rate (grid rows 3-7).
# The order of the titles must match the movie ids used by collaborative_based.
Label(collaborative_frame, text="Movies Name").grid(row=2, column=2)
Label(collaborative_frame, text="Rate the movie on the scale of 5").grid(row=2, column=3)

movies_to_rate = [
    "the shawshank redemption",                        # 278
    "forest gump",                                     # 13
    "life is beautiful",                               # 637
    "the lord of the rings: the return of the king",   # 122
    "star wars",                                       # 11
]
for offset, movie_name in enumerate(movies_to_rate):
    Label(collaborative_frame, text=movie_name).grid(row=3 + offset, column=2)

# One rating spinbox per movie; collaborative_based reads them through l11..l15.
l11 = Spinbox(collaborative_frame, from_=0, to=5)
l12 = Spinbox(collaborative_frame, from_=0, to=5)
l13 = Spinbox(collaborative_frame, from_=0, to=5)
l14 = Spinbox(collaborative_frame, from_=0, to=5)
l15 = Spinbox(collaborative_frame, from_=0, to=5)
for offset, rating_box in enumerate((l11, l12, l13, l14, l15)):
    rating_box.grid(row=3 + offset, column=3)

button = Button(collaborative_frame, text="SUBMIT", fg="white", bg="green", command=collaborative_based)
button.grid(row=9, column=1, sticky=S)


# ---- Menu bar: one entry per recommendation type ----
mainmenu = Menu(root)
mainmenu.add_command(label="Basic", command=basic_rec)
mainmenu.add_command(label="Content Based", command=content_rec)
mainmenu.add_command(label="Collaborative", command=collaborative_rec)

root.config(menu=mainmenu)

In [None]:
# Start the Tk main loop of the root window (handles GUI events; blocks while it runs).
root.mainloop()