In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from numpy.linalg import norm
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option("display.max_columns", None)

In [None]:
# Load the movie catalogue and the user ratings.
# NOTE(review): the paths are absolute and look like a mounted Google Drive
# in Colab - confirm the mount exists before running elsewhere.
movies, ratings = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/Dataset/movies.csv",
        "/content/drive/MyDrive/Dataset/ratings.csv",
    )
)

In [None]:
# (rows, columns) of each frame, shown as a tuple of two shapes.
movies.shape, ratings.shape

((9125, 3), (100836, 4))

In [None]:
# Column labels of both frames; `movieId` is the column they share.
movies.columns, ratings.columns

(Index(['movieId', 'title', 'genres'], dtype='object'),
 Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object'))

In [None]:
# Per-column dtypes of the movie catalogue.
movies.dtypes

movieId     int64
title      object
genres     object
dtype: object

In [None]:
# Preview the first rows of the raw movie catalogue.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Split titles of the form "Name (YYYY)" into a clean title and a numeric year.
titles = movies['title'].str.strip()
# Characters -5..-2 are the four digits inside a trailing "(YYYY)", if present.
year_text = titles.str[-5:-1]
has_year = year_text.str.isdigit()
# Titles without a parsable trailing year get NaN (and keep their full text).
movies['year'] = pd.to_numeric(year_text.where(has_year))
# Dropping the last 7 characters removes " (YYYY)" including the leading space.
movies['title'] = titles.where(~has_year, titles.str[:-7])
movies.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995.0
1,2,Jumanji,Adventure|Children|Fantasy,1995.0
2,3,Grumpier Old Men,Comedy|Romance,1995.0
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995.0
4,5,Father of the Bride Part II,Comedy,1995.0


In [None]:
# Distinct extracted years; a NaN here means some titles had no parsable year.
movies['year'].unique()

array([1995., 1994., 1996., 1976., 1992., 1967., 1993., 1964., 1977.,
       1965., 1982., 1990., 1991., 1989., 1937., 1940., 1969., 1981.,
       1973., 1970., 1960., 1955., 1959., 1968., 1988., 1948., 1950.,
       1997., 1956., 1958., 1972., 1943., 1952., 1951., 1957., 1961.,
       1954., 1934., 1944., 1963., 1942., 1941., 1953., 1939., 1946.,
       1945., 1938., 1947., 1935., 1936., 1926., 1949., 1932., 1985.,
       1975., 1974., 1971., 1979., 1987., 1986., 1980., 1978., 1966.,
       1962., 1983., 1984., 1933., 1931., 1922., 1998., 1927., 1929.,
       1930., 1928., 1999., 2000., 1925., 1923., 1918., 1921., 2001.,
       1924., 2002., 2003., 1920., 1915., 2004., 1916., 1917., 2005.,
       2006., 1902., 2007., 2008., 2009., 1919., 2010., 2011., 2012.,
       2013., 2014.,   nan, 2015., 2016.])

In [None]:
# Rows whose year could not be extracted from the title.
movies[np.isnan(movies['year'])]

Unnamed: 0,movieId,title,genres,year
8505,108548,"Big Bang Theory, The (2007-)",Comedy,
9017,143410,Hyena Road,(no genres listed),
9063,151307,The Lovers and the Despot,(no genres listed),
9118,162376,Stranger Things,Drama,
9124,164979,"Women of '69, Unboxed",Documentary,


In [None]:
# Remove movies that carry the placeholder genre, then re-list the rows
# that are still missing a year.
no_genre_idx = movies.index[movies['genres'] == "(no genres listed)"]
movies.drop(index=no_genre_idx, inplace=True)
movies[movies['year'].isna()]

Unnamed: 0,movieId,title,genres,year
8505,108548,"Big Bang Theory, The (2007-)",Comedy,
9118,162376,Stranger Things,Drama,
9124,164979,"Women of '69, Unboxed",Documentary,


In [None]:
# Hand-entered release years for the titles whose year could not be parsed.
# Rows are addressed by movieId rather than by positional index label, so the
# fix keeps working if the CSV row order (and therefore the index) changes.
MANUAL_YEARS = {
    108548: 2007,   # "Big Bang Theory, The (2007-)"
    162376: 2016,   # "Stranger Things"
    164979: 2014,   # "Women of '69, Unboxed"
}
for movie_id, release_year in MANUAL_YEARS.items():
    movies.loc[movies['movieId'] == movie_id, 'year'] = release_year

# The "(2007-)" suffix was not stripped by the title parser; set the title by hand.
movies.loc[movies['movieId'] == 108548, 'title'] = "Big Bang Theory"

# Every year is now filled, so the float column can be cast to int
# (the cast raises if any NaN is left).
movies['year'] = movies['year'].astype('int')
movies.isnull().sum()

movieId    0
title      0
genres     0
year       0
dtype: int64

In [None]:
# Re-number rows 0..n-1 after the earlier drops so positional and label
# indexing agree from here on.
movies.reset_index(drop=True, inplace=True)

# Each movie's genres as a list, e.g. "Comedy|Romance" -> ["Comedy", "Romance"].
genre_lists = movies['genres'].str.split('|')

all_genres=['Drama','Adventure','Documentary','War','Sci-Fi','IMAX','Romance','Fantasy','Crime','Children',
            'Thriller','Comedy','Action','Film-Noir','Horror','Western','Animation','Musical','Mystery']

# One 0/1 indicator column per genre, computed column-wise instead of with a
# per-row .iloc/.at loop. Genres not listed in `all_genres` are ignored.
for genre in all_genres:
    movies[genre] = genre_lists.map(lambda tags: int(genre in tags))

# The raw pipe-separated column is fully represented by the indicators now.
movies.drop(columns=['genres'], inplace=True)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the release year (zero mean, unit variance).
# The fitted scaler is kept in `year_scaler`.
year_scaler = StandardScaler()
year_column = movies['year'].to_numpy().reshape(-1, 1)   # scaler expects a 2-D array
temp = year_scaler.fit_transform(year_column)
# Assigning a fresh Series aligns on the default 0..n-1 index of `movies`.
movies['year'] = pd.Series(temp.ravel())
movies.head()

Unnamed: 0,movieId,title,year,Drama,Adventure,Documentary,War,Sci-Fi,IMAX,Romance,Fantasy,Crime,Children,Thriller,Comedy,Action,Film-Noir,Horror,Western,Animation,Musical,Mystery
0,1,Toy Story,0.157599,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0
1,2,Jumanji,0.157599,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men,0.157599,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0
3,4,Waiting to Exhale,0.157599,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0
4,5,Father of the Bride Part II,0.157599,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [None]:
# Check whether any user has rated the same movie more than once.
# A "userId#movieId" key is built with vectorised string concatenation
# (no row-wise apply); the largest key count is 1 when every pair is unique.
ratings['combine'] = ratings['userId'].astype(str) + "#" + ratings['movieId'].astype(str)
ratings['combine'].value_counts().max()

1

In [None]:
# Drop `timestamp` and the temporary duplicate-check key `combine`.
ratings.drop(['timestamp', 'combine'], axis=1, inplace=True)

In [None]:
# Linearly rescale ratings so that 0 -> -1, 2.5 -> 0 and 5 -> +1.
# NOTE: not idempotent - re-running this cell rescales the already-scaled values.
ratings['rating'] = ratings['rating'].div(5).sub(0.5).mul(2)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,0.6
1,1,3,0.6
2,1,6,0.6
3,1,47,1.0
4,1,50,1.0


In [None]:
# First two columns (movieId, title) identify each movie; the remaining
# columns (scaled year + genre indicators) form the item feature matrix.
# Both arrays share the row order of `movies`.
movie_name = movies.iloc[:, :2].to_numpy()
X = movies.iloc[:, 2:].to_numpy()

In [None]:
def give_recommendation(user, top_n=10):
    """Recommend unseen movies for `user` by content-based cosine similarity.

    A user profile is built as the rating-weighted mean of the feature vectors
    (scaled year + genre indicators) of the movies the user has rated. Every
    movie in the catalogue is then scored by the cosine similarity between its
    feature vector and that profile, and the best-scoring movies the user has
    not rated are returned.

    Relies on the notebook-level objects `ratings`, `movies`, `all_genres`,
    `X` (item feature matrix) and `movie_name` (movieId/title array whose rows
    line up with `X`).

    Parameters
    ----------
    user : value compared against ``ratings['userId']``.
    top_n : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list
        Up to `top_n` movie titles, best match first. Empty when `top_n` is
        not positive or when the user has no ratings for movies present in
        `movies` (no profile can be built in that case).
    """
    if top_n <= 0:
        return []

    user_rating = ratings[ratings['userId'] == user]
    rated = pd.merge(movies, user_rating, how='inner', on='movieId')
    if rated.empty:
        # Without this guard the profile would be 0/0 = NaN and every score NaN.
        return []

    rated_features = rated[['year'] + all_genres].to_numpy(dtype=float)
    rated_scores = rated['rating'].to_numpy(dtype=float)
    # Rating-weighted average feature vector of the movies the user rated.
    user_profile = rated_scores @ rated_features / len(rated)

    item_features = np.asarray(X, dtype=float)
    dot_products = item_features @ user_profile
    denom = norm(item_features, axis=1) * norm(user_profile)
    # Cosine similarity; entries with a zero-length vector get score 0
    # instead of NaN/inf from a division by zero.
    pred_rating = np.zeros(len(item_features), dtype=float)
    np.divide(dot_products, denom, out=pred_rating, where=denom > 0)

    # Build the score table column by column so scores stay numeric instead of
    # being forced to object dtype by concatenating them with ids and titles.
    scored = pd.DataFrame({
        'movieId': movie_name[:, 0],
        'title': movie_name[:, 1],
        'pred_rating': pred_rating,
    })
    unseen = scored[~scored['movieId'].isin(user_rating['movieId'])]
    # Stable sort: movies with equal scores keep catalogue order, so the
    # result is deterministic across runs.
    unseen = unseen.sort_values('pred_rating', ascending=False, kind='mergesort')
    return unseen['title'].head(top_n).tolist()

In [None]:
# Step-by-step version of give_recommendation() for a single user, kept at
# notebook level so every intermediate can be inspected.
user = 1

# Ratings of this user joined with the features of the movies they rated.
user_rating = ratings[ratings['userId'] == user].copy()
df = movies.merge(user_rating, how='inner', on='movieId')
x = df[['year'] + all_genres].values
y = df[['rating']].values

# Rating-weighted mean feature vector of the rated movies.
user_profile = (np.matmul(y.T, x) / x.shape[0]).reshape(-1)

# Cosine similarity between every catalogue movie and the user profile.
similarity_scale = norm(X, axis=1) * norm(user_profile)
pred_rating = (np.dot(X, user_profile) / similarity_scale).reshape(-1, 1)

# Attach ids/titles, then keep only movies the user has not rated
# (the left merge leaves `rating` as NaN for those).
df_pred = pd.DataFrame(np.hstack((movie_name, pred_rating)),
                       columns=['movieId', 'title', 'pred_rating'])
temp = df_pred.merge(user_rating, how='left', on='movieId')
unseen = temp[temp['rating'].isna()].drop(columns=['userId', 'rating'])
unseen = unseen.sort_values('pred_rating', ascending=False).reset_index(drop=True)

# Titles (column 1) of the ten best-scoring unseen movies.
list(unseen.iloc[:10, 1].values)

['The Great Train Robbery',
 "It's a Mad, Mad, Mad, Mad World",
 'Stunt Man, The',
 'Asterix and the Gauls (Astérix le Gaulois)',
 'Wages of Fear, The (Salaire de la peur, Le)',
 'Batman',
 'Those Magnificent Men in Their Flying Machines',
 'Casino Royale',
 'Wonderful World of the Brothers Grimm, The',
 'Three Musketeers, The']

In [None]:
# Recommended titles for user 10, using the function defined earlier in the notebook.
give_recommendation(10)

["Naomi and Ely's No Kiss List",
 'Aloha',
 'And So It Goes',
 'Magic in the Moonlight',
 'One I Love, The',
 'What If',
 'Geography Club',
 'Don Jon',
 'Words and Pictures',
 'Last Vegas']