# Movies Recommender System

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Load the three pipe-/tab-delimited tables from the local ml-100k directory.

# User table: every column in u_cols is read.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(
    'ml-100k/u.user',
    sep='|',
    names=u_cols,
    encoding='latin-1',
)

# Ratings table: usecols=range(3) keeps only the first three columns,
# so 'unix_timestamp' is named here but never loaded.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=r_cols,
    encoding='latin-1',
    usecols=range(3),
)

# Movie table: only the first two columns (id and title) are read.
i_cols = ['movie id', 'movie title']
movies = pd.read_csv(
    'ml-100k/u.item',
    sep='|',
    names=i_cols,
    encoding='latin-1',
    usecols=range(2),
)

In [3]:
# Preview the first rows of the users table to confirm the columns parsed as expected.
users.head()

Unnamed: 0,user_id,age,sex,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [4]:
# Preview the first rows of the ratings table (user_id, movie_id, rating).
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [5]:
# Preview the first rows of the movies table (movie id, movie title).
movies.head()

Unnamed: 0,movie id,movie title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [6]:
# Attach each rating to its movie title, then discard both id columns:
# from here on, movies are identified by title only.
movie_ratings = (
    movies
    .merge(ratings, left_on='movie id', right_on='movie_id')
    .drop(columns=['movie id', 'movie_id'])
)

In [7]:
# Preview the merged table: one row per (movie title, user_id, rating).
movie_ratings.head()

Unnamed: 0,movie title,user_id,rating
0,Toy Story (1995),308,4
1,Toy Story (1995),287,5
2,Toy Story (1995),148,4
3,Toy Story (1995),280,4
4,Toy Story (1995),66,3


In [8]:
# Reshape to a user x movie matrix: one row per user_id, one column per
# movie title, cells holding the rating (NaN where the user has not rated).
# aggfunc='mean' is pivot_table's default, spelled out here: repeated
# (user, title) pairs are averaged.
movie_ratings_pivot = movie_ratings.pivot_table(
    values='rating',
    index=['user_id'],
    columns=['movie title'],
    aggfunc='mean',
)

In [9]:
# Preview the user x movie rating matrix (mostly NaN: most users rate few movies).
movie_ratings_pivot.head()

movie title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


In [10]:
# Movie-to-movie similarity: pairwise Pearson correlation between the rating
# columns ('pearson' is DataFrame.corr's default method, made explicit here).
# NOTE(review): no min_periods is set, so pairs with very few common raters
# can score exactly +/-1.0 (visible in the preview below) - consider a floor.
corr_matrix = movie_ratings_pivot.corr(method='pearson')

In [11]:
# Preview the movie x movie correlation matrix (NaN where no correlation could be computed).
corr_matrix.head()

movie title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
movie title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),1.0,,-1.0,-0.5,-0.5,0.522233,,-0.426401,,,...,,,,,,,,,,
1-900 (1994),,1.0,,,,,,-0.981981,,,...,,,,-0.944911,,,,,,
101 Dalmatians (1996),-1.0,,1.0,-0.04989,0.269191,0.048973,0.266928,-0.043407,,0.111111,...,,-1.0,,0.15884,0.119234,0.680414,0.0,0.707107,,
12 Angry Men (1957),-0.5,,-0.04989,1.0,0.666667,0.256625,0.274772,0.178848,,0.457176,...,,,,0.096546,0.068944,-0.361961,0.144338,1.0,1.0,
187 (1997),-0.5,,0.269191,0.666667,1.0,0.596644,,-0.5547,,1.0,...,,0.866025,,0.455233,-0.5,0.5,0.475327,,,


In [12]:
# Take an independent copy: plain assignment would only alias movie_ratings,
# so the columns added and the in-place fillna applied to popular_movies
# later would silently modify movie_ratings as well.
popular_movies = movie_ratings.copy()

In [13]:
def good(x):
    """Flag a rating as good.

    Returns 1 when ``x`` is greater than 2, otherwise None (callers are
    expected to fill the missing values afterwards).
    """
    return 1 if x > 2 else None

In [14]:
def bad(x):
    """Flag a rating as bad.

    Returns 1 when ``x`` lies in the interval (0, 2], otherwise None
    (callers are expected to fill the missing values afterwards).
    """
    return 1 if 0 < x <= 2 else None

In [15]:
# Derive one indicator column per classifier from the raw rating.
for flag_column, classifier in (('Good_ratings', good), ('Bad_ratings', bad)):
    popular_movies[flag_column] = popular_movies['rating'].apply(classifier)

# The classifiers return None for "not flagged"; turn every missing value
# in the frame into 0 so the flags can be summed.
popular_movies.fillna(value=0, inplace=True)

In [16]:
# Preview the ratings with their Good_ratings / Bad_ratings indicator columns.
popular_movies.head()

Unnamed: 0,movie title,user_id,rating,Good_ratings,Bad_ratings
0,Toy Story (1995),308,4,1.0,0.0
1,Toy Story (1995),287,5,1.0,0.0
2,Toy Story (1995),148,4,1.0,0.0
3,Toy Story (1995),280,4,1.0,0.0
4,Toy Story (1995),66,3,1.0,0.0


In [17]:
# Per-title totals: number of ratings (count of user_id) and the number of
# good / bad ratings (sums of the indicator columns).
title_totals = popular_movies.groupby(by='movie title', as_index=False).agg(
    {'user_id': 'count', 'Good_ratings': 'sum', 'Bad_ratings': 'sum'}
)

# Keep titles with more than 100 ratings whose good ratings outnumber the bad ones.
has_enough_ratings = title_totals['user_id'] > 100
mostly_liked = title_totals['Good_ratings'] > title_totals['Bad_ratings']
most_popular_movies = title_totals[has_enough_ratings & mostly_liked]

In [18]:
# Preview the popular titles; here user_id holds the number of ratings per title.
most_popular_movies.head()

Unnamed: 0,movie title,user_id,Good_ratings,Bad_ratings
2,101 Dalmatians (1996),109,77.0,32.0
3,12 Angry Men (1957),125,124.0,1.0
7,2001: A Space Odyssey (1968),259,234.0,25.0
15,Absolute Power (1997),127,107.0,20.0
16,"Abyss, The (1989)",151,134.0,17.0


In [19]:
# Recommend movies that are highly correlated with the ones the user liked;
# disliked movies are never used as seeds and rated movies are never recommended.
def predit_recommendation(movies, similarity=None, candidates=None,
                          like_threshold=2, per_movie=15, top_n=10):
    """Recommend movie titles similar to the ones a user rated highly.

    Parameters
    ----------
    movies : iterable of [title, rating]
        The user's ratings. Only entries with ``rating > like_threshold``
        are used as seeds; every listed title (liked or not) is excluded
        from the result, since the user has already seen it.
    similarity : pandas.DataFrame, optional
        Square title x title similarity matrix. Defaults to the
        notebook-level ``corr_matrix``.
    candidates : pandas.DataFrame, optional
        Frame with a ``'movie title'`` column listing the titles that may be
        recommended. Defaults to the notebook-level ``most_popular_movies``.
    like_threshold : number, default 2
        A rating must be strictly greater than this to count as "liked".
    per_movie : int, default 15
        Maximum number of neighbours collected for each liked movie.
    top_n : int, default 10
        Maximum number of recommendations returned.

    Returns
    -------
    pandas.DataFrame
        A single ``'movie title'`` column with a fresh 0..k-1 index, ordered
        by descending similarity score (best score per title), ``k <= top_n``.
        Empty when no movie in ``movies`` is liked.

    Raises
    ------
    KeyError
        If a liked title is not a column of ``similarity``.
    """
    # Fall back to the notebook-level tables when none are supplied explicitly.
    if similarity is None:
        similarity = corr_matrix
    if candidates is None:
        candidates = most_popular_movies

    # Exclude by title rather than by index label: the per-seed frames share
    # index labels, so a label-based drop would also remove unrelated rows.
    rated_titles = {entry[0] for entry in movies}
    allowed_titles = set(candidates['movie title']) - rated_titles

    per_seed = []
    for entry in movies:
        title, rating = entry[0], entry[1]
        if not rating > like_threshold:
            continue
        scores = similarity[title].dropna()
        scores = scores[scores.index.isin(allowed_titles)]
        # mergesort is stable, which keeps the ordering of ties deterministic.
        top = scores.sort_values(ascending=False, kind='mergesort').head(per_movie)
        if top.empty:
            continue
        per_seed.append(pd.DataFrame({'movie title': top.index,
                                      'Similarity_Score': top.values}))

    if not per_seed:
        return pd.DataFrame(columns=['movie title'])

    # DataFrame.append was removed in pandas 2.0; build the frame once instead.
    ranked = pd.concat(per_seed, ignore_index=True).sort_values(
        'Similarity_Score', ascending=False, kind='mergesort')
    # After the descending sort, the first occurrence of a title carries its best score.
    return (ranked[['movie title']]
            .drop_duplicates()
            .head(top_n)
            .reset_index(drop=True))

In [20]:
# Test case 1: liked Contact (1997) and Ransom (1996), disliked Muppet Treasure Island (1996).
# Each entry is [movie title, rating]; only ratings above 2 are used as seeds.
m =[['Muppet Treasure Island (1996)', 1],['Contact (1997)', 4],['Ransom (1996)', 5]]
predit_recommendation(m)

Unnamed: 0,movie title
0,Young Guns (1988)
1,As Good As It Gets (1997)
2,Conspiracy Theory (1997)
3,My Fair Lady (1964)
4,Gattaca (1997)
5,Waterworld (1995)
6,Pretty Woman (1990)
7,"Game, The (1997)"
8,Speed (1994)
9,Stargate (1994)


In [21]:
# Test case 2: only a disliked movie, Muppet Treasure Island (1996).
# With no rating above 2 there is no seed, so the result is expected to be empty.
m =[['Muppet Treasure Island (1996)', 1]]
predit_recommendation(m)

Unnamed: 0,movie title


In [22]:
# Test case 3: liked Lion King, The (1994) and Absolute Power (1997).
# Both ratings are above 2, so both titles are used as seeds.
m = [['Lion King, The (1994)',5],['Absolute Power (1997)',4]]
predit_recommendation(m)

Unnamed: 0,movie title
0,Murder at 1600 (1997)
1,"Ice Storm, The (1997)"
2,Volcano (1997)
3,"African Queen, The (1951)"
4,Mimic (1997)
5,Cool Hand Luke (1967)
6,Under Siege (1992)
7,Die Hard: With a Vengeance (1995)
8,"Manchurian Candidate, The (1962)"
9,Seven Years in Tibet (1997)
