### Singular Value Decomposition and Classification

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
import seaborn as sb

In [3]:
# Read the tab-separated ratings file; the column labels are supplied
# explicitly through `names`.
columns = ['userid', 'item_id', 'rating', 'timestamp']
frame = pd.read_csv(
    'ml-100k/u.data',
    names=columns,
    sep='\t',
)
frame.head()

Unnamed: 0,userid,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [19]:
# Column labels for the pipe-delimited item file: five descriptive fields
# followed by the genre columns, in file order.
info_fields = ['item_id', 'movie title', 'release date', 'video release date', 'IMDB URL']
genre_fields = [
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
columns = info_fields + genre_fields

# The file is decoded as latin-1 rather than pandas' default encoding.
movies = pd.read_csv(
    'ml-100k/u.item',
    names=columns,
    sep='|',
    encoding='latin-1',
)
movies.head(50)

Unnamed: 0,item_id,movie title,release date,video release date,IMDB URL,unknown,Action,Adventure,Animation,Children,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,01-Jan-1995,,http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,Twelve Monkeys (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Twelve%20Monk...,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,8,Babe (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Babe%20(1995),0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,9,Dead Man Walking (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Dead%20Man%20...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,10,Richard III (1995),22-Jan-1996,,http://us.imdb.com/M/title-exact?Richard%20III...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [7]:
# How many rows (one per item_id) the item metadata table holds.
len(movies.index)

1682

In [9]:
# Attach the item metadata to every rating row by joining on the shared
# `item_id` key (pandas' default join type is used).
combined_movies_data = frame.merge(movies, on='item_id')
combined_movies_data.head()

Unnamed: 0,userid,item_id,rating,timestamp,movie title,release date,video release date,IMDB URL,unknown,Action,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,196,242,3,881250949,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
1,63,242,3,875747190,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
2,226,242,5,883888671,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
3,154,242,3,879138235,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0
4,306,242,5,876503793,Kolya (1996),24-Jan-1997,,http://us.imdb.com/M/title-exact?Kolya%20(1996),0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Count the ratings recorded for each item, then show the five most-rated items.
ratings_per_item = combined_movies_data.groupby('item_id')['rating'].count()
ratings_per_item.sort_values(ascending=False).head()

item_id
50     583
258    509
100    508
181    507
294    485
Name: rating, dtype: int64

In [18]:
# Let pandas print up to 100 rows before it truncates a displayed object.
pd.options.display.max_rows = 100

0       http://us.imdb.com/M/title-exact?Toy%20Story%2...
1       http://us.imdb.com/M/title-exact?GoldenEye%20(...
2       http://us.imdb.com/M/title-exact?Four%20Rooms%...
3       http://us.imdb.com/M/title-exact?Get%20Shorty%...
4       http://us.imdb.com/M/title-exact?Copycat%20(1995)
                              ...                        
1677    http://us.imdb.com/M/title-exact?Mat%27+i+syn+...
1678    http://us.imdb.com/M/title-exact?B%2E+Monkey+(...
1679        http://us.imdb.com/Title?Sliding+Doors+(1998)
1680    http://us.imdb.com/M/title-exact?You%20So%20Cr...
1681    http://us.imdb.com/M/title-exact?Schrei%20aus%...
Name: IMDB URL, Length: 1682, dtype: object

In [23]:
# Boolean mask selecting every rating row that belongs to item 50, the
# item with the most ratings in the count above.
Filter = combined_movies_data['item_id'] == 50

# Distinct titles attached to that item id.
combined_movies_data.loc[Filter, 'movie title'].unique()

array(['Star Wars (1977)'], dtype=object)

In [24]:
# Build the utility matrix: one row per user, one column per movie title,
# each cell holding that user's rating (pivot_table's default aggregation is
# applied when several rows share a user/title pair). Pairs with no rating
# are filled with 0.
rating_cross_tab = pd.pivot_table(
    combined_movies_data,
    index='userid',
    columns='movie title',
    values='rating',
    fill_value=0,
)
rating_cross_tab

movie title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,2,5,0,0,3,4,0,0,...,0,0,0,5,3,0,0,0,4,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,2,0,0,0,0,4,0,0,...,0,0,0,4,0,0,0,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
940,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
941,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
942,0,0,0,0,0,0,0,3,0,3,...,0,0,0,0,0,0,0,0,0,0


In [29]:
# Transpose the utility matrix so that each row is a movie title and each
# column is a user; the decomposition below is fitted on this array.
x = rating_cross_tab.values.transpose()
x.shape

(1664, 943)

In [36]:
# Reduce every row of `x` to 12 components with truncated SVD.
# `random_state` is fixed so repeated runs give the same factors.
svd = TruncatedSVD(
    n_components=12,
    random_state=17,
)
resultant_matrix = svd.fit_transform(x)
resultant_matrix.shape

(1664, 12)

### Generating the correlation matrix

In [34]:
# Pearson correlation between every pair of rows of the reduced matrix.
# `rowvar=True` (NumPy's default, spelled out here) treats each row as one
# variable, so the result is square with one row/column per input row.
corr_mat = np.corrcoef(resultant_matrix, rowvar=True)
corr_mat.shape

(1664, 1664)

In [46]:
# The pivot's column labels are the movie titles.
movies_names = rating_cross_tab.columns
movies_list = movies_names.tolist()

# Position of Star Wars within that title list.
star_war = movies_list.index('Star Wars (1977)')

(1664,)

In [45]:
# Row of the correlation matrix at the Star Wars position: its correlation
# with every row of the reduced matrix.
corr_star_wars = corr_mat[star_war, :]
corr_star_wars

array([0.36854685, 0.42380505, 0.59468319, ..., 0.33301726, 0.64177278,
       0.26805079])

### Recommending movies that are correlated

In [65]:

# Recommend the titles whose correlation with Star Wars is above 0.9.
#
# Star Wars itself is removed by position (`star_war`) rather than by testing
# `corr_star_wars < 1.0`: np.corrcoef does not guarantee that the diagonal is
# exactly 1.0 after floating-point rounding, so a value comparison can let the
# query movie appear in its own recommendations. It would also discard any
# other title whose correlation happens to be exactly 1.0.
similar_mask = corr_star_wars > 0.9   # fresh boolean array, safe to modify
similar_mask[star_war] = False        # never recommend the query movie itself
arr = list(movies_names[similar_mask])
arr

['Die Hard (1988)',
 'Empire Strikes Back, The (1980)',
 'Fugitive, The (1993)',
 'Raiders of the Lost Ark (1981)',
 'Return of the Jedi (1983)',
 'Terminator 2: Judgment Day (1991)',
 'Terminator, The (1984)',
 'Toy Story (1995)']

In [66]:
# Same recommendation with a stricter cut-off: keep only the titles whose
# correlation with Star Wars is above 0.95.
#
# Star Wars itself is removed by position (`star_war`) rather than by testing
# `corr_star_wars < 1.0`: np.corrcoef does not guarantee that the diagonal is
# exactly 1.0 after floating-point rounding, so a value comparison can let the
# query movie appear in its own recommendations. It would also discard any
# other title whose correlation happens to be exactly 1.0.
similar_mask = corr_star_wars > 0.95  # fresh boolean array, safe to modify
similar_mask[star_war] = False        # never recommend the query movie itself
arr = list(movies_names[similar_mask])
arr

['Return of the Jedi (1983)']