# Assignment 1


In [4]:
import pandas as pd
from sklearn.model_selection import KFold

In [5]:
# loading data
movies = pd.read_csv(
    'movies.dat',
    sep = "::",
    names = ['MovieID', 'Title', 'Genres'],
    encoding='latin-1',
    engine='python',
)

ratings = pd.read_csv(
    'ratings.dat',
    sep = "::",
    names = ['UserID','MovieID','Rating','Timestamp'],
    encoding='latin-1',
    engine='python',
)

users = pd.read_csv(
    'users.dat',
    sep = "::",
    names = ['UserID', 'Gender', 'Age','Occupation','Zip-code'],
    encoding='latin-1',
    engine='python',
)
display(movies.head())
display(users.head())
ratings.head()

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


## 1. Recommender System

## 1.1 Naive Approaches

## 1.2 UV Matrix Decomposition

Notes:
To increase our chances of finding the global minimum, we need to pick many dif-
ferent starting points, that is, different choices of the initial matrices U and V .
However, there is never a guarantee that our best local minimum will be the
global minimum.


In [6]:
import numpy as np


In [7]:
# creating matrix M
Utility_DF = ratings.pivot(index = 'UserID', columns ='MovieID', values = 'Rating')
M = Utility_DF.to_numpy()
M.shape
B = M - np.nanmean(M,axis=1, keepdims=True)
print(M)
print(B)


[[ 5. nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [ 3. nan nan ... nan nan nan]]
[[ 0.81132075         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 ...
 [        nan         nan         nan ...         nan         nan
          nan]
 [        nan         nan         nan ...         nan         nan
          nan]
 [-0.57771261         nan         nan ...         nan         nan
          nan]]


In [50]:
class Matrix_Decomposition:
    def __init__(self, matrix, dimensions = 5, iterations = 1):
        self.matrix = matrix
        self.n = self.matrix.shape[0]
        self.m = self.matrix.shape[1]
        self.d = dimensions
        self.normalized = self.matrix - self.matrix.mean(axis=1, keepdims=True)
        self.U = np.random.rand(self.n,self.d)
        self.V = np.random.rand(self.d,self.m)
        self.iter = iterations


        
    def rmse(self):
        e = 0.
        c = 0.
        UV = np.dot(self.U,self.V)
        for i in range(0,self.n):
            for j in np.where(~np.isnan(self.normalized[i])):
                e += (self.normalized[i,j]-UV[i,j])**2
                c += 1
                return np.sqrt(e/c)





    def UV(self):
        err = []
        Us =dict()
        Vs = dict()

        # number of ittertaions 
        for i in range(self.iter):
            # decompose User matrix
            for r in range(0,self.n):
                for s in range(0,self.d):
                    m = np.array(self.normalized[r,:])
                    v = np.array(self.V[s,:])
                    v[np.isnan(m)] = np.nan
                    
                    # formula page 334
                    #self.U[r,s]=float(np.nansum(self.V[s,:]*(m-np.matmul(self.normalized[r,:],self.V[:])-(self.U[r,s]*self.V[s,:]))))/np.nansum(np.square(v))
                    self.U[r,s]=float(np.nansum(self.V[s,:] * (m-np.nansum(self.U[r,s]*self.V[s,:]))))/np.nansum(np.square(v))
            
            # decompose item matrix
            for s in range(0,self.m):
                for r in range(0,self.d):
                    m = np.array(self.normalized[:,s])
                    u = np.array(self.U[:,r])
                    u[np.isnan(m)] = np.nan
                    
                    # formula page 334
                    #self.V[r,s]=float(np.nansum(self.U[:, r] * (m - np.matmul(self.U[:], self.V[:,s]) - (self.V[r, s] * self.U[:, r]))))/np.nansum(np.square(u))
                    self.V[r,s]=float(np.nansum(self.U[:, r] * (m - np.nansum(self.V[r, s] * self.U[:, r]))))/np.nansum(np.square(u))




            err.append(self.rmse())
            Us[i] = self.U
            Vs[i] = self.V
        return self.U, self.V, err


In [51]:
model_matrix = Matrix_Decomposition(M)

In [52]:
model_matrix.UV()

  self.U[r,s]=float(np.nansum(self.V[s,:] * (m-np.nansum(self.U[r,s]*self.V[s,:]))))/np.nansum(np.square(v))
  self.V[r,s]=float(np.nansum(self.U[:, r] * (m - np.nansum(self.V[r, s] * self.U[:, r]))))/np.nansum(np.square(u))


(array([[nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan],
        ...,
        [nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan]]),
 array([[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]]),
 [array([], dtype=float64)])

In [15]:
n = B.shape[0]
m = B.shape[1]
U = np.random.rand(n,5)
V = np.random.rand(5,m)
for r in range(0,n):
    for s in range(0,5):
        m = np.array(B[r,:])
        v = np.array(V[s,:])
        v[np.isnan(m)] = np.nan
        
        # formula page 334
        #self.U[r,s]=float(np.nansum(self.V[s,:]*(m-np.matmul(self.normalized[r,:],self.V[:])-(self.U[r,s]*self.V[s,:]))))/np.nansum(np.square(v))
        U[r,s]=float(np.nansum(V[s,:]*(m-np.nansum(U[r,s]*V[s,:]))))/np.nansum(np.square(v))



In [49]:
B[4].shape[0]

3706