In [0]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive
/gdrive


In [0]:
cd My\ Drive

/gdrive/My Drive


In [0]:
cd Colab\ Notebooks

/gdrive/My Drive/Colab Notebooks


In [0]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

class CF(object):
    """docstring for CF"""
    def __init__(self, Y_data, k, dist_func = cosine_similarity, uuCF = 1):
      
      self.uuCF = uuCF # user-user (1) or item-item (0) CF
      self.Y_data = Y_data if uuCF else Y_data[:, [1, 0, 2]]
      self.k = k # number of neighbor points
      self.dist_func = dist_func
      self.Ybar_data = None
        # number of users and items. Remember to add 1 since id starts from 0
      self.n_users = int(np.max(self.Y_data[:, 0])) + 1 
      self.n_items = int(np.max(self.Y_data[:, 1])) + 1
      
    def add(self, new_data):
      self.Y_data = np.concatenate((self.Y_data, new_data), axis = 0)
      
    def normalize_Y(self):
      users = self.Y_data[:, 0]
      self.Ybar_data = self.Y_data.copy()
      self.mu = np.zeros((self.n_users,))
      for n in range(self.n_users):
        indexs = np.where(users == n)[0].astype(np.int32)
        item_ids = self.Y_data[indexs, 1]
        ratings = self.Y_data[indexs, 2]
        m = np.mean(ratings)
        if np.isnan(m):
          m =0
        self.mu[n] = m
        self.Ybar_data[indexs, 2] = ratings - self.mu[n]
        
      self.Ybar = sparse.coo_matrix((self.Ybar_data[:, 2], (self.Ybar_data[:, 1], self.Ybar_data[:, 0])), (self.n_items, self.n_users))
      self.Ybar = self.Ybar.tocsr()
      
    def similarity(self):
      self.S = self.dist_func(self.Ybar.T, self.Ybar.T)
      
    def refresh(self):
      self.normalize_Y()
      self.similarity()
      
    def fit(self):
      self.refresh()
      
    def __pred(self, u, i, normalized=1):
      # find all users who rated i
      indexs = np.where(self.Y_data[:, 1] == i)[0].astype(np.int32)
      users_rated_i = (self.Y_data[indexs, 0]).astype(np.int32)
      sim = self.S[u, users_rated_i]
      # find the k most similarity users
      a = np.argsort(sim)[-self.k:]
      nearest_s = sim[a]
      r = self.Ybar[i, users_rated_i[a]]
      if normalized:
        return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8)
      return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8) + self.mu[u]
    
    def pred(self, u, i, normalized=1):
      if self.uuCF: return self.__pred(u, i, normalized)
      return self.__pred(i, u, normalized)
    
    def recommend(self, u):
      indexs = np.where(self.Y_data[:, 0] == u)[0]
      items_rated_by_u = self.Y_data[indexs, 1].tolist()
      recommended_items = []
      
      for i in range(self.n_items):
        if i not in  items_rated_by_u:
          rating = self.__pred(u, i)
          if rating > 0:
            recommended_items.append(i)
      return recommended_items
    
    def print_recommendation(self):
      print('Recommendations: ')
      for u in range(self.n_users):
        recommended_items = self.recommend(u)
        if self.uuCF:
          print('    Recommend item(s):', recommended_items, 'for user', u)
        else:
          print('    Recommend item', u, 'for user(s): ', recommended_items)
    def print_error(self):
      users = self.Y_data[:, 0]
      self.Ybar_data = self.Y_data.copy()
      self.mu = np.zeros((self.n_users,))
      for n in range(self.n_users):
        indexs = np.where(users == n)[0].astype(np.int32)
        item_ids = Y_data[indexs, 1]
        ratings = Y_data[indexs, 2]
        #m = np.mean(ratings)
        print(n)
        print(len(ratings))

In [0]:
# data

r_cols = ['user_id', 'item_id', 'rating']
ratings = pd.read_csv('ex.dat', sep=' ', names=r_cols, encoding='latin-1')
#print(ratings)
Y_data = ratings.as_matrix()
#Y_data_matrix = [i.split(' ') for i in Y_data[:, 0]]
#Y_data_matrix = np.asarray(Y_data_matrix).astype(np.float32)
print(Y_data)

[[0. 0. 5.]
 [0. 1. 4.]
 [0. 3. 2.]
 [0. 4. 2.]
 [1. 0. 5.]
 [1. 2. 4.]
 [1. 3. 2.]
 [1. 4. 0.]
 [2. 0. 2.]
 [2. 2. 1.]
 [2. 3. 3.]
 [2. 4. 4.]
 [3. 0. 0.]
 [3. 1. 0.]
 [3. 3. 4.]
 [4. 0. 1.]
 [4. 3. 4.]
 [5. 1. 2.]
 [5. 2. 1.]
 [6. 2. 1.]
 [6. 3. 4.]
 [6. 4. 5.]]


  """


In [0]:
rs = CF(Y_data, k=2, uuCF=1)
rs.fit()
rs.print_recommendation()

Recommendations: 
    Recommend item(s): [2] for user 0
    Recommend item(s): [1] for user 1
    Recommend item(s): [] for user 2
    Recommend item(s): [4] for user 3
    Recommend item(s): [4] for user 4
    Recommend item(s): [0, 3, 4] for user 5
    Recommend item(s): [1] for user 6


In [0]:
rs = CF(Y_data, k = 2, uuCF = 0)
rs.fit()
rs.print_recommendation()

Recommendations: 
    Recommend item 0 for user(s):  []
    Recommend item 1 for user(s):  [1]
    Recommend item 2 for user(s):  [0]
    Recommend item 3 for user(s):  [5]
    Recommend item 4 for user(s):  [3, 4, 5]


# ***MovieLens***

In [0]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']

rating_base = pd.read_csv('ml-100k/ub.base', sep = '\t', names=r_cols, encoding='latin-1')
rating_test = pd.read_csv('ml-100k/ub.test', sep = '\t', names=r_cols, encoding='latin-1')

rate_train = rating_base.as_matrix()
#rate_train_matrix = [i.split('\t') for i in rate_train[:, 0]]
#rate_train_matrix = np.asarray(rate_train_matrix, dtype=np.float32)[:, [0, 1, 2]]
#print(rate_train_matrix)

rate_test = rating_test.as_matrix()
#rate_test_matrix = [i.split('\t') for i in rate_test[:, 0]]
#rate_test_matrix = np.asarray(rate_test_matrix, dtype=np.int32)[:, [0, 1, 2]]
#print(rate_test_matrix.shape)

# indices start from 0
rate_train[:, :2] -= 1
rate_test[:, :2] -= 1


  
  # This is added back by InteractiveShellApp.init_path()


In [0]:
rs = CF(rate_train, k = 30, uuCF = 1)
rs.fit()

n_tests = rate_test.shape[0]
SE = 0
for n in range(n_tests):
  pred = rs.pred(rate_test[n, 0], rate_test[n, 1], normalized = 0)
  SE += (pred - rate_test[n, 2])**2
  
RMSE = np.sqrt(SE/n_tests)
print("User-user CF, RMSE = ", RMSE)

User-user CF, RMSE =  0.9951981100882598


In [0]:
rs = CF(rate_train, k = 30, uuCF = 0)
rs.fit()

n_tests = rate_test.shape[0]
SE = 0 # squared error
for n in range(n_tests):
    pred = rs.pred(rate_test[n, 0], rate_test[n, 1], normalized = 0)
    SE += (pred - rate_test[n, 2])**2 

RMSE = np.sqrt(SE/n_tests)
print( 'Item-item CF, RMSE =', RMSE)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Item-item CF, RMSE = 0.9867912132705384
