In [None]:
import pandas
import numpy as np

In [None]:
class UserCF:
    """User-based collaborative filtering over a user x item rating matrix.

    A rating of 0 is treated as "not rated": ``predict`` selects the raters of
    an item with ``np.nonzero``. User-user similarity is the Pearson
    correlation of the full rating rows, as computed by ``np.corrcoef``.
    """

    def __init__(self, data):
        """Store the ratings and precompute the user-user similarity matrix.

        Args:
            data: 2-D array-like of ratings, one row per user and one column
                per item.
        """
        # asarray hands an ndarray back unchanged and converts anything else
        # (nested lists, DataFrames) so the ndarray-only calls below work.
        self.data = np.asarray(data)
        # Row-mean-centred ratings. Kept as a public attribute; predict()
        # below does not read it.
        self.norm_user_rate = self.data - self.data.mean(axis=1, keepdims=True)
        # A user whose row is constant has zero variance, so corrcoef computes
        # 0/0 = NaN for every pair involving that user (and warns about it).
        with np.errstate(divide='ignore', invalid='ignore'):
            similarity = np.atleast_2d(np.corrcoef(self.data))
        # NaN would sort last in argsort (i.e. be picked as "most similar")
        # and turn every prediction it touches into NaN; treat it as
        # "no measurable correlation" instead.
        self.user_similarity_matrix = np.where(np.isfinite(similarity), similarity, 0.0)

    def predict(self, user_id, item_id, k=10):
        """Predict the rating ``user_id`` would give ``item_id``.

        The prediction is the similarity-weighted average of the ratings given
        to ``item_id`` by the (at most) ``k`` other users most similar to
        ``user_id``.

        Args:
            user_id: Row index of the target user.
            item_id: Column index of the target item.
            k: Maximum number of neighbours to use.

        Returns:
            float: The predicted rating, or 0.0 when nobody else rated the
            item or every selected neighbour has zero similarity.
        """
        # Users who rated the item, excluding the target user itself.
        raters = np.nonzero(self.data[:, item_id])[0]
        raters = raters[raters != user_id]

        sims = self.user_similarity_matrix[user_id][raters]
        if raters.size == 0:
            return 0.0

        if raters.size > k:
            # Keep only the k neighbours with the highest similarity.
            top = np.argsort(sims)[-k:]
            raters, sims = raters[top], sims[top]

        # Pearson similarities can be negative. Normalising by their signed
        # sum lets opposite signs cancel to ~0 (division by zero) or flips the
        # sign of the result, so normalise by the total absolute weight.
        weight_total = np.abs(sims).sum()
        if weight_total == 0:
            return 0.0
        return float(self.data[raters, item_id].dot(sims) / weight_total)
        

In [None]:
class ItemCF:
    """Item-based collaborative filtering over a user x item rating matrix.

    A rating of 0 is treated as "not rated": ``predict`` selects the items a
    user has rated with ``np.nonzero``. Item-item similarity is the Pearson
    correlation of the rating columns, as computed by ``np.corrcoef`` on the
    transposed matrix.
    """

    def __init__(self, data):
        """Store the ratings and precompute the item-item similarity matrix.

        Args:
            data: 2-D array-like of ratings, one row per user and one column
                per item.
        """
        # asarray hands an ndarray back unchanged and converts anything else
        # (nested lists, DataFrames) so the ndarray-only calls below work.
        self.data = np.asarray(data)
        # Row-mean-centred ratings. Kept as a public attribute; predict()
        # below does not read it.
        self.norm_user_rate = self.data - self.data.mean(axis=1, keepdims=True)
        # An item whose column is constant has zero variance, so corrcoef
        # computes 0/0 = NaN for every pair involving it (and warns about it).
        with np.errstate(divide='ignore', invalid='ignore'):
            similarity = np.atleast_2d(np.corrcoef(self.data.T))
        # NaN would sort last in argsort (i.e. be picked as "most similar")
        # and turn every prediction it touches into NaN; treat it as
        # "no measurable correlation" instead.
        self.item_similarity_matrix = np.where(np.isfinite(similarity), similarity, 0.0)

    def predict(self, user_id, item_id, k=10):
        """Predict the rating ``user_id`` would give ``item_id``.

        The prediction is the similarity-weighted average of the user's own
        ratings on the (at most) ``k`` other items most similar to
        ``item_id``.

        Args:
            user_id: Row index of the target user.
            item_id: Column index of the target item.
            k: Maximum number of neighbour items to use.

        Returns:
            float: The predicted rating, or 0.0 when the user has rated no
            other item or every selected neighbour has zero similarity.
        """
        # Items the user has rated, excluding the target item itself:
        # otherwise an already-rated target is its own neighbour with
        # similarity 1.0 and its rating leaks into the prediction.
        rated = np.nonzero(self.data[user_id])[0]
        rated = rated[rated != item_id]

        sims = self.item_similarity_matrix[item_id][rated]
        if rated.size == 0:
            return 0.0

        if rated.size > k:
            # Keep only the k neighbours with the highest similarity.
            top = np.argsort(sims)[-k:]
            rated, sims = rated[top], sims[top]

        # Pearson similarities can be negative. Normalising by their signed
        # sum lets opposite signs cancel to ~0 (division by zero) or flips the
        # sign of the result, so normalise by the total absolute weight.
        weight_total = np.abs(sims).sum()
        if weight_total == 0:
            return 0.0
        return float(self.data[user_id, rated].dot(sims) / weight_total)

In [None]:
# Load the rating matrix from CSV as a plain NumPy array.
# NOTE(review): read_csv treats the first line as a header by default -
# confirm 'col_matrix.csv' really has a header row, otherwise the first row
# of ratings is dropped.
data = pandas.read_csv('col_matrix.csv').to_numpy()

In [None]:
# Build the user-based model on the loaded rating matrix ...
user_cf = UserCF(data)
# ... and display one sample prediction: user row 2, item column 7.
user_cf.predict(user_id=2, item_id=7)

In [None]:
# Build the item-based model on the loaded rating matrix ...
item_cf = ItemCF(data)
# ... and display one sample prediction: user row 2, item column 7.
item_cf.predict(user_id=2, item_id=7)

In [None]:
# Item-based predictions for the block data[4100:, 2700:], written to CSV.
row_start, col_start = 4100, 2700
n_rows = data.shape[0]
n_cols = data.shape[1]

predictions = np.zeros((n_rows - row_start, n_cols - col_start))
# out_row / out_col index the output block; user / item index the full matrix.
for out_row, user in enumerate(range(row_start, n_rows)):
    for out_col, item in enumerate(range(col_start, n_cols)):
        predictions[out_row, out_col] = item_cf.predict(user, item)

np.savetxt('item_cf_predictions.csv', predictions, delimiter=',', fmt='%.2f')

In [None]:
# User-based predictions for the block data[4100:, 2700:], written to CSV.
row_start, col_start = 4100, 2700
n_rows = data.shape[0]
n_cols = data.shape[1]

predictions = np.zeros((n_rows - row_start, n_cols - col_start))
# out_row / out_col index the output block; user / item index the full matrix.
for out_row, user in enumerate(range(row_start, n_rows)):
    for out_col, item in enumerate(range(col_start, n_cols)):
        predictions[out_row, out_col] = user_cf.predict(user, item)

np.savetxt('user_cf_predictions.csv', predictions, delimiter=',', fmt='%.2f')