In [8]:
from collaborative_filtering import CF 
from demographic_filtering import DF
from perceptron import Perceptron as pct
from get_data import (
    get_users_data,
    get_rating_base_data,
    get_rating_test_data,
)
import numpy as np

In [22]:
RATE_TRAIN = get_rating_base_data().values # convert to matrix
RATE_TRAIN[:, :2] -= 1 # start from 0

RATE_TRAIN

array([[        0,         2,         4, 878542960],
       [        0,         3,         3, 876893119],
       [        0,         4,         3, 889751712],
       ...,
       [      942,      1187,         3, 888640250],
       [      942,      1227,         3, 888640275],
       [      942,      1329,         3, 888692465]])

In [5]:
# CF 
RATE_TRAIN = get_rating_base_data().values # convert to matrix
RATE_TEST = get_rating_test_data().values # convert to matrix

RATE_TRAIN[:, :2] -= 1 # start from 0
RATE_TEST[:, :2] -= 1

CF = CF(RATE_TRAIN, k=25)
CF.fit()

print('Similar Matrix Works')
print(CF.S)
print('Number of rows: ', CF.S.shape[0])
print('Number of columns: ', CF.S.shape[1])

ids = np.where(RATE_TEST[:, 0] == 0)[0].astype('int32')
real_items_1 = RATE_TEST[(np.where((RATE_TEST[:, 0] == 0) & (RATE_TEST[:, 2] >= 3)))]
predicted_items = []

for row in RATE_TEST[ids, :]:
    predicted_rating = CF.pred(0, row[1])
    if predicted_rating >= 3:
        predicted_items.append(row[1])

print('Items which user 1 actually like: ', real_items_1[:, 1])
print('Items in prediction which user 1 might like ', predicted_items)


n_test = RATE_TEST.shape[0]
correct_items_count = 0
real_items_user_like_count = len(np.where(RATE_TEST[:, 2] >= 3)[0].astype(np.int32))

user_id = 0
while user_id < CF.n_users:
    ids = np.where(RATE_TEST[:, 0] == user_id)[0].astype('int32')
    real_items = RATE_TEST[(np.where((RATE_TEST[:, 0] == user_id) & (RATE_TEST[:, 2] >= 3)))]
    for row in RATE_TEST[ids, :]:
        predicted_rating = CF.pred(user_id, row[1])
        if predicted_rating >= 3 and row[1] in real_items:
            correct_items_count += 1
    user_id += 1
accuracy = correct_items_count/real_items_user_like_count
print('The accuracy of Collaborative Filtering: {0}/{1} = {2}'.format(correct_items_count, real_items_user_like_count, accuracy))

Similar Matrix Works
[[ 1.   -0.08  0.01 ... -0.08  0.03 -0.06]
 [-0.08  1.    0.05 ... -0.11  0.    0.22]
 [ 0.01  0.05  1.   ... -0.01 -0.04  0.05]
 ...
 [-0.08 -0.11 -0.01 ...  1.    0.   -0.18]
 [ 0.03  0.   -0.04 ...  0.    1.    0.  ]
 [-0.06  0.22  0.05 ... -0.18  0.    1.  ]]
Number of rows:  943
Number of columns:  943
Items which user 1 actually like:  [  0   1   8  21  25  29  31  37  39  67  74  88  92  98 118 132 134 135
 140 143 145 146 157 165 172 175 178 180 186 193 196 204 210 219 233 238
 245 248 255 256 267 268 269]
Items in prediction which user 1 might like  [0, 1, 7, 8, 20, 21, 25, 29, 31, 33, 36, 37, 39, 62, 67, 74, 88, 92, 98, 104, 118, 130, 132, 134, 135, 137, 140, 143, 145, 146, 157, 165, 172, 175, 178, 180, 186, 193, 196, 204, 210, 219, 233, 236, 238, 245, 248, 255, 256, 267, 268, 269, 270]
The accuracy of Collaborative Filtering: 14963/16558 = 0.903671941055683


In [9]:
# DF
USERS = get_users_data()
DF = DF(USERS, RATE_TRAIN, 25)
DF.fit()

print("Ma trận tương đồng thông tin")
print(DF.similarities)
print("Số hàng của ma trận:", DF.similarities.shape[0])
print("Số cột của ma trận: ", DF.similarities.shape[1])

ids = np.where(RATE_TEST[:, 0] == 0)[0].astype("int32")
real_items_1 = RATE_TEST[(np.where((RATE_TEST[:, 0] == 0) & (RATE_TEST[:, 2] >= 3)))]
correct_predicted_items = []

for row in RATE_TEST[ids, :]:
    predicted_rating = DF.pred(0, row[1])
    if predicted_rating >= 3 and row[1] in real_items_1:
        correct_predicted_items.append(row[1])


print("Những items user 1 thật sự thích       : ", real_items_1[:, 1])
print("Những items user 1 được dự đoán thích  : ", correct_predicted_items)

n_test = RATE_TEST.shape[0]
correct_items_count = 0
real_items_user_like_count = len(np.where(RATE_TEST[:, 2] >= 3)[0].astype(np.int32))

user_id = 0
while user_id < DF.n_users:
    ids = np.where(RATE_TEST[:, 0] == user_id)[0].astype("int32")
    real_items = RATE_TEST[(np.where((RATE_TEST[:, 0] == user_id) & (RATE_TEST[:, 2] >= 3)))]
    for row in RATE_TEST[ids, :]:
        predicted_rating = DF.pred(user_id, row[1])
        if predicted_rating >= 3 and row[1] in real_items:
            correct_items_count = correct_items_count + 1
    user_id = user_id + 1

print("Độ chính xác của Demographic Filtering :", correct_items_count / real_items_user_like_count)

Ma trận tương đồng thông tin
[[1.         0.         0.66666667 ... 0.66666667 0.         0.66666667]
 [0.         1.         0.         ... 0.         0.33333333 0.        ]
 [0.66666667 0.         1.         ... 0.66666667 0.         0.66666667]
 ...
 [0.66666667 0.         0.66666667 ... 1.         0.         1.        ]
 [0.         0.33333333 0.         ... 0.         1.         0.        ]
 [0.66666667 0.         0.66666667 ... 1.         0.         1.        ]]
Số hàng của ma trận: 943
Số cột của ma trận:  943
Những items user 1 thật sự thích       :  [  0   1   8  21  25  29  31  37  39  67  74  88  92  98 118 132 134 135
 140 143 145 146 157 165 172 175 178 180 186 193 196 204 210 219 233 238
 245 248 255 256 267 268 269]
Những items user 1 được dự đoán thích  :  [0, 1, 8, 21, 25, 29, 31, 37, 67, 74, 88, 92, 98, 118, 132, 134, 135, 140, 143, 145, 146, 157, 165, 172, 175, 178, 180, 186, 193, 196, 204, 210, 219, 233, 238, 245, 248, 255, 256, 267, 268, 269]
Độ chính xác của Demog

In [None]:
# Perceptron
ids = np.where(RATE_TEST[:, 0] == 0)[0].astype("int32")

MATRIX_DF = []
MATRIX_CF = []

for row in RATE_TEST[ids, :]:
    p_cf = CF.pred(0, row[1])
    p_df = DF.pred(u=0, i=row[2])
    MATRIX_CF.append([0, row[1], p_cf])
    MATRIX_DF.append([0, row[1], p_df])
MATRIX_CF = np.asarray(MATRIX_CF)
MATRIX_DF = np.asarray(MATRIX_DF)

CF_predicted = np.asanyarray(MATRIX_CF[:, 2])
DF_predicted = MATRIX_DF[:, 2]
true_rating = RATE_TEST[ids, 2]

dataset = np.c_[CF_predicted, DF_predicted, true_rating]

# print("Ma trận dự đoán đánh giá CF, DF, True Rating")
# print(dataset)

PLA = Perceptron(dataset, 0.003, len(ids))
PLA.fit()
predicted_ratings_pla = PLA.predict()

print("Dự đoán đánh giá sau khi được điều chỉnh")
print(np.round(predicted_ratings_pla, 3))
