In [1]:
import numpy as np
from numba import njit, prange
import pandas as pd

from tqdm.notebook import tqdm

In [2]:
# Matrix dimensions handed to generate_ratings_mat further down.
# NOTE(review): presumably the number of distinct users / movies in the data — confirm.
users = 3974
movies = 3564

# Fixed seed so the train/validation split below (and every metric computed
# from it) is identical after Restart Kernel -> Run All.
SPLIT_SEED = 42

train_csv = pd.read_csv("../../data/train_data.csv")
test_csv = pd.read_csv("../../data/test_data.csv")

# Shift every id down by one (vectorised instead of a per-row lambda).
# NOTE(review): presumably converts 1-based ids to 0-based indices — confirm.
train_csv["user_id"] = train_csv["user_id"] - 1
train_csv["movie_id"] = train_csv["movie_id"] - 1

test_csv["user_id"] = test_csv["user_id"] - 1
test_csv["movie_id"] = test_csv["movie_id"] - 1

# 80 % of the labelled rows for fitting, the remaining 20 % for validation.
train_set = train_csv.sample(frac=.8, axis=0, random_state=SPLIT_SEED)
val_set = train_csv.drop(train_set.index, axis=0)

# The two partitions must cover the labelled data exactly once.
assert train_csv.shape[0] == train_set.shape[0] + val_set.shape[0]

In [3]:
# Plain NumPy views of the frames for the prediction helpers.
# The labelled frames lose their last column; what remains ends in the value
# that is later used as the RMSE target (`val_uir[:, -1]`).
# NOTE(review): presumably columns are (user_id, movie_id, rating, <extra>) — confirm.
total_uir = train_csv.to_numpy()[:, :-1]
train_uir = train_set.to_numpy()[:, :-1]
val_uir = val_set.to_numpy()[:, :-1]

# The test frame additionally loses its first column (the submission `id`
# is read from `test_csv["id"]` when the output table is built).
test_ui = test_csv.to_numpy()[:, 1:-1]

## Load FBC requirements

In [4]:
from fbc_funcs import generate_ratings_mat, predict_batch_fbc, rmse

In [5]:
# Fourth argument passed to predict_batch_fbc.
# NOTE(review): presumably the neighbourhood size — confirm in fbc_funcs.
fbc_k = 88

# Precomputed similarity matrix stored next to this notebook.
sims_mat = np.load("combined_sims_mat.npy")

# One ratings matrix per use: the 80 % split for validation-time predictions,
# the full labelled data for the final test-set predictions.
train_ratings_mat = generate_ratings_mat(train_uir, users, movies)
total_ratings_mat = generate_ratings_mat(total_uir, users, movies)

## Load SVD requirements

In [6]:
from svd_funcs import fit_funk_svd, predict_batch_svd

In [7]:
# NOTE(review): svd_k is not referenced anywhere else in the visible notebook;
# presumably the latent dimension used when the parameters were fitted — confirm.
svd_k = 3

# An .npz archive is returned as a lazy NpzFile that keeps the file handle open
# until it is closed. Read every array inside a `with` block and keep them in a
# plain dict, so the handle is released and later key lookups still work.
with np.load("svd_params_alpha_.006.npy.npz") as _svd_archive:
    svd_params_compressed = {name: _svd_archive[name] for name in _svd_archive.files}

In [8]:
# Pull the five stored arrays out of the archive in the order expected by
# predict_batch_svd, keeping both the bundled tuple and the individual names.
svd_params = tuple(
    svd_params_compressed[key] for key in ("μ", "bu", "bi", "P", "Q")
)
μ, bu, bi, P, Q = svd_params

## Predict the validation set with both models

In [9]:
# FBC predictions for the validation rows; the last column of val_uir
# (the RMSE target) is stripped so only the query columns are passed in.
fbc_val = predict_batch_fbc(
    val_uir[:, :-1],
    sims_mat,
    train_ratings_mat,
    fbc_k,
)

In [10]:
# SVD predictions for the same validation rows (target column stripped).
svd_val = predict_batch_svd(
    val_uir[:, :-1],
    svd_params,
)

## Combine both predictions

In [14]:
# Blend weight: β goes to the FBC prediction, the remaining 1 - β to SVD.
β = 0.5

# Weighted average of the two models, scored against the held-out targets
# stored in the last column of val_uir.
comb_val = β * fbc_val + (1 - β) * svd_val
print(rmse(comb_val, val_uir[:, -1]))

0.8616475379025268


## Predict the test set

In [15]:
# Test-set predictions from both models. The FBC model is given the ratings
# matrix built from all labelled rows rather than only the 80 % split.
svd_test = predict_batch_svd(test_ui, svd_params)
fbc_test = predict_batch_fbc(
    test_ui,
    sims_mat,
    total_ratings_mat,
    fbc_k,
)

In [16]:
# Same weighted average as on the validation set: β for FBC, 1 - β for SVD.
comb_test = β * fbc_test + (1 - β) * svd_test

In [17]:
# Submission table: the original test-row ids paired with the blended ratings.
out_df = pd.DataFrame({"id": list(test_csv["id"]), "rating": comb_test})

out_df.head()

Unnamed: 0,id,rating
0,0,3.513224
1,1,3.4436
2,2,3.371422
3,3,3.6953
4,4,3.35776


In [64]:
# out_df.to_csv("out_svd_.006.csv", index=False)