

*   Kernel Net is based on https://proceedings.mlr.press/v80/muller18a.html
*   .py files are taken and adapted from https://github.com/lorenzMuller/kernelNet_MovieLens

* This notebook was made to be run on Google Colab.



In [None]:
%tensorflow_version 1.x
import tensorflow as tf
# Stop right away unless TensorFlow reports a GPU named '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print("GPU found")
else:
    # Explain the most likely cause before aborting the cell.
    print(
        '\n\nThis error most likely means that this notebook is not '
        'configured to use a GPU.  Change this in Notebook Settings via the '
        'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise SystemError('GPU device not found')

import pandas as pd
import numpy as np
import os 

In [2]:
def extract_users_items_predictions(data_pd):
    """Split a ratings frame into user indices, movie indices and ratings.

    Parameters
    ----------
    data_pd : pandas.DataFrame
        Needs an ``Id`` column of strings containing ``r<number>_c<number>``
        and a ``Prediction`` column.

    Returns
    -------
    users, movies : numpy.ndarray
        1-D integer arrays with the ``r`` and ``c`` numbers, each shifted
        down by one, one entry per row of ``data_pd``.
    predictions : numpy.ndarray
        The values of the ``Prediction`` column.

    Raises
    ------
    ValueError
        If an ``Id`` does not match the pattern (the extracted NaN cannot be
        converted to int).
    """
    # Raw string: '\d' inside a normal literal is an invalid escape sequence.
    ids = data_pd.Id.str.extract(r'r(\d+)_c(\d+)').to_numpy().astype(int) - 1
    # Slice the two columns instead of np.split + np.squeeze, so that a
    # one-row frame still yields length-1 arrays rather than 0-d scalars.
    users, movies = ids[:, 0], ids[:, 1]
    predictions = data_pd.Prediction.values
    return users, movies, predictions

**Specify the data directory:** <br>
*To use the notebook, create a folder, specify its path below, and put the .py files from the kernelNet folder into that directory.*

In [3]:
# Directory that must already contain the kernelNet .py files. Apart from
# setting this path and uploading the files, nothing else has to be done to
# run the notebook.
DATA_DIR = '/content/kernelNet'

assert os.path.isdir(DATA_DIR), "directory does not exist"
# Check each required script in turn; the message names the missing file.
for required_script in ("kernelNet_ml1m.py", "dataLoader.py"):
    assert os.path.exists(os.path.join(DATA_DIR, required_script)), \
        "upload %s to %s " % (required_script, str(DATA_DIR))

In [None]:
#download training data and sample predictions 
!pip install kaggle

!mkdir ~/.kaggle

import json

kaggle_username = "yuvalnis" #@param {type:"string"}
kaggle_api_key = "1800d5a286834f0416c338c7bd7f6dee" #@param {type:"string"}

assert len(kaggle_username) > 0 and len(kaggle_api_key) > 0

api_token = {"username": kaggle_username,"key": kaggle_api_key}

with open('kaggle.json', 'w') as file:
    json.dump(api_token, file)

!mv kaggle.json ~/.kaggle/kaggle.json

!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c cil-collaborative-filtering-2022

!unzip -n cil-collaborative-filtering-2022.zip

os.rename("data_train.csv", os.path.join(DATA_DIR,"data_train.csv"))
os.rename("sampleSubmission.csv", os.path.join(DATA_DIR,"sampleSubmission.csv"))

!rm cil-collaborative-filtering-2022.zip

In [5]:
# Read the downloaded training CSV from DATA_DIR.
train_csv_path = os.path.join(DATA_DIR, "data_train.csv")
x_train_pd = pd.read_csv(train_csv_path)

In [6]:
# Rewrite the training ratings as '::'-separated rows (userID, movieID,
# rating) in DATA_DIR/ratings.dat.
users, movies, predictions = extract_users_items_predictions(x_train_pd)
ratings_dict = {
    'userID': users,
    'movieID': movies,
    'rating': predictions,
}
df_train = pd.DataFrame(ratings_dict)
np.savetxt(
    os.path.join(DATA_DIR, "ratings.dat"),
    df_train.to_numpy(),
    fmt='%s',
    delimiter='::',
    encoding='utf-8',
)

In [7]:
# Load the entries to predict from the sample submission and store them as
# '::'-separated rows (userID, movieID, rating) in DATA_DIR/to_predict.dat.
to_predict_pd = pd.read_csv(os.path.join(DATA_DIR, "sampleSubmission.csv"))
pred_users, pred_movies, pred_predictions = extract_users_items_predictions(to_predict_pd)
to_predict_dict = {
    'userID': pred_users,
    'movieID': pred_movies,
    'rating': pred_predictions,
}
df_predict = pd.DataFrame(to_predict_dict)
np.savetxt(
    os.path.join(DATA_DIR, "to_predict.dat"),
    df_predict.to_numpy(),
    fmt='%s',
    delimiter='::',
    encoding='utf-8',
)

# Training and prediction
* Input parameters, in order: the file to execute, the data directory, regularization 1 (default 60), and regularization 2 (default 0.013).
* executing the file performs both training and prediction.
* For the prediction the number of epochs with the lowest validation error is used

In [None]:
file_to_execute = os.path.join(DATA_DIR,"kernelNet_ml1m.py")
!python $file_to_execute $DATA_DIR

In [9]:
# Load the predictions from raw_predictions.csv. Column 0 of this file
# carries no prediction and can be discarded.
raw_predictions_path = os.path.join(DATA_DIR, "raw_predictions.csv")
raw_predictions = pd.read_csv(raw_predictions_path)

In [None]:
# Fill the second column of the submission rows with the model's predictions.
# raw_predictions is addressed as [movie, user + 1]; the +1 skips column 0 of
# the CSV.
output = to_predict_pd.to_numpy()
final_predictions = raw_predictions.to_numpy()
# A single fancy-indexing lookup replaces the per-row Python loop, which was
# slow and also rebound the builtin name `id` at notebook scope.
output[:, 1] = final_predictions[pred_movies, pred_users + 1]
print(output)

In [None]:
# Build the submission frame and write it to
# <DATA_DIR>/final_predictions/kernelNet.csv.
submission_df = pd.DataFrame(output, columns = ['Id', 'Prediction'])
print(submission_df)
# exist_ok=True makes the cell re-runnable without a try/except, while still
# raising if the path exists but is not a directory.
submission_dir = os.path.join(DATA_DIR, "final_predictions")
os.makedirs(submission_dir, exist_ok=True)
submission_df.to_csv(os.path.join(submission_dir, "kernelNet.csv"), index = False)