In [10]:
import os

import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVDpp
from surprise.model_selection import cross_validate

from sklearn.model_selection import StratifiedGroupKFold 

In [11]:
# Name of this experiment; reused below as the output sub-directory name.
EXP_NAME = "svdpp_pred"


class configs:
    """Fixed settings for this run: the seed constant plus input/output paths."""

    # Seed constant for the experiment.
    SEED = 42

    # Directory holding the competition files, and the individual CSVs in it.
    INPUT_DIR = os.path.join("/workspace", "input", "atmaCup15_dataset")
    SAMPLE_SUB_CSV = os.path.join(INPUT_DIR, "sample_submission.csv")
    TEST_CSV = os.path.join(INPUT_DIR, "test.csv")
    ANIME_CSV = os.path.join(INPUT_DIR, "anime.csv")
    TRAIN_CSV = os.path.join(INPUT_DIR, "train_stratifiedgroupkfold.csv")

    # Per-experiment directory for generated artefacts.
    OUTPUT_DIR = os.path.join("/workspace", "working", EXP_NAME)

In [12]:
# Surprise needs to know the rating range when parsing a dataframe;
# the reader here is configured for a 1-10 scale.
reader = Reader(rating_scale=(1, 10))

# Read the raw training ratings from the shared input directory instead of a
# hard-coded absolute path, so relocating the data only requires editing
# `configs`.
# NOTE(review): configs.TRAIN_CSV names a different file
# (train_stratifiedgroupkfold.csv); this cell keeps reading train.csv exactly
# as it did before - confirm which file is intended.
train_df = pd.read_csv(os.path.join(configs.INPUT_DIR, "train.csv"))

# Surprise expects exactly three columns, in (user, item, rating) order.
train_data = Dataset.load_from_df(train_df[["user_id", "anime_id", "score"]], reader)

In [13]:
# Wrap the three rating columns, in (user, item, rating) order, in a Surprise
# dataset.
full_dataset = Dataset.load_from_df(train_df[["user_id", "anime_id", "score"]], reader)

# Fit on every available rating (no hold-out split). The result is bound to
# `train_data`, as before, so the name ends the cell holding the trainset.
train_data = full_dataset.build_full_trainset()

# SVD++ initialises its factors randomly and shuffles nothing deterministically
# unless seeded; pass the experiment seed so a Restart & Run All reproduces the
# same model (and therefore the same submission).
algo = SVDpp(random_state=configs.SEED)
_ = algo.fit(train_data)  # assign to `_` to suppress the fitted-object repr

In [14]:
# Load the (user, item) pairs to score, using the shared config path.
test_df = pd.read_csv(configs.TEST_CSV)
# Placeholder rating: Surprise test tuples carry a "true rating" slot, which is
# unknown for the test rows and does not influence the estimate.
test_df["score"] = 0

# Build the test set directly from the rows, in file order.
# Going through `build_full_trainset().build_testset()` regroups the ratings
# by user (in order of first appearance), so the predictions would no longer
# line up row-for-row with the test file / submission template.
test_set = list(zip(test_df["user_id"], test_df["anime_id"], test_df["score"]))

# One prediction per test tuple, returned in the same order as `test_set`.
predictions = algo.test(test_set)

submission = pd.read_csv(configs.SAMPLE_SUB_CSV)

# The scores are assigned positionally, so the template must have one row per
# test row. NOTE(review): this also assumes the template's rows are in the
# same order as test.csv - confirm against the competition data description.
if len(predictions) != len(submission):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(submission)} submission rows"
    )

# `est` is the model's (clipped) rating estimate for each pair.
submission["score"] = [pred.est for pred in predictions]

# The experiment output directory is not created anywhere else; make sure it
# exists so the write does not fail on a fresh environment.
os.makedirs(configs.OUTPUT_DIR, exist_ok=True)
sub_path = os.path.join(configs.OUTPUT_DIR, "submission.csv")
submission.to_csv(sub_path, index=False)