# Game RecSys: Baseline Models
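In this notebook I develop baseline models for the game recommender system: a bias-only baseline and an SVD latent-factor model, each evaluated by mean squared error (MSE) on a held-out test set.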

In [1]:
import numpy as np
import pandas as pd
from surprise import Reader, Dataset, SVD

## Load and format the data
### Training data

In [2]:
# Features and labels live in separate CSVs; both carry an "Unnamed: 0" column
# (presumably the row index saved by an earlier to_csv -- confirm) that is used
# as the index to align them.
X_train = pd.read_csv("train_test_split/X_train_3k.csv").set_index("Unnamed: 0")
y_train = pd.read_csv("train_test_split/y_train_3k.csv").set_index("Unnamed: 0")
# one-to-one join on the shared index, then replace it with a fresh 0..n-1 index
train_df = X_train.merge(
    y_train, left_index=True, right_index=True, validate="1:1"
).reset_index(drop=True)

### Test data

In [3]:
# Features and labels live in separate CSVs; both carry an "Unnamed: 0" column
# (presumably the row index saved by an earlier to_csv -- confirm) that is used
# as the index to align them.
X_test = pd.read_csv("train_test_split/X_test_3k.csv").set_index("Unnamed: 0")
y_test = pd.read_csv("train_test_split/y_test_3k.csv").set_index("Unnamed: 0")
# one-to-one join on the shared index, then replace it with a fresh 0..n-1 index
test_df = X_test.merge(
    y_test, left_index=True, right_index=True, validate="1:1"
).reset_index(drop=True)

In [4]:
# preview only the first rows rather than rendering the whole frame
test_df.head(10)

Unnamed: 0,funny,helpful,item_id,last_edited,posted,review,user_id,user_url,recommend
0,,No ratings yet,386360,,Posted February 26.,"Pfft, everyone is waiting for jesus, i am wati...",sickbubblez,http://steamcommunity.com/id/sickbubblez,1
1,,2 of 2 people (100%) found this review helpful,4000,"Last edited September 30, 2015.","Posted June 20, 2013.","great if you like building role playing , bein...",GetALifeStopLookingAtMyUrl,http://steamcommunity.com/id/GetALifeStopLooki...,1
2,,No ratings yet,1250,,"Posted May 25, 2015.",Best zombie game on PC by far in my opinion!,kineticvine,http://steamcommunity.com/id/kineticvine,1
3,,No ratings yet,200210,,"Posted January 10, 2014.",guooo me envicio muchooooooooooooooooo ahahaha...,LeoNoHomo,http://steamcommunity.com/id/LeoNoHomo,1
4,,No ratings yet,4000,,"Posted February 1, 2014.",i give it a crash/10,itsdandytime,http://steamcommunity.com/id/itsdandytime,1
5,,0 of 1 people (0%) found this review helpful,377160,Last edited July 20.,Posted July 20.,what to do with this game: download mods,76561198134824579,http://steamcommunity.com/profiles/76561198134...,1
6,,1 of 1 people (100%) found this review helpful,261980,,"Posted December 4, 2013.",Confusion & program crashes,Gemini_Tiw,http://steamcommunity.com/id/Gemini_Tiw,0
7,1 person found this review funny,3 of 5 people (60%) found this review helpful,730,,"Posted December 21, 2013.",Buy....,76561198066969630,http://steamcommunity.com/profiles/76561198066...,1
8,,No ratings yet,34330,,"Posted July 23, 2014.",The height of Total War glory and creation. Gr...,76561198064162552,http://steamcommunity.com/profiles/76561198064...,1
9,,No ratings yet,271290,,"Posted February 15, 2014.",If you played MechWarrior or still do you are ...,u2late,http://steamcommunity.com/id/u2late,1


## Test a baseline recommender
Uses static baseline estimates (the global mean rating plus a per-user bias and a per-game bias) to predict ratings for unseen user-game pairs.

In [17]:
from surprise import BaselineOnly, accuracy
from surprise.model_selection import PredefinedKFold

In [22]:
# need to write files first in order for surprise to read them properly
train_df[["user_id", "item_id", "recommend"]].to_csv("train_test_split/surprise_train.csv",index=False)
test_df[["user_id", "item_id", "recommend"]].to_csv("train_test_split/surprise_test.csv", index=False)

In [23]:
# create a reader object
reader = Reader(rating_scale=(0, 1), sep=",", skip_lines=1)

folds_files = [("train_test_split/surprise_train.csv", "train_test_split/surprise_test.csv")]

data = Dataset.load_from_folds(folds_files, reader=reader)
pkf = PredefinedKFold()

baseline = BaselineOnly()

for trainset, testset in pkf.split(data):

    # train and test algorithm.
    baseline.fit(trainset)
    predictions = baseline.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.mse(predictions, verbose=True)

Estimating biases using als...
MSE: 0.0869


## Test an SVD algorithm
Uses user/game latent factors to predict new game ratings

In [27]:
# SVD initialises its latent factors randomly; fix the seed so the reported
# MSE is reproducible after Restart Kernel -> Run All.
svd = SVD(n_factors=10, random_state=42)

for trainset, testset in pkf.split(data):

    # train on the training fold and predict the held-out fold
    svd.fit(trainset)
    predictions = svd.test(testset)

    # Compute and print Mean Squared Error (accuracy.mse, not RMSE)
    accuracy.mse(predictions, verbose=True)

MSE: 0.0862
