In [440]:
import pandas as pd
import numpy as np
import time
import os.path as path
import os, io
from ast import literal_eval
from itertools import product
import matplotlib.pyplot as plt

from surprise import BaselineOnly
from surprise import Dataset
from surprise import Reader
from surprise.model_selection.split import train_test_split
from surprise.model_selection import cross_validate, GridSearchCV
from surprise import KNNBasic, KNNWithMeans
from surprise import SVDpp
from surprise import SVD
from surprise import accuracy

## read in preprecessed files

In [441]:

ratings_df  = pd.read_csv('../processed_data/ratings.csv')
games = pd.read_csv("../processed_data/games_metadata.csv")
all_games = pd.read_csv("../processed_data/all_games_id_name_pair.csv")
popularity_df  = pd.read_csv('../processed_data/popularity.csv')

## KNN model

In [442]:
reader = Reader(rating_scale=(1,5))  #invoke reader instance of surprise library
data=Dataset.load_from_df(ratings_df,reader) #load dataset into Surprise datastructure Dataset

### Fit the model using full data

In [443]:
# build training data on full dataset.
trainset = data.build_full_trainset()

In [444]:
## Best parameter for KNN model 

sim_options = {
    'name': 'pearson', #similarity measure default is MSD
    'user_based': False, #user-based CF
    'min_support':25
}

In [445]:
# fit KNN model
algo = KNNBasic(sim_options=sim_options,k=10) # number of neighbours = 45
algo.fit(trainset) #fit model to the training set

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x18c5e46d0>

In [446]:
# Get list of item ID
item_id_list = ratings_df["item_id"].unique()

### Recommend using KNN model

In [447]:
from collections import defaultdict

known_items = user_item_df[user_item_df.user_id == user_id].sort_values(by="rating", ascending=False).item_id.unique()
def knnRecommendation(userId, topN = 3):
    userRatingList = defaultdict(list)
    for itemId in item_id_list:
        if itemId not in known_items:
            predicted = algo.predict(userId, itemId)
            userRatingList[uid].append((itemId,predicted[3]))
    userRatingList[uid].sort(key = lambda x:x[1], reverse = True)
    
    return userRatingList[uid][:topN]

## Hybrid Model

In [448]:
# pip install lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [449]:
# Build game features list
def build_game_features_list(item_features_cols):
    total_features = set()
    game_features_list = []
    for row in games.itertuples(index=False):
        features = set()
        for col in item_features_cols:
            val = getattr(row, col)
            if pd.isnull(val):
                continue
            if col in {"genres", "tags", "specs", "bundles"}:
                features.update(literal_eval(val))
            else:
                features.add(val)
        game_features_list.append(list(features))
        total_features.update(features)
    return game_features_list, list(total_features)

In [450]:
def getGameFeaturesData(item_features_cols):
    game_features_list, total_features = build_game_features_list(item_features_cols)
    game_tuple = list(zip(games.id, game_features_list))
    return (total_features, game_tuple)

In [451]:
# Features1(['genres'])
item_features_cols = ['genres']
total_features, game_tuple = getGameFeaturesData(item_features_cols)

In [452]:
dataset = Dataset()
dataset.fit(user_item_df['user_id'].unique(), # all the steam users
            all_games["item_id"], # all steam games
            item_features=total_features)

In [453]:
item_features = dataset.build_item_features(game_tuple)

In [454]:
(interactions, weights) = dataset1.build_interactions([(x[0], x[1]) for x in user_item_df.values])

In [455]:
# best parameters for hybrid model
best_paramaters = {'no_components': 50, 'loss': 'warp', 'random_state': 1, 'learning_rate': 0.11}

In [456]:
best_model = LightFM(**best_paramaters)

In [457]:
best_model.fit(interactions,
          item_features=item_features,
          epochs=5)

<lightfm.lightfm.LightFM at 0x5b60aa550>

### Recommend using Hybrid model

In [458]:
# mappings: (user id map, user feature map, item id map, item feature map)
mapping = dataset.mapping()

In [459]:
# series used to use index(internal id) to locate the real id
user_series = pd.Series(mapping[0])
item_series = pd.Series(mapping[2])

In [460]:
def getInternalId(real_id, series):
    return series[real_id]

In [461]:
def getRawId(internal_id, series):
    return series[series == internal_id].index[0]

In [462]:
#Make recommendations
def hybridRecommendataions(user_id, topK = 3):
    print(user_id)
    n_users, n_items = interactions.shape
    internal_user_id = getInternalId(user_id, user_series)
    all_scores = pd.Series(best_model.predict(internal_user_id, np.arange(n_items)))
    all_ranks = list(pd.Series(all_scores.sort_values(ascending=False).index))
    known_items = user_item_df[user_item_df.user_id == user_id].sort_values(by="rating", ascending=False).item_id
    known_items_internal_ids = known_items.apply(getInternalId, args=(item_series,))
    unknown_ranks = [x for x in all_ranks if x not in known_items_internal_ids]
    recommended_list = unknown_ranks[0:10]
    recommended_list_ids = [getRawId(id, item_series) for id in recommended_list]
    known_items_names = all_games.loc[all_games['item_id'].isin(known_items[:10])]
    recommended_items_names = all_games.loc[all_games['item_id'].isin(recommended_list_ids)]
    return recommended_items_names

## Popularity model   (for new users) 
The popularity model is used when a new user added into the system and no user-item interaction exists for the user.
The popularity ratings are  based on the number of a game's player on steam 

In [465]:
### use poplarity ratings loaded from popularity.csv in the begining of notebook
def popRecommendataions(topK = 3):
    popular_list = []
    for game in popularity_df["item_id"].head(topK):
        popular_list.append(all_games[all_games["item_id"] == game]["item_name"].values[0])
    return popular_list

In [466]:
popRecommendataions(3)

['Counter-Strike: Global Offensive', "Garry's Mod", 'Left 4 Dead 2']

### Final model 
Use both KNN and hybrid model for recommendation

In [467]:
user_id_list = ratings_df["user_id"].unique()

def final_model_rs(user_id, topK = 3): 
    if user_id not in user_id_list:
        return popRecommendataions(topK)
    
    final_list = []
    knn_rs_list = knnRecommendation(user_id, topK)
    knn_rs_df= pd.DataFrame(knn_rs_list, columns = ["item_id", "ratings"])
    hybrid_rs_df = hybridRecommendataions(user_id, topK)
    hybrid_rs_series = hybrid_rs_df["item_id"]
    knn_rs_series = knn_rs_df["item_id"]
    common_rs_games = pd.Series(list(set(knn_rs_series).intersection(set(hybrid_rs_series))))
    for game in common_rs_games:
        final_list.append(game)
    rs_left = topK - common_rs_games.size
    for game in knn_rs_series[:int((rs_left + 1)/2)]:
        final_list.append(game)
    for game in hybrid_rs_series[:int(rs_left/2)]:
        final_list.append(game)
    
    game_list = []
    for game in final_list:
        game_list.append(all_games[all_games["item_id"] == game]["item_name"].values[0])
    return game_list
    

In [469]:
## Example for recommendation
final_model_rs('76561197970982479')

76561197970982479


  common_rs_games = pd.Series(list(set(knn_rs_series).intersection(set(hybrid_rs_series))))


['Corpse Party', 'The Black Death', 'Sanctum']