### Import Libraries

In [29]:
# import required libraries
import os
import os.path
import numpy as np
import pandas as pd
from math import sqrt
from heapq import nlargest
from tqdm import trange
from tqdm.auto import tqdm
from scipy import stats
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

### Readin Data

In [30]:
champions_played_df = pd.read_csv("player_champions.csv")

In [31]:
champions_played_df = champions_played_df.iloc[:,1:]

In [32]:
champions_played_df

Unnamed: 0,Summoner,Champion_Name,Champion_ID,Matches_Played,Normalized_Matches_Played
0,Winston4,Olaf,82,195,0.280172
1,Winston4,Graves,36,79,0.113506
2,Winston4,Lee Sin,62,79,0.113506
3,Winston4,Elise,25,42,0.060345
4,Winston4,Gragas,35,40,0.057471
...,...,...,...,...,...
1691234,Röss,Lissandra,64,1,0.006993
1691235,Röss,Darius,20,1,0.006993
1691236,Röss,Rek'Sai,92,1,0.006993
1691237,Röss,Kayn,55,1,0.006993


In [33]:
champions_played_df['Summoner'] = champions_played_df['Summoner'].astype('category')
champions_played_df["Summoner_ID"] = champions_played_df["Summoner"].cat.codes

In [34]:
champions_played_df.head()

Unnamed: 0,Summoner,Champion_Name,Champion_ID,Matches_Played,Normalized_Matches_Played,Summoner_ID
0,Winston4,Olaf,82,195,0.280172,41320
1,Winston4,Graves,36,79,0.113506,41320
2,Winston4,Lee Sin,62,79,0.113506,41320
3,Winston4,Elise,25,42,0.060345,41320
4,Winston4,Gragas,35,40,0.057471,41320


In [35]:
champions_played_df['Champion_Name'] = champions_played_df['Champion_Name'].astype('category')
champions_played_df["Champion_ID"] = champions_played_df["Champion_Name"].cat.codes
df = champions_played_df[['Summoner_ID','Champion_ID','Normalized_Matches_Played']]

In [36]:
champions_played_df.head()

Unnamed: 0,Summoner,Champion_Name,Champion_ID,Matches_Played,Normalized_Matches_Played,Summoner_ID
0,Winston4,Olaf,82,195,0.280172,41320
1,Winston4,Graves,36,79,0.113506,41320
2,Winston4,Lee Sin,62,79,0.113506,41320
3,Winston4,Elise,25,42,0.060345,41320
4,Winston4,Gragas,35,40,0.057471,41320


In [37]:
df.head()

Unnamed: 0,Summoner_ID,Champion_ID,Normalized_Matches_Played
0,41320,82,0.280172
1,41320,36,0.113506
2,41320,62,0.113506
3,41320,25,0.060345
4,41320,35,0.057471


### Data Preprocessing


For the champion recommender system, it will try to predict the matches played per champion and recommend champion with the highest predicted value, but as we saw in the EDA, the total matches played per summoner are diverse. This will add bias to the summoners with higher match counts which is not desirable. Therefore, we will normalize the matches played per champion with total champions so that all summoners are in the same scale.

In [38]:
num_users = len(df.Summoner_ID.unique())
num_items = len(df.Champion_ID.unique())
print("Number of users:", num_users)
print("Number of items:", num_items)

Number of users: 54478
Number of items: 148


In [39]:
def dataPreprocessor(df, num_users, num_items):
    """
        INPUT: 
            data: pandas DataFrame. columns=['userID', 'itemID', 'rating' ...]
            num_row: int. number of users
            num_col: int. number of items
            
        OUTPUT:
            matrix: 2D numpy array. 
            
        NOTE 1: see where something very similar is done in the lab in function 'buildUserItemMatrix'    
            
        NOTE 2: data can have more columns, but your function should ignore 
              additional columns.
    """
    ########### your code goes here ###########
    matrix = np.zeros((num_users, num_items))
    
    for (index, userID, itemID, rating) in df.itertuples():
        matrix[userID,itemID] = rating

    ###########         end         ###########
    return matrix

In [40]:
df_sparse = dataPreprocessor(df, num_users, num_items)

In [41]:
df_sparse

array([[0.        , 0.        , 0.        , ..., 0.00775194, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.00464396,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.01092896, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.05494505, 0.        , 0.01098901, ..., 0.        , 0.        ,
        0.        ]])

### Evaluation

Evaluation is critical in machine learning projects, because it allows us to compare different algorithms and hyperparameter choices for these models.

One key aspect of evaluation is to ensure that the trained model generalizes for data it was not trained on, using Cross-validation techniques. The system utilizes 5 fold for evaluation.

In recommender system, the common evaluation metrics are R@K, P@K and RMSE. Even though the code evaluates for all three metrics, the highest priority will be R@K as we want recommender system to recommend most or all champions that users would prefer.

In [42]:
class CrossValidation(object):
    def __init__(self, metric, df=df):
        """
            INPUT:
                metric: string. from['RMSE','P@K','R@K']
        """
        self.folds = self._getData(df)
        self.metric_name = metric
        self.metric = self._getMetric(self.metric_name)
        
    def _getMetric(self, metric_name):
        """
            Don't change this
        """
        switcher = {
            'RMSE': self.rmse,
            'P@K': self.patk,
            'R@K': self.ratk,
        }
        
        return switcher[metric_name]
    
    @staticmethod
    def rmse(data, k, num_users, num_items, pred, true='Normalized Match'):
        """
            data: pandas DataFrame. 
            pred: string. Column name that corresponding to the prediction
            true: string. Column name that corresponding to the true rating
        """
        return sqrt(mean_squared_error(data[pred], data[true]))
        

    def patk(self, data, k, num_users, num_items, pred, true='Normalized Match'):
        """
            data: pandas DataFrame. 
            k: top-k items retrived
            pred: string. Column name that corresponding to the prediction
            true: string. Column name that corresponding to the true rating
        """
        prediction = self.getMatrix(data, num_users, num_items, pred)
        testSet =  self.getMatrix(data, num_users, num_items, true)
    
        # Initialize sum and count vars for average calculation
        sumPrecisions = 0
        countPrecisions = 0

        # Define function for converting 0-1 continuous value to discrete 0/1 (Plays / Not Plays)
        vf = np.vectorize(lambda x: 1 if x >= 0.05 else 0)

        for userID in range(num_users):
            # Pick top K based on predicted rating
            userVector = prediction[userID,:]
            topK = nlargest(k, range(len(userVector)), userVector.take)

            # Convert test set ratings to like / don't like
            userTestVector = vf(testSet[userID,:]).nonzero()[0]

            # Calculate precision
            precision = float(len([item for item in topK if item in userTestVector]))/len(topK)

            # Update sum and count
            sumPrecisions += precision
            countPrecisions += 1

        # Return average P@k
        return float(sumPrecisions)/countPrecisions

    # Recall at k
    def ratk(self, data, k, num_users, num_items, pred, true='Normalized Match'):
        """
            data: pandas DataFrame. 
            k: top-k items relevant
            pred: string. Column name that corresponding to the prediction
            true: string. Column name that corresponding to the true rating
        """
        prediction = self.getMatrix(data, num_users, num_items, pred)
        testSet =  self.getMatrix(data, num_users, num_items, true)
        # Initialize sum and count vars for average calculation
        sumRecalls = 0
        countRecalls = 0

        # Define function for converting 0-1 continuous value to discrete 0/1 (Plays / Not Plays)
        vf = np.vectorize(lambda x: 1 if x >= 0.05 else 0)

        for userID in range(num_users):
            # Pick top K based on predicted rating
            userVector = prediction[userID,:]
            topK = nlargest(k, range(len(userVector)), userVector.take)

            # Convert test set ratings to like / don't like
            userTestVector = vf(testSet[userID,:]).nonzero()[0]

            # Ignore user if has no ratings in the test set
            if (len(userTestVector) == 0):
                continue

            # Calculate recall
            recall = float(len([item for item in topK if item in userTestVector]))/len(userTestVector)

            # Update sum and count
            sumRecalls += recall
            countRecalls += 1

        # Return average R@k
        return float(sumRecalls)/countRecalls
    
    @staticmethod
    def getMatrix(df, num_users, num_items, column_name):
        matrix = np.zeros((num_users, num_items))
    
        for (index, userID, itemID, value) in df[['Summoner_ID','Champion_ID', column_name]].itertuples():
            matrix[userID, itemID] = value
            
        return matrix
    
    @staticmethod
    def _getData(df,k=5):
        """
            Don't change this function
        """
        folds = []
        kf = KFold(n_splits=k)
        for train_index, test_index in kf.split(df):
            train_set, test_set = df.loc[train_index], df.loc[test_index]
            folds.append([train_set, test_set])

        return folds
    
    def run(self, algorithms, num_users, num_items, k=1):
        """
            5-fold cross-validation
            algorithms: list. a list of algorithms. 
                        eg: [user_cosine_recsys, item_euclidean_recsys]
        """
        
        scores = {}
        for algorithm in algorithms:
            print('Processing algorithm {0}'.format(algorithm.getPredColName()))
            fold_scores = []
            for fold in self.folds:
                algorithm.reset()
                algorithm.predict_all(fold[0], num_users, num_items)
                prediction = algorithm.evaluate_test(fold[1])
                pred_col = algorithm.getPredColName()
                fold_scores.append(self.metric(prediction, k, num_users, num_items, pred_col))
                
            mean = np.mean(fold_scores)
            scores[algorithm.getPredColName()] = [fold_scores, mean]
            
        results = scores    
    
        return results

### Baseline Models - Popularity

Popularity simply recommends to users based on the popularity of the champions. Popularity is a good baseline system, but has limitations as it recommends same list of champions to all summoners. 

In [43]:
class BaseLineRecSys(object):
    def __init__(self, method, processor=dataPreprocessor):
        """
            method: string. From ['popularity','useraverage']
            processor: function name. dataPreprocessor by default
        """
        self.method_name = method
        self.method = self._getMethod(self.method_name)
        self.processor = processor
        self.pred_column_name = self.method_name
        
    def _getMethod(self, method_name):
        """
            Don't change this
        """
        switcher = {
            'popularity': self.popularity,
        }
        
        return switcher[method_name]
        
    @staticmethod
    def popularity(train_matrix, num_users, num_items):
        """
            INPUT:
                train_matrix: 2D numpy array.
                num_users: int. Number of Users.
                num_items: int. Number of Items.
            OUTPUT:
                predictionMatrix: 2D numpy array.
                
            NOTE: see where something very similar is done in the lab in function 'predictByPopularity'    
        """
        
        ########### your code goes here ###########
        # Initialize the predicted rating matrix with zeros
        predictionMatrix = np.zeros((num_users, num_items))

        # Define function for converting 0-1 continuous value to discrete 0/1 (Plays / Not Plays)

        vf = np.vectorize(lambda x: 1 if x >= 0.05 else 0)
        
        itemPopularity = np.zeros((num_items))
        for item in range(num_items):
            numOfUsersRated = len(train_matrix[:, item].nonzero()[0])
            numOfUsersLiked = len(vf(train_matrix[:, item]).nonzero()[0])
            if numOfUsersRated == 0:
                itemPopularity[item] = 0
            else:
                itemPopularity[item] = numOfUsersLiked/numOfUsersRated

        for (user,item), rating in np.ndenumerate(train_matrix):
            # Predict rating for every item that wasn't ranked by the user (rating == 0)
            predictionMatrix[user, item] = itemPopularity[item]

            # report progress every 100 users
            if (user % 100 == 0 and item == 1):
                print ("calculated %d users" % (user,))
                
        ###########         end         ###########
        return predictionMatrix    
    
    def predict_all(self, train_df, num_users, num_items):
        
        train_matrix = self.processor(train_df, num_users, num_items)
        self.__model = self.method(train_matrix, num_users, num_items)
        
    def evaluate_test(self, test_df, copy=False):
        
        if copy:
            prediction = test_df.copy()
        else:
            prediction = test_df
            
        prediction[self.pred_column_name] = np.nan
        
        for (index, 
             userID, 
             itemID) in tqdm(prediction[['Summoner_ID','Champion_ID']].itertuples()):
            prediction.loc[index, self.pred_column_name] = self.__model[userID, itemID]

        return prediction
        
    def getModel(self):
        """
            return predicted user-item matrix
        """
        return self.__model
    
    def getPredColName(self):
        """
            return prediction column name
        """
        return self.pred_column_name
    
    def reset(self):
        """
            reuse the instance of the class by removing model
        """
        try:
            self.model = None
        except:
            print("You don not have model..")
            

In [17]:
popularity_recsys = BaseLineRecSys('popularity')

In [18]:
popularity_recsys.predict_all(df, num_users, num_items)

calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users
calculated 3200 users
calculated 3300 users
calculated 3400 users
calculated 3500 users
calculated 3600 users
calculated 3700 users
calculated 3800 users
calculated 3900 users
calculated 4000 users
calculated 4100 users
calculated 4200 users
calculated 4300 users
calculated 4400 users
calculated 4500 users


calculated 36600 users
calculated 36700 users
calculated 36800 users
calculated 36900 users
calculated 37000 users
calculated 37100 users
calculated 37200 users
calculated 37300 users
calculated 37400 users
calculated 37500 users
calculated 37600 users
calculated 37700 users
calculated 37800 users
calculated 37900 users
calculated 38000 users
calculated 38100 users
calculated 38200 users
calculated 38300 users
calculated 38400 users
calculated 38500 users
calculated 38600 users
calculated 38700 users
calculated 38800 users
calculated 38900 users
calculated 39000 users
calculated 39100 users
calculated 39200 users
calculated 39300 users
calculated 39400 users
calculated 39500 users
calculated 39600 users
calculated 39700 users
calculated 39800 users
calculated 39900 users
calculated 40000 users
calculated 40100 users
calculated 40200 users
calculated 40300 users
calculated 40400 users
calculated 40500 users
calculated 40600 users
calculated 40700 users
calculated 40800 users
calculated 

### Collaborative Filtering - User - User & Item - Item

In [44]:
class SimBasedRecSys(object):

    def __init__(self, base, method, processor=dataPreprocessor):
        """
            base: string. From ['user', 'item']. User-based Similarity or Item-based
            method: string. From ['cosine', 'euclidean', 'somethingelse']
            processor: function name. dataPreprocessor by default
        """
        self.base = base
        self.method_name = method
        self.method = self._getMethod(self.method_name)
        self.processor = processor
        self.pred_column_name = self.base+'-'+self.method_name
    
    def _getMethod(self, method_name):
        """
            Don't change this
        """
        switcher = {
            'cosine': self.cosine,
            'euclidean': self.euclidean,
        }
        
        return switcher[method_name]
    
    @staticmethod
    def cosine(matrix):
        """
            cosine similarity
        """
        similarity_matrix = 1 - pairwise_distances(matrix, metric='cosine')
        return similarity_matrix
    
    @staticmethod
    def euclidean(matrix):
        """
            euclidean similarity
        """
        ########### your code goes here ###########
        
        similarity_matrix = 1/(pairwise_distances(matrix, metric='euclidean') + 1)
        
    
        ###########         end         ###########    
        
        return similarity_matrix
    
        
    def predict_all(self, train_df, num_users, num_items):
        """
            INPUT: 
                data: pandas DataFrame. columns=['userID', 'itemID', 'rating'...]
                num_row: scalar. number of users
                num_col: scalar. number of items
            OUTPUT:
                no return... this method assigns the result to self.model
            
            NOTES:
                self.__model should contain predictions for *all* user and items
                (don't worry about predicting for observed (user,item) pairs,
                 since we won't be using these predictions in the evaluation)
                (see code in for an efficient vectorized example)
        """
        train_matrix = self.processor(train_df, num_users, num_items)
        
        if self.base == 'user':
            ########### your code goes here ###########
            # Initialize the predicted rating matrix with zeros
            temp_matrix = np.zeros(train_matrix.shape)
            temp_matrix[train_matrix.nonzero()] = 1
            self.similarity = self.method(train_matrix)
            normalizer = np.matmul(self.similarity, temp_matrix)
            normalizer[normalizer == 0] = 1e-5
            predictionMatrix = np.matmul(self.similarity, train_matrix)/normalizer
            useraverage = np.sum(train_matrix, axis=1)/(np.sum(temp_matrix, axis=1)+ 1e-5)
            
            columns = np.sum(predictionMatrix, axis=0)
            
            predictionMatrix[:, columns==0] = predictionMatrix[:, columns==0] + np.expand_dims(useraverage, axis=1)
            
            self.__model = predictionMatrix
            
        elif self.base == 'item':
            ########### your code goes here ###########
            train_matrix = train_matrix.T
            temp_matrix = np.zeros(train_matrix.shape)
            temp_matrix[train_matrix.nonzero()] = 1
            self.similarity = self.method(train_matrix)
            normalizer = np.matmul(self.similarity, temp_matrix)
            normalizer[normalizer == 0] = 1e-5
            predictionMatrix = np.matmul(self.similarity, train_matrix)/normalizer
            useraverage = np.sum(train_matrix, axis=1)/(np.sum(temp_matrix, axis=1)+ 1e-5)
            
            columns = np.sum(predictionMatrix, axis=0)
            
            predictionMatrix[:, columns==0] = predictionMatrix[:, columns==0] + np.expand_dims(useraverage, axis=1)
            
            self.__model = predictionMatrix.T
            
            ###########         end         ###########
        else:
            print('No other option available')
        
    def evaluate_test(self, test_df, copy=False):
        """
            INPUT:
                data: pandas DataFrame. columns=['userID', 'itemID', 'rating'...]
            OUTPUT:
                predictions:  pandas DataFrame. 
                              columns=['userID', 'itemID', 'rating', 'base-method'...]
                              
            NOTE: 1. data can have more columns, but your function should ignore 
                  additional columns.
                  2. 'base-method' depends on your 'base' and 'method'. For example,
                  if base == 'user' and method == 'cosine', 
                  then base-method == 'user-cosine'
                  3. your predictions go to 'base-method' column
        """
        if copy:
            prediction = test_df.copy()
        else:
            prediction = test_df
        prediction[self.pred_column_name] = np.nan
        
        for (index, 
             userID, 
             itemID) in tqdm(prediction[['Summoner_ID','Champion_ID']].itertuples()):
            prediction.loc[index, self.pred_column_name] = self.__model[userID, itemID]
    
        return prediction
    
    def getModel(self):
        """
            return predicted user-item matrix
        """
        return self.__model
    
    
    def getPredColName(self):
        """
            return prediction column name
        """
        return self.pred_column_name
    
    def reset(self):
        """
            reuse the instance of the class by removing model
        """
        try:
            self.model = None
        except:
            print("You do not have model..")

In [None]:
#User-User Cosine Similaritiy
user_cosine_recsys = SimBasedRecSys('user','cosine')
user_cosine_recsys.predict_all(df, num_users, num_items)

In [23]:
#User-User Euclidean Similaritiy
user_euc_recsys = SimBasedRecSys('user','euclidean')
user_euc_recsys.predict_all(df, num_users, num_items)

In [45]:
#Item-Item Cosine Similarity
item_cosine_recsys = SimBasedRecSys('item','cosine')
item_cosine_recsys.predict_all(df, num_users, num_items)

In [25]:
#Item-Item Euclidean Similarity
item_euc_recsys = SimBasedRecSys('item','euclidean')
item_euc_recsys.predict_all(df, num_users, num_items)

### PMF

In [46]:
class PMFRecSys(object):
    def __init__(self, num_feat=10, epsilon=1, _lambda=0.1, momentum=0.8, maxepoch=20, num_batches=10, batch_size=1000):
        """
            num_feat: int, number of latent features
            epsilon: float, learning rate
            _lambda: float, L2 regularization,
            momentum: float, momentum of the gradient,
            maxepoch: float, Number of epoch before stop,
            num_batches: int, Number of batches in each epoch (for SGD optimization),
            batch_size:Number int, of training samples used in each batches (for SGD optimization)
            
        """
        self.num_feat = num_feat  # Number of latent features,
        self.epsilon = epsilon  # learning rate,
        self._lambda = _lambda  # L2 regularization,
        self.momentum = momentum  # momentum of the gradient,
        self.maxepoch = maxepoch  # Number of epoch before stop,
        self.num_batches = num_batches  # Number of batches in each epoch (for SGD optimization),
        self.batch_size = batch_size  # Number of training samples used in each batches (for SGD optimization)
        self.test = False
        self.w_Item = None  # Item feature vectors
        self.w_User = None  # User feature vectors
        
        self.rmse_train = []
        self.rmse_test = []
        self.pred_column_name='PMF'

    def predict_all(self, train_vec, num_user, num_item):
        """
            INPUT: 
                data: pandas DataFrame. columns=['userID', 'itemID', 'rating'...]
                num_user: scalar. number of users
                num_item: scalar. number of items
            OUTPUT:
                no return... this method update w_User and w_Item
            
            NOTES:
                self.W_Item and self.W_User are use to do the final predition for a user
                
        """
        # select 'userID', 'itemID', 'rating only
        train_vec = train_vec.iloc[:, :3].values
        if self.test:
          train_vec, val_vec = train_test_split(train_vec)
          pairs_val = val_vec.shape[0] #num of rating for validation
          self.mean_rating_test = np.mean(val_vec[:, 2])
        self.mean_rating_train = np.mean(train_vec[:, 2])  # avg rating
        pairs_train = train_vec.shape[0]  # num of rating for training
                
        # initialize
        self.epoch = 0
        
        ########### your code goes here ###########

        self.w_Item = 0.1 * np.random.randn(num_item, self.num_feat)
        self.w_User = 0.1 * np.random.randn(num_user, self.num_feat) 

        ###########         end         ###########  

        self.w_Item_inc = np.zeros((num_item, self.num_feat))  # accumulate the gradient
        self.w_User_inc = np.zeros((num_user, self.num_feat))  # accumulate the gradient
        while self.epoch < self.maxepoch: 
            self.epoch += 1

            # Shuffle training truples
            shuffled_order = np.arange(train_vec.shape[0])  
            np.random.shuffle(shuffled_order)  #shuffled

            # Batch update
            for batch in range(self.num_batches): 
                # print "epoch %d batch %d" % (self.epoch, batch+1)

                test = np.arange(self.batch_size * batch, self.batch_size * (batch + 1))
                batch_idx = np.mod(test, shuffled_order.shape[0])  # get the real data index


                batch_UserID = np.array(train_vec[shuffled_order[batch_idx], 0], dtype='int32')
                batch_ItemID = np.array(train_vec[shuffled_order[batch_idx], 1], dtype='int32')

                # Compute Compute mean rating subtracted rating  
                ########### your code goes here ###########
            
                pred_out = np.sum(np.multiply(self.w_User[batch_UserID, :],self.w_Item[batch_ItemID, :]),axis=1)
                
                ###########         end         ########### 

                rawErr = pred_out + self.mean_rating_train - train_vec[shuffled_order[batch_idx], 2]

                # Compute gradients
                Ix_User = 2 * np.multiply(rawErr[:, np.newaxis], self.w_Item[batch_ItemID, :]) \
                       + self._lambda * self.w_User[batch_UserID, :]
                Ix_Item = 2 * np.multiply(rawErr[:, np.newaxis], self.w_User[batch_UserID, :]) \
                       + self._lambda * (self.w_Item[batch_ItemID, :])  # np.newaxis :increase the dimension

                dw_Item = np.zeros((num_item, self.num_feat))
                dw_User = np.zeros((num_user, self.num_feat))

                # loop to aggreate the gradients of the same element
                for i in range(self.batch_size):
                    dw_Item[batch_ItemID[i], :] += Ix_Item[i, :]
                    dw_User[batch_UserID[i], :] += Ix_User[i, :]

                # Update with momentum
                self.w_Item_inc = self.momentum * self.w_Item_inc + self.epsilon * dw_Item / self.batch_size
                self.w_User_inc = self.momentum * self.w_User_inc + self.epsilon * dw_User / self.batch_size

                self.w_Item = self.w_Item - self.w_Item_inc
                self.w_User = self.w_User - self.w_User_inc

                # Compute Compute mean rating subtracted rating 
                if batch == self.num_batches - 1:
                    train_user_idx = np.array(train_vec[:, 0], dtype='int32')
                    train_item_idx = np.array(train_vec[:, 1], dtype='int32')
                    ########### your code goes here ###########
                    
                    pred_out = np.sum(np.multiply(self.w_User[train_user_idx, :],self.w_Item[train_item_idx, :]),axis=1)
            
                    ###########         end         ########### 
                    rawErr = pred_out + self.mean_rating_train - train_vec[:, 2] 
                    obj = np.linalg.norm(rawErr) ** 2 \
                          + 0.5 * self._lambda * (np.linalg.norm(self.w_User) ** 2 + np.linalg.norm(self.w_Item) ** 2)

                    self.rmse_train.append(np.sqrt(obj / pairs_train))

                # Compute validation error
                if batch == self.num_batches - 1 and self.test:
                    val_user_idx = np.array(val_vec[:, 0], dtype='int32')
                    val_item_idx = np.array(val_vec[:, 1], dtype='int32')
                    ########### your code goes here ###########
            
                    pred_out = np.sum(np.multiply(self.w_User[val_user_idx, :],self.w_Item[val_item_idx, :]),axis=1)
                
                    ###########         end         ########### 
                    rawErr = pred_out + self.mean_rating_test - val_vec[:, 2]
                    self.rmse_test.append(np.linalg.norm(rawErr) / np.sqrt(pairs_val))


        
    def evaluate_test(self, test_df, copy=False):
        """
            INPUT:
                data: pandas DataFrame. columns=['userID', 'itemID', 'rating'...]
            OUTPUT:
                predictions:  pandas DataFrame. 
                              columns=['userID', 'itemID', 'rating', 'base-method'...]
                              
        """
        if copy:
            prediction = pd.DataFrame(test_df.copy(), columns=['Summoner_ID','Champion_ID', 'Normalized Match'])
        else:
            prediction = pd.DataFrame(test_df, columns=['Summoner_ID','Champion_ID', 'Normalized Match'])
        prediction[self.pred_column_name] = np.nan
        
        for (index, 
             userID, 
             itemID) in tqdm(prediction[['Summoner_ID','Champion_ID']].itertuples()):
            prediction.loc[index, self.pred_column_name] = (np.dot(self.w_Item, self.w_User[int(userID), :]) + self.mean_rating_train)[int(itemID)]
    
        return prediction
    
    def plot_error(self):
        if self.test:
            plt.plot(range(pmf.maxepoch), pmf.rmse_test, marker='v', label='Test Data')
            plt.plot(range(pmf.maxepoch), pmf.rmse_train, marker='o', label='Training Data')
            plt.title('The MovieLens Dataset Learning Curve')
            plt.xlabel('Number of Epochs')
            plt.ylabel('RMSE')
            plt.legend()
            plt.grid()
            plt.show()
          
    def getPredColName(self):
        """
            return prediction column name
        """
        return self.pred_column_name
    
    def reset(self):
        """
            reuse the instance of the class by removing model
        """
        try:
            self.w_Item = None 
            self.w_User = None 
        except:
            print("You do not have w_Item, w_User")

    def set_params(self, parameters):
        if isinstance(parameters, dict):
            self.num_feat = parameters.get("num_feat", 10)
            self.epsilon = parameters.get("epsilon", 1)
            self._lambda = parameters.get("_lambda", 0.1)
            self.momentum = parameters.get("momentum", 0.8)
            self.maxepoch = parameters.get("maxepoch", 20)
            self.num_batches = parameters.get("num_batches", 10)
            self.batch_size = parameters.get("batch_size", 1000)
            self.test = parameters.get("test_mode", False)

In [201]:
pmf = PMFRecSys()
pmf.set_params({"num_feat": 10, "epsilon": 1, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 100, "num_batches": 100,
                "batch_size": 1000, 'test_mode':True})

In [195]:
pmf.set_params({"num_feat": 10, "epsilon": 1, "_lambda": 0.1, "momentum": 0.8, "maxepoch": 10, "num_batches": 100,
                "batch_size": 1000, 'test_mode':False})

In [196]:
pmf.predict_all(df, num_users, num_items)

## Model Implementation

In [378]:
algorithm_instances = [popularity_recsys, 
                       user_cosine_recsys,
                       item_cosine_recsys,
                       pmf]

cv_patk = CrossValidation('P@K')
cv_ratk = CrossValidation('R@K')
cv_rmse = CrossValidation('RMSE')

patk_score = cv_patk.run(algorithm_instances, num_users, num_items,k=5)
ratk_score = cv_ratk.run(algorithm_instances, num_users, num_items,k=5)
rmse_score = cv_rmse.run(algorithm_instances, num_users, num_items,k=5)

Processing algorithm popularity
calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm user-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm item-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm PMF


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm popularity
calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm user-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm item-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm PMF


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm popularity
calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


calculated 0 users
calculated 100 users
calculated 200 users
calculated 300 users
calculated 400 users
calculated 500 users
calculated 600 users
calculated 700 users
calculated 800 users
calculated 900 users
calculated 1000 users
calculated 1100 users
calculated 1200 users
calculated 1300 users
calculated 1400 users
calculated 1500 users
calculated 1600 users
calculated 1700 users
calculated 1800 users
calculated 1900 users
calculated 2000 users
calculated 2100 users
calculated 2200 users
calculated 2300 users
calculated 2400 users
calculated 2500 users
calculated 2600 users
calculated 2700 users
calculated 2800 users
calculated 2900 users
calculated 3000 users
calculated 3100 users


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm user-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm item-cosine


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Processing algorithm PMF


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [379]:
col_name = ['RMSE Mean','P@K Mean','R@K Mean']
rmse_score_df = pd.DataFrame(rmse_score)[1:].T
patk_score_df = pd.DataFrame(patk_score)[1:].T
ratk_score_df = pd.DataFrame(ratk_score)[1:].T

data_df = pd.concat([rmse_score_df,patk_score_df,ratk_score_df], axis =1)

data_df.columns = col_name


In [380]:
data_df

Unnamed: 0,RMSE Mean,P@K Mean,R@K Mean
popularity,0.48374,0.0572484,0.304603
user-cosine,0.0960986,0.00556688,0.0265991
item-cosine,0.0888415,0.0561146,0.324303
PMF,0.0896907,0.0390828,0.239742


Based on the result above, item-cosine tends to perform the best overall. Also, having a user-user similarity may not be so scalable due to large pool of users. It will take a very long time predicting for the champions.

## Make Recommendations Per Item

In [28]:
from numpy import savez_compressed
iisimilarity = item_cosine_recsys.similarity
savez_compressed('iisimilarity.npz', iisimilarity)

In [47]:
iisimilarity = item_cosine_recsys.similarity

In [48]:
def top5(iisimilarity, champions_played_df, champion_name,k=10):
    # Pick top K based on predicted rating
    Champion_ID = champions_played_df[champions_played_df['Champion_Name']==champion_name]['Champion_ID'].values[0]
    itemVector = iisimilarity[Champion_ID,:]
    topK = nlargest(k+1, range(len(itemVector)), itemVector.take)
    topK = topK[1:]
    topChamp = list(map(lambda x: champions_played_df[champions_played_df.Champion_ID == x]["Champion_Name"].values[0], topK))
    return topChamp

In [67]:
top5(iisimilarity, champions_played_df,'Syndra')

['LeBlanc',
 'Orianna',
 'Cassiopeia',
 'Zoe',
 'Sylas',
 'Ryze',
 'Viktor',
 'Qiyana',
 'Akali',
 'Ezreal']

In [68]:
top5(iisimilarity, champions_played_df,'Ziggs')

['Orianna',
 'Veigar',
 'Syndra',
 'Corki',
 'Varus',
 'Xerath',
 'Ezreal',
 'Viktor',
 'Morgana',
 'Senna']

In [69]:
top5(iisimilarity, champions_played_df,'Lee Sin')

['Elise',
 'Jarvan IV',
 'Graves',
 'Olaf',
 'Ekko',
 'Gragas',
 "Rek'Sai",
 'Nidalee',
 "Kha'Zix",
 'Sylas']

Based on the result, the model actually finds similar champions very well. For exmaple, Syndra is a champion that is a mage, played at middle lane. The recommended system shows most champions that are played at mid that also have similar level of difficulty. 

## Make Recommendations Per User

### Get User Data from Riot API

In [96]:
from riotwatcher import LolWatcher, ApiError

In [97]:
import json

In [98]:
champion = pd.read_json("champion.json")

In [116]:
champion

Unnamed: 0,type,format,version,data
Aatrox,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Aatrox', '..."
Ahri,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Ahri', 'ke..."
Akali,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Akali', 'k..."
Alistar,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Alistar', ..."
Amumu,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Amumu', 'k..."
...,...,...,...,...
Zed,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Zed', 'key..."
Ziggs,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Ziggs', 'k..."
Zilean,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Zilean', '..."
Zoe,champion,standAloneComplex,10.10.3216176,"{'version': '10.10.3216176', 'id': 'Zoe', 'key..."


In [120]:
import json
with open('api_champion_list_dict.json', 'r') as f:
    data = json.load(f)


In [131]:
import json
with open('champion_list_dict.json', 'r') as f:
    data = json.load(f)


In [133]:
data['Lucian']

65

In [None]:
champion_list_dict

In [99]:
champion_IDs = []
champion_names = []
champion_tags = []

for data in champion['data']:
    champion_IDs.append(data['key'])
    champion_names.append(data['name'])
    champion_tags.append(data['tags'])
    
champion_list = pd.DataFrame({'Champion_ID': champion_IDs,
                             'Tags': champion_tags}, index = champion_names)
champion_list['Champion_ID'] = champion_list['Champion_ID'].astype(int)

In [100]:
champion_list.index

Index(['Aatrox', 'Ahri', 'Akali', 'Alistar', 'Amumu', 'Anivia', 'Annie',
       'Aphelios', 'Ashe', 'Aurelion Sol',
       ...
       'Xin Zhao', 'Yasuo', 'Yorick', 'Yuumi', 'Zac', 'Zed', 'Ziggs', 'Zilean',
       'Zoe', 'Zyra'],
      dtype='object', length=148)

In [101]:
# golbal variables
# Include your own API Key
api_key = '---'
watcher = LolWatcher(api_key)
region = 'na1'

In [102]:
def get_total_games(region, summoner_id):
    ranked_stats = watcher.league.by_summoner(region,summoner_id)
    total_games = 0

    for ranked_mode in ranked_stats:
        total_games += ranked_mode['wins'] + ranked_mode['losses']
    return total_games

In [103]:
    summoner_id = summoner_info['id']
    summoner_account_id = summoner_info['accountId']


In [104]:
ranked_stats = watcher.league.by_summoner(region,summoner_id)

In [105]:
match_lists = watcher.match.matchlist_by_account(region, summoner_account_id,queue=[420,440],begin_index=0, end_index=100)

In [107]:
match_lists['matches']

[{'platformId': 'NA1',
  'gameId': 3885477840,
  'champion': 18,
  'queue': 420,
  'season': 13,
  'timestamp': 1619666467638,
  'role': 'DUO_CARRY',
  'lane': 'BOTTOM'},
 {'platformId': 'NA1',
  'gameId': 3883795959,
  'champion': 360,
  'queue': 420,
  'season': 13,
  'timestamp': 1619572450645,
  'role': 'DUO_CARRY',
  'lane': 'BOTTOM'},
 {'platformId': 'NA1',
  'gameId': 3883708804,
  'champion': 498,
  'queue': 420,
  'season': 13,
  'timestamp': 1619569624707,
  'role': 'DUO_CARRY',
  'lane': 'BOTTOM'},
 {'platformId': 'NA1',
  'gameId': 3883637431,
  'champion': 51,
  'queue': 420,
  'season': 13,
  'timestamp': 1619564615860,
  'role': 'DUO_CARRY',
  'lane': 'BOTTOM'},
 {'platformId': 'NA1',
  'gameId': 3883577452,
  'champion': 202,
  'queue': 420,
  'season': 13,
  'timestamp': 1619559737647,
  'role': 'DUO_CARRY',
  'lane': 'BOTTOM'},
 {'platformId': 'NA1',
  'gameId': 3877554613,
  'champion': 12,
  'queue': 420,
  'season': 13,
  'timestamp': 1619142084077,
  'role': 'DUO_

In [57]:
def champion_count_list(region, summoner_account_id, total_games):

    champion_count={}
    for idx in range(0,total_games,100):
        if idx+100 > total_games:
            match_lists = watcher.match.matchlist_by_account(region, summoner_account_id,queue=[420,440],begin_index=idx, end_index=total_games)
        else:
            match_lists = watcher.match.matchlist_by_account(region, summoner_account_id,queue=[420,440],begin_index=idx, end_index=idx+100)

        for match in match_lists['matches']:
            champion_id = match['champion']
            champion_name = champion_id
            #champion_list[champion_list['Champion_ID']==champion_id].index[0]

            if champion_name in champion_count.keys():
                champion_count[champion_name] +=1/total_games
            else:
                champion_count[champion_name] =1/total_games
    
    return champion_count

In [58]:
def create_userVector(champion_count):
    matrix = np.zeros((num_items))
    
    for champion,count in champion_count.items():
        champion_id = champions_played_df[champions_played_df.Champion_Name == champion]["Champion_ID"].values[0]
        matrix[champion_id]=count
    return matrix

In [59]:
def get_predictions(userVector, iisimilarity):
    temp_matrix = np.zeros(userVector.shape)
    temp_matrix[userVector.nonzero()] = 1
    normalizer = np.matmul(iisimilarity, temp_matrix)
    normalizer[normalizer == 0] = 1e-5
    
    predictionUser = np.matmul(iisimilarity, userVector)/normalizer
    
    return predictionUser

In [60]:
def get_top_n_recs(result, q):
    recs = []
    for i in range(len(result)):
        if q[i] < 0.05: # Recommend if user played < 5% of the time
            champ_name = champions_played_df[champions_played_df.Champion_ID == i]["Champion_Name"].values[0]
            recs.append((champ_name, result[i]))
            # recs.append((i, result[i])) #leave this to verify things actually working
    recs = sorted(recs, key=lambda tup: tup[1], reverse=True)

    return recs

In [61]:
def get_recommendations(region, summoner,iisimilarity):
    summoner_info = watcher.summoner.by_name(region, summoner)
    summoner_id = summoner_info['id']
    summoner_account_id = summoner_info['accountId']
    
    ranked_stats = watcher.league.by_summoner(region,summoner_id)

    total_games = get_total_games(region, summoner_id)
    
    champion_count = champion_count_list(region, summoner_account_id, total_games)
    
    user_vector = create_userVector((champion_count))
    
    predictions = get_predictions(user_vector, iisimilarity)
    
    return get_top_n_recs(predictions, user_vector)

In [62]:
iisimilarity = item_cosine_recsys.similarity