This notebook is used to make recommendations by using lightfm module

import packages 

In [63]:
import numpy as np 
import pandas as pd 
from scipy import sparse
import random
import lightfm
from lightfm import LightFM, cross_validation
from lightfm.evaluation import precision_at_k, auc_score
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import json 
import os
import pickle
import redis
from utils import *
import pyarrow as pa 

i loaded the data from the api and saved to seperate folder 

In [3]:
input_path = "download/heroes"
df = pd.DataFrame()
for file in os.listdir(input_path):
    if file.endswith('.json'):
        p = os.path.join(input_path, file)
        f = open(p)
        tmp = json.load(f)
        tmp_df = pd.DataFrame(tmp)
        if not tmp_df.empty:
            tmp_df.loc[:, 'hero_id'] = int(file.split("_")[0])
            df = df.append(tmp_df.copy())


In [4]:
print(f"data shape: {df.shape}")

data shape: (12300, 13)


#### Data preproccessing

In [5]:
api = OpenDotaAPI()
heroes  = api._call(" https://api.opendota.com/api/heroes", parameters=None)
heroes_df = pd.DataFrame(heroes)
heroes_df

Unnamed: 0,id,name,localized_name,primary_attr,attack_type,roles,legs
0,1,npc_dota_hero_antimage,Anti-Mage,agi,Melee,"[Carry, Escape, Nuker]",2
1,2,npc_dota_hero_axe,Axe,str,Melee,"[Initiator, Durable, Disabler, Jungler, Carry]",2
2,3,npc_dota_hero_bane,Bane,int,Ranged,"[Support, Disabler, Nuker, Durable]",4
3,4,npc_dota_hero_bloodseeker,Bloodseeker,agi,Melee,"[Carry, Disabler, Jungler, Nuker, Initiator]",2
4,5,npc_dota_hero_crystal_maiden,Crystal Maiden,int,Ranged,"[Support, Disabler, Nuker, Jungler]",2
...,...,...,...,...,...,...,...
118,128,npc_dota_hero_snapfire,Snapfire,str,Ranged,"[Support, Nuker, Disabler, Escape]",2
119,129,npc_dota_hero_mars,Mars,str,Melee,"[Carry, Initiator, Disabler, Durable]",2
120,135,npc_dota_hero_dawnbreaker,Dawnbreaker,str,Melee,"[Carry, Durable]",2
121,136,npc_dota_hero_marci,Marci,str,Melee,"[Support, Carry, Initiator, Disabler, Escape]",2


In [6]:
df =df.groupby('hero_id').filter(lambda x: len(x)>=50)
df = df[df.groupby('account_id').hero_id.transform('nunique')>=10]

df_freq = df.groupby(['account_id', 'hero_id']).agg('size').reset_index().rename(columns={0:'freq'})[['account_id', 'hero_id', 'freq']].sort_values(['freq'], ascending=False)
df_freq.head()


Unnamed: 0,account_id,hero_id,freq
2801,1262266956,91,30
2570,1173255449,8,22
2695,1228355759,2,20
2706,1228355759,96,18
580,131774502,36,18


In [35]:
df_hero = pd.DataFrame(df_freq["hero_id"].unique())
df_hero = df_hero.reset_index()
df_hero = df_hero.rename(columns={'index':'hero_index', 0:'hero_id'})
df_hero = df_hero.merge(heroes_df[['id', 'localized_name']], left_on='hero_id', right_on='id', how='left').drop("id",axis=1)
df_hero = df_hero.rename(columns={'localized_name':'hero_name'})

df_hero.head()

Unnamed: 0,hero_index,hero_id,hero_name
0,0,91,Io
1,1,8,Juggernaut
2,2,2,Axe
3,3,96,Centaur Warrunner
4,4,36,Necrophos


In [8]:
print(f"heroes dataframe shape: {df_hero.shape}")

heroes dataframe shape: (123, 3)


#### Define functions

In [9]:
def create_interactions(df,user_id, item_id, rating_col):
    '''
    This function is used to define user-item interactions as matrix 
    Parameters 
        - df = Pandas DataFrame containing user-item interactions
        - user_id = column contains player's identifier
        - item_id = column  contains hero's identifier
        - rating_col = column contains number of interactions happened between player and hero
    Returns 
        - Pandas dataframe with user-item interactions for recommendation algorithm
    '''
    interactions = df.groupby([user_id, item_id])[rating_col].sum().unstack().reset_index().fillna(0).set_index(user_id)
    return interactions

In [10]:

def get_user_dict(interactions):
    '''
    This function is used to seperate the user's interactions 
    Parameters
        interactions : user-hero interactions
    Returns 
        user_dict : dictionary of key:player_id and value:player's interaction count 
    '''
    user_id = list(interactions.index)
    user_dict = {}
    counter = 0 
    for i in user_id:
        user_dict[i] = counter
        counter += 1
    return user_dict


In [11]:
def get_item_dict(df,id_col,name_col):
    '''
    This function is used to seperate the hero's interactions
    Parameters
        - df : a dataframe contains hero's information 
        - id_col: hero's identifier
        - name_col : hero's name
    Returns
        item_dict : dictionary of key:hero_id and value:hero's interaction count
    '''
    item_dict ={}
    for i in range(df.shape[0]):
        item_dict[(df.loc[i,id_col])] = df.loc[i,name_col]
    return item_dict

#### Train Model 

In [12]:
def train_LightFM(interactions, n_components=30, loss='warp', k=15, epoch=30,n_jobs = 4):
    '''
    This function trains the lightfm model for given interactions 
    Parameters 
        - interactions : player-hero interactions 
        - n_components : number of embeddings to define player and hero
        - loss : loss function other options are logistic, brp
        - epoch : number of epochs 
        - n_jobs : number of cores 
    Returns 
        model: lightfm model
    '''
    model = LightFM(no_components= n_components, loss=loss,k=k)
    model.fit(X,epochs=epoch,num_threads = n_jobs)
    return model

#### Make recommendations

In [60]:
def recommend_by_user(model,interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 10, show = True):
    '''
    This function makes hero recommendations for given player 
    Parameters  
        - model :lightfm model
        - interactions : player hero interactions 
        - user_id : player id whom we want to recommend heros 
        - user_dict : dictionary of key:player_id and value:player's interaction count  
        - item_dict : dictionary of key:hero_id and value:hero's interaction count
        - threshold : value above which the rating is favorable in new interaction matrix
        - nrec_items : number of recommendations 
    Returns 
        - returns list of heros that player already played
        - returns recommendations 
    '''
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items)))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    previously_played = list(pd.Series(interactions.loc[user_id,:][interactions.loc[user_id,:] > threshold].index).sort_values(ascending=False))
    
    scores = [x for x in scores if x not in previously_played]
    return_score_list = scores[0:nrec_items]
    previously_played = list(pd.Series(previously_played).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    recommendations = [{"hero_id":str(id),"name":item_dict[id]} for id in return_score_list]
    if show == True:
        print("Previoulsy played:")
        counter = 1
        for i in previously_played:
            print(str(counter) + ':' + i)
            counter+=1

        print("\n Recommended heroes:")
        counter = 1
        for i in scores:
            print(str(counter) + ':' + i)
            counter+=1
            
    return recommendations



#### Generate data

In [46]:
interactions = create_interactions(df = df_freq, user_id = "account_id", item_id = 'hero_id', rating_col = 'freq')
interactions.head()

hero_id,1,2,3,4,5,6,7,8,9,10,...,119,120,121,123,126,128,129,135,136,137
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
27178898,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32128719,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41637292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
56981677,14.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58429537,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0


In [106]:
r = redis.Redis(host='localhost', port=6379, db=0)
context = pa.default_serialization_context()
r.set("interactions", context.serialize(interactions).to_buffer().to_pybytes())
# context.deserialize(r.get("interactions"))

True

In [107]:
user_dict = get_user_dict(interactions=interactions)
user_dict_permuted = {str(k):str(v) for k,v in user_dict.items()}
r.hmset('user_dict_permuted', user_dict_permuted)

  r.hmset('user_dict_permuted', user_dict_permuted)


True

In [108]:
hero_dict = get_item_dict(df = df_hero, id_col = 'hero_id', name_col = 'hero_name')
hero_dict_permuted = {str(k):v for k,v in hero_dict.items()}
r.hmset("hero_dict_permuted", hero_dict_permuted)

  r.hmset("hero_dict_permuted", hero_dict_permuted)


True

In [37]:
X = sparse.csr_matrix(interactions.values)
train, test = lightfm.cross_validation.random_train_test_split(X, test_percentage=0.2, random_state=None)


#### Model build

In [18]:
model = train_LightFM(interactions = train,
                 n_components = 30,
                 loss = 'warp',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

In [64]:
# save model 
with open('models/lighfm.pickle', 'wb') as file:
    pickle.dump(model,file)

#### Model results

In [19]:
train_auc = auc_score(model, train, num_threads=4).mean()
print('Train AUC: %s' % train_auc)

Train AUC: 0.9621279


In [20]:
test_auc = auc_score(model, test, train_interactions=train, num_threads=4).mean()
print('Test AUC: %s' % test_auc)

Test AUC: 0.977266


In [21]:
train_precision = precision_at_k(model, train, k=1).mean()
test_precision = precision_at_k(model, test, k=1, train_interactions=train).mean()
print(f"train precision@1 : {train_precision}, test precision@1: {test_precision}")

train precision@1 : 0.7530864477157593, test precision@1: 0.7914893627166748


#### Let's see some examples 

In [22]:
hero_dict

{0: 'Io',
 1: 'Juggernaut',
 2: 'Axe',
 3: 'Centaur Warrunner',
 4: 'Necrophos',
 5: 'Zeus',
 6: 'Shadow Demon',
 7: 'Phantom Assassin',
 8: 'Viper',
 9: 'Ancient Apparition',
 10: 'Ogre Magi',
 11: 'Shadow Shaman',
 12: 'Lone Druid',
 13: 'Terrorblade',
 14: 'Anti-Mage',
 15: 'Earthshaker',
 16: 'Outworld Destroyer',
 17: 'Magnus',
 18: 'Alchemist',
 19: 'Grimstroke',
 20: 'Medusa',
 21: 'Wraith King',
 22: 'Invoker',
 23: 'Queen of Pain',
 24: 'Dark Willow',
 25: 'Sand King',
 26: 'Bane',
 27: 'Dragon Knight',
 28: 'Lycan',
 29: 'Venomancer',
 30: 'Sven',
 31: 'Pugna',
 32: 'Phantom Lancer',
 33: 'Jakiro',
 34: 'Abaddon',
 35: 'Tinker',
 36: 'Slardar',
 37: 'Broodmother',
 38: 'Shadow Fiend',
 39: 'Techies',
 40: 'Spirit Breaker',
 41: 'Gyrocopter',
 42: 'Monkey King',
 43: 'Legion Commander',
 44: 'Pangolier',
 45: 'Huskar',
 46: 'Treant Protector',
 47: 'Elder Titan',
 48: 'Chaos Knight',
 49: "Nature's Prophet",
 50: 'Clinkz',
 51: 'Disruptor',
 52: 'Drow Ranger',
 53: 'Riki',
 54

In [62]:
rec_list = recommend_by_user(model = model, 
                                      interactions = interactions, 
                                      user_id = 92949094, 
                                      user_dict = user_dict,
                                      item_dict = hero_dict, 
                                      threshold = 0,
                                      nrec_items = 10,
                                      show = True)
rec_list

Previoulsy played:
1:Oracle
2:Phoenix
3:Underlord
4:Abaddon
5:Io
6:Ancient Apparition
7:Clockwerk
8:Warlock
9:Witch Doctor
10:Shadow Shaman

 Recommended heroes:
1:Treant Protector
2:Enchantress
3:Vengeful Spirit
4:Elder Titan
5:Bane
6:Chen
7:Mirana
8:Dazzle
9:Disruptor
10:Grimstroke


[{'hero_id': 83, 'name': 'Treant Protector'},
 {'hero_id': 58, 'name': 'Enchantress'},
 {'hero_id': 20, 'name': 'Vengeful Spirit'},
 {'hero_id': 103, 'name': 'Elder Titan'},
 {'hero_id': 3, 'name': 'Bane'},
 {'hero_id': 66, 'name': 'Chen'},
 {'hero_id': 9, 'name': 'Mirana'},
 {'hero_id': 50, 'name': 'Dazzle'},
 {'hero_id': 87, 'name': 'Disruptor'},
 {'hero_id': 121, 'name': 'Grimstroke'}]

In [61]:
rec_list = recommend_by_user(model = model, 
                                      interactions = interactions, 
                                      user_id = 87012746, 
                                      user_dict = user_dict,
                                      item_dict = hero_dict, 
                                      threshold = 0,
                                      nrec_items = 5,
                                      show = True)


Previoulsy played:
1:Primal Beast
2:Dawnbreaker
3:Mars
4:Legion Commander
5:Elder Titan
6:Visage
7:Brewmaster
8:Doom
9:Dark Seer
10:Death Prophet
11:Razor

 Recommended heroes:
1:Beastmaster
2:Broodmother
3:Tidehunter
4:Lycan
5:Timbersaw
