In [1]:
# Authors: Michael Treacy and Nathan Lamberson
# Primary Content: Item-based recommender and model 1 recommender

In [2]:
# Primary sources of help: http://www.salemmarafi.com/code/collaborative-filtering-with-python/#:~:text=Collaborative%20Filtering%20with%20Python%201%20Refresher%3A%20The%20Last.FM,collaborative%20Filtering.%20...%204%20Entire%20Code%205%20Referenence
#                          https://medium.com/@sam.mail2me/recommendation-systems-collaborative-filtering-just-with-numpy-and-pandas-a-z-fa9868a95da2
#                          https://en.wikipedia.org/wiki/Collaborative_filtering

In [3]:
# Get needed libraries
import numpy as np
import pandas as pd

In [4]:
# Read data in
main_df = pd.read_csv(filepath_or_buffer = 'steam-200k.csv', names = ['player', 'game', 'behavior', 'quantity', 'other'])

In [5]:
# See how it's configured
main_df.head()

Unnamed: 0,player,game,behavior,quantity,other
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,0
2,151603712,Fallout 4,purchase,1.0,0
3,151603712,Fallout 4,play,87.0,0
4,151603712,Spore,purchase,1.0,0


In [6]:
# Check each column's type
main_df.dtypes

player        int64
game         object
behavior     object
quantity    float64
other         int64
dtype: object

In [7]:
# Check if 'other' column serves purpose
main_df['other'].sum()

0

In [8]:
# Check for NaN cells
main_df.isnull()

Unnamed: 0,player,game,behavior,quantity,other
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
199995,False,False,False,False,False
199996,False,False,False,False,False
199997,False,False,False,False,False
199998,False,False,False,False,False


In [9]:
# Clean data down to only relevant info
main_df = main_df.drop(labels = 'other', axis = 1)
main_df = main_df.dropna()
main_df = main_df[main_df.behavior != 'purchase']
main_df = main_df.drop(labels = 'behavior', axis = 1)
main_df = main_df[main_df.quantity != 0]
main_df = main_df.rename(columns = {'quantity': 'hours'})
main_df = main_df.reset_index(drop = True)

In [10]:
# Check result of cleaning
main_df.head()

Unnamed: 0,player,game,hours
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [11]:
# Check mean hours that games are generally played
main_df['hours'].mean()

48.878063243911484

In [12]:
# Show total number of players
len(main_df['player'].unique())

11350

In [13]:
# Show total number of games
len(main_df['game'].unique())

3600

In [14]:
# Make df of players
players_df = main_df[['player']]

In [15]:
# Get rid of players who've played less than 5 games
main_df = main_df[players_df.replace(players_df.apply(pd.Series.value_counts)).gt(4).all(1)]

In [16]:
# Check results
main_df['player'].value_counts()

62990992     498
11403772     314
138941587    299
47457723     298
49893565     297
            ... 
110238251      5
238773402      5
191988742      5
5976642        5
83168693       5
Name: player, Length: 2436, dtype: int64

In [17]:
# Save a player
player_id = 151603712

In [18]:
# Display their games
main_df.loc[main_df['player'] == player_id]

Unnamed: 0,player,game,hours
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9
5,151603712,HuniePop,8.5
6,151603712,Path of Exile,8.1
7,151603712,Poly Bridge,7.5
8,151603712,Left 4 Dead,3.3
9,151603712,Team Fortress 2,2.8


In [19]:
# Get their list of games and add cos (important later for excluding in later recommendations list)
player_games = list(main_df.loc[main_df['player'] == player_id].game)
player_games.append('cos')

In [20]:
# Display them
player_games

['The Elder Scrolls V Skyrim',
 'Fallout 4',
 'Spore',
 'Fallout New Vegas',
 'Left 4 Dead 2',
 'HuniePop',
 'Path of Exile',
 'Poly Bridge',
 'Left 4 Dead',
 'Team Fortress 2',
 'Tomb Raider',
 'The Banner Saga',
 'Dead Island Epidemic',
 'BioShock Infinite',
 'Dragon Age Origins - Ultimate Edition',
 'Fallout 3 - Game of the Year Edition',
 'SEGA Genesis & Mega Drive Classics',
 'Grand Theft Auto IV',
 'Realm of the Mad God',
 'Marvel Heroes 2015',
 'Eldevin',
 'Dota 2',
 'BioShock',
 'Robocraft',
 "Garry's Mod",
 'Jazzpunk',
 'cos']

In [21]:
# Check mean hours games are generally played now
main_df['hours'].mean()

40.580387270934395

In [22]:
# Show total number of players now
len(main_df['player'].unique())

2436

In [23]:
# Show total number of games now
len(main_df['game'].unique())

3544

In [24]:
# Create total game hours df
game_hours_df = main_df.drop(labels = 'player', axis = 1)
game_hours_df = game_hours_df.groupby(by = ['game']).sum()
game_hours_df = game_hours_df.rename(columns = {'hours': 'total_hours'})
game_hours_df = game_hours_df.sort_values(by = 'total_hours', ascending = False)

In [25]:
# See most played games
game_hours_df.head()

Unnamed: 0_level_0,total_hours
game,Unnamed: 1_level_1
Dota 2,373034.6
Counter-Strike Global Offensive,259751.4
Team Fortress 2,114525.0
Counter-Strike Source,77208.5
Counter-Strike,75982.4


In [26]:
# See least played games
game_hours_df.tail()

Unnamed: 0_level_0,total_hours
game,Unnamed: 1_level_1
One Way To Die Steam Edition,0.1
Slip,0.1
Guardians of Orion,0.1
Greed Black Border,0.1
Tiamat X,0.1


In [27]:
# Calculate max hours of gaming for each player
players_max_df = main_df.groupby(by = ['player'], as_index = False, sort = False).max()
players_max_df = players_max_df.rename(columns = {'hours': 'max'})
players_max_df = players_max_df.drop(labels = 'game', axis = 1)

In [28]:
# View results
players_max_df.head()

Unnamed: 0,player,max
0,151603712,273.0
1,59945701,238.0
2,53875128,86.0
3,26122540,92.0
4,126340495,1784.0


In [29]:
# Calculate min hours of gaming for each player
players_min_df = main_df.groupby(by = ['player'], as_index = False, sort = False).min()
players_min_df = players_min_df.rename(columns = {'hours': 'min'})
players_min_df = players_min_df.drop(labels = 'game', axis = 1)

In [30]:
# See results
players_min_df.head()

Unnamed: 0,player,min
0,151603712,0.1
1,59945701,0.1
2,53875128,0.1
3,26122540,1.1
4,126340495,0.3


In [31]:
# Add rating column to main df
main_df = main_df.merge(right = players_max_df, how = 'left', on = 'player')
main_df = main_df.merge(right = players_min_df, how = 'left', on = 'player')
main_df['rating'] = ((main_df['hours'] - main_df['min']) / (main_df['max'] - main_df['min'])) * (10 - 1) + 1

In [32]:
# See results
main_df.head()

Unnamed: 0,player,game,hours,max,min,rating
0,151603712,The Elder Scrolls V Skyrim,273.0,273.0,0.1,10.0
1,151603712,Fallout 4,87.0,273.0,0.1,3.865885
2,151603712,Spore,14.9,273.0,0.1,1.488091
3,151603712,Fallout New Vegas,12.1,273.0,0.1,1.395749
4,151603712,Left 4 Dead 2,8.9,273.0,0.1,1.290216


In [33]:
# Create players-by-games df using rating data
main_df = main_df.drop(labels = ['hours', 'max', 'min'], axis = 1)
players_by_games_df = main_df.pivot_table(index = 'player', columns = 'game', values = 'rating')
players_by_games_df = players_by_games_df.fillna(0)

In [34]:
# Check resulting df
players_by_games_df.head()

game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5250,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
229911,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
298950,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Check types
players_by_games_df.dtypes

game
007 Legends                                                   float64
0RBITALIS                                                     float64
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby)    float64
10 Second Ninja                                               float64
10,000,000                                                    float64
                                                               ...   
rymdkapsel                                                    float64
sZone-Online                                                  float64
the static speaks my name                                     float64
theHunter                                                     float64
theHunter Primal                                              float64
Length: 3544, dtype: object

In [36]:
# Create games-by-games df
games_by_games_df = players_by_games_df.transpose().dot(players_by_games_df)

In [37]:
# Check results
games_by_games_df.head()

game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 Legends,1.026785,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0RBITALIS,0.0,3.084812,0.0,1.062825,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.041005,0.0
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),0.0,0.0,10.364868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.012898,0.0,0.0,0.0
10 Second Ninja,0.0,1.062825,0.0,2.420024,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10000000,0.0,0.0,0.0,0.0,1.191003,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
# Specify number of games to recommend
games_to_rec = 20

In [39]:
# Make recommendation-by-game matrix
rec_by_game_df = pd.DataFrame(index = games_by_games_df.columns, columns = range(1, games_to_rec + 1))

In [40]:
# Create slice object for game recommendations
games_slice = slice(0, games_to_rec)

In [41]:
# Fill in each game's most similar games
for each_game in range(len(games_by_games_df.columns)):
    games_value_col = games_by_games_df.iloc[:, each_game]
    sorted_values_to_games = games_value_col.sort_values(ascending = False)[games_slice].index
    rec_by_game_df.iloc[each_game, games_slice] = sorted_values_to_games

In [42]:
# Display recommendation matrix
rec_by_game_df.head()

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
007 Legends,Empire Total War,Stronghold Crusader 2,FIFA Manager 11,Grand Theft Auto V,Homefront,Sid Meier's Civilization V,Total War ROME II - Emperor Edition,RAGE,Call of Duty Modern Warfare 2 - Multiplayer,Ryse Son of Rome,Company of Heroes 2,Call of Duty Advanced Warfare,R.U.S.E,Stronghold 3,Call of Duty Black Ops,Grand Theft Auto IV,Call of Duty Modern Warfare 2,Crysis 2,Call of Duty Black Ops - Multiplayer,Mafia II
0RBITALIS,Terraria,Elite Dangerous,Garry's Mod,Empire Total War,Counter-Strike Global Offensive,Team Fortress 2,Fallout 4,Don't Starve,Sid Meier's Civilization V,Dying Light,Grand Theft Auto V,The Witcher 3 Wild Hunt,Borderlands The Pre-Sequel,Watch_Dogs,H1Z1,Dishonored,Far Cry 4,Batman Arkham Knight,0RBITALIS,Metro Last Light
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),Defense Grid The Awakening,Dota 2,Team Fortress 2,Spiral Knights,Blacklight Retribution,Terraria,Dungeon Defenders,Borderlands 2,Call of Duty Modern Warfare 2 - Multiplayer,Unturned,Saints Row The Third,Defiance,Left 4 Dead 2,Deus Ex Human Revolution,Call of Duty Ghosts - Multiplayer,Garry's Mod,Counter-Strike Global Offensive,Sid Meier's Civilization V,Portal 2,Super Meat Boy
10 Second Ninja,Fallout New Vegas,Elite Dangerous,Fallout 4,Don't Starve,Counter-Strike Global Offensive,Garry's Mod,Terraria,Mount & Blade Warband,Sid Meier's Civilization V,Grand Theft Auto V,The Witcher 3 Wild Hunt,Watch_Dogs,H1Z1,Far Cry 4,Batman Arkham Knight,Dying Light,Bejeweled 3,Grow Home,Half-Life 2,RollerCoaster Tycoon 3 Platinum!
10000000,The Elder Scrolls V Skyrim,The Elder Scrolls IV Oblivion,Fallout New Vegas,Fallout 3,Team Fortress 2,Kingdoms of Amalur Reckoning,Sid Meier's Civilization IV,The Elder Scrolls III Morrowind,Red Faction Guerrilla Steam Edition,Portal 2,Left 4 Dead,Deus Ex Human Revolution,Dishonored,Tomb Raider,Torchlight,BioShock,Torchlight II,Sid Meier's Civilization IV Beyond the Sword,Darksiders,RAGE


In [43]:
# Show recommendations for an FPS game
print(rec_by_game_df.loc[['Counter-Strike Global Offensive']])

                                                               1       2  \
game                                                                       
Counter-Strike Global Offensive  Counter-Strike Global Offensive  Dota 2   

                                               3            4  \
game                                                            
Counter-Strike Global Offensive  Team Fortress 2  Garry's Mod   

                                                     5         6  \
game                                                               
Counter-Strike Global Offensive  Counter-Strike Source  Unturned   

                                             7               8         9  \
game                                                                       
Counter-Strike Global Offensive  Left 4 Dead 2  Counter-Strike  Terraria   

                                                         10        11  \
game                                                                

In [44]:
# Show recommendations for a strategy game
print(rec_by_game_df.loc[['Sid Meier\'s Civilization V']])

                                                     1  \
game                                                     
Sid Meier's Civilization V  Sid Meier's Civilization V   

                                                     2                3  \
game                                                                      
Sid Meier's Civilization V  The Elder Scrolls V Skyrim  Team Fortress 2   

                                                          4       5         6  \
game                                                                            
Sid Meier's Civilization V  Counter-Strike Global Offensive  Dota 2  Terraria   

                                            7              8              9  \
game                                                                          
Sid Meier's Civilization V  Fallout New Vegas  Borderlands 2  Left 4 Dead 2   

                                     10        11                  12  \
game                                        

In [45]:
# Create array filled with each player's ratings
players_arr = players_by_games_df.values

In [46]:
# Display array
players_arr

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [47]:
# Get players-by-games df value row according to player id passed in
def get_player_row(player_id):
    x = 0
    for each_player_index in players_by_games_df.index:
        if each_player_index == player_id:
            return players_arr[x]
        x = x + 1

In [48]:
# Get players-by-games df reg index according to player id passed in
def get_player_index(player_id):
    x = 0
    for each_player_index in players_by_games_df.index:
        if each_player_index == player_id:
            return x
        x = x + 1

In [49]:
# Get players-by-games df index according to reg index passed in
def get_other_index(regular_index):
    x = 0
    for each_player_index in players_by_games_df.index:
        if x == regular_index:
            return each_player_index
        x = x + 1

In [50]:
# Specify number of most similar players to include
num_similar_players = 20

In [51]:
# Create slice
high_slice = slice(0, num_similar_players)

In [52]:
# Create similarity matrix for specified player beginning with saving (player, cos) as first in similarity list
similar_players = [(player_id, 1)]

In [53]:
# Square, sum, and root of this player
player = get_player_row(player_id)
denom_rx = [np.square(denom_rx) for denom_rx in player]
denom_rx = sum(denom_rx)
denom_rx = np.sqrt(denom_rx)

In [54]:
# Make players array without this player's row
other_players_arr = players_arr.copy()
player_index = get_player_index(player_id)
other_players_arr = np.delete(other_players_arr, player_index, axis = 0)

In [55]:
# Fill (similar player, cos) matrix
x = 1
for each_players_hours in other_players_arr:
    # Square, sum, and root of other
    denom_ry = [np.square(denom_ry) for denom_ry in each_players_hours]
    denom_ry = sum(denom_ry)
    denom_ry = np.sqrt(denom_ry)

    # Sum of products for this player and other
    prod = [numer_rx * numer_ry for numer_rx, numer_ry in zip(player, each_players_hours)]
    sum_of_prod = sum(prod)

    # Square, sum, and root of player times square, sum, and root of other
    square_sum_root_prod = denom_rx * denom_ry

    # Add (other, cos) to similarity list
    i = get_other_index(x)
    similar_players.append((i, sum_of_prod / square_sum_root_prod))
    x = x + 1

In [56]:
# Get most similar players
similar_players = sorted(similar_players, key = lambda player_cos: player_cos[1])
similar_players.reverse()
similar_players_high = similar_players[high_slice]

In [57]:
# Display players
similar_players_high

[(151603712, 1),
 (124070622, 0.8641064642216141),
 (92482012, 0.8283351504344969),
 (96836693, 0.8162650438778727),
 (205343727, 0.8156451077146665),
 (95044091, 0.8137643072409486),
 (162649407, 0.8100441032324922),
 (115959445, 0.8097016484236411),
 (158655503, 0.790178060275933),
 (98649241, 0.7887874378277481),
 (28997716, 0.7831151406600689),
 (99133844, 0.7814537673332405),
 (53258793, 0.7801456858506098),
 (114924461, 0.7788369547200822),
 (131032344, 0.7749811226676611),
 (196066421, 0.7563990551774754),
 (85210874, 0.7454941217536963),
 (58768356, 0.7437891879456942),
 (92107940, 0.7414970367872876),
 (160767652, 0.7413220449580241)]

In [58]:
# Create df for similar players
similar_players_high_df = pd.DataFrame()

In [59]:
# Fill df with players and games rating data
for each_similar_player in similar_players_high:
    similar_players_high_df = similar_players_high_df.append(players_by_games_df.loc[each_similar_player[0]])

In [60]:
# Add cos column
similar_players_high_df['cos'] = [each_player[1] for each_player in similar_players_high]

In [61]:
# Display result
similar_players_high_df

Unnamed: 0,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal,cos
151603712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
124070622,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.864106
92482012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.828335
96836693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.816265
205343727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.815645
95044091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.813764
162649407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.810044
115959445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.809702
158655503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.790178
98649241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.788787


In [62]:
# Sum cosines
cos_sum = sum([each_player_cos[1] for each_player_cos in similar_players_high])

In [63]:
# Fill rating of non-rated games for specified player with weighted average
x = 0
for each_game_rating in similar_players_high_df.loc[player_id]:
    rating_cos_total = 0
    if each_game_rating == 0.0:
        for each_player in range(1, num_similar_players):
            their_rating = similar_players_high_df.iloc[each_player][x]
            their_cos = similar_players_high_df.iloc[each_player][len(similar_players_high_df.columns) - 1]
            rating_cos_total = rating_cos_total + their_rating * their_cos
        similar_players_high_df.loc[player_id][x] = rating_cos_total / cos_sum
    x = x + 1

In [64]:
# View result
similar_players_high_df

Unnamed: 0,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal,cos
151603712,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
124070622,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.864106
92482012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.828335
96836693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.816265
205343727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.815645
95044091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.813764
162649407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.810044
115959445,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.809702
158655503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.790178
98649241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.788787


In [65]:
# Create recommendation list
rec_by_player = []

In [66]:
# Look for unplayed games with highest calculated rating
for game_index, rating in enumerate(similar_players_high_df.loc[player_id].values):
    if rating > 0.0:
        game_name = similar_players_high_df.columns[game_index]
        if game_name not in player_games:
            rec_by_player.append((game_name, rating))

In [67]:
# Sort recommendations from best to worst
rec_by_player.sort(key = lambda game_rating: game_rating[1], reverse = True)

In [68]:
# Show player's played games and rating
main_df.loc[main_df['player'] == player_id]

Unnamed: 0,player,game,rating
0,151603712,The Elder Scrolls V Skyrim,10.0
1,151603712,Fallout 4,3.865885
2,151603712,Spore,1.488091
3,151603712,Fallout New Vegas,1.395749
4,151603712,Left 4 Dead 2,1.290216
5,151603712,HuniePop,1.277025
6,151603712,Path of Exile,1.263833
7,151603712,Poly Bridge,1.244045
8,151603712,Left 4 Dead,1.105533
9,151603712,Team Fortress 2,1.089044


In [69]:
# Display recommendations for them
rec_by_player

[('Counter-Strike Global Offensive', 0.6374164855927679),
 ('Day of Defeat Source', 0.5788698799346529),
 ('The Lord of the Rings War in the North', 0.5717334664315696),
 ('Terraria', 0.5610010490698443),
 ('F1 2012', 0.5412891282034933),
 ('Counter-Strike', 0.5395506765237545),
 ('Napoleon Total War', 0.5097540530799733),
 ('Unturned', 0.5072519983330679),
 ('Counter-Strike Source', 0.4905549597440934),
 ("Sid Meier's Civilization V", 0.489973440644455),
 ('Total War SHOGUN 2', 0.4659205986532864),
 ('Rome Total War', 0.42359980415828646),
 ('7 Days to Die', 0.4224902484622054),
 ('Company of Heroes 2', 0.3772725184292423),
 ('Tropico 4', 0.32329855986783346),
 ('Portal 2', 0.30721761929703667),
 ('Warframe', 0.2988080822184833),
 ('Hitman Absolution', 0.28485252835714614),
 ('Insurgency', 0.2635509152351086),
 ('PlanetSide 2', 0.2412169529852012),
 ('Rust', 0.229031772491217),
 ('Magic 2014 ', 0.22379410947943962),
 ('Killing Floor', 0.2216003283525404),
 ('Heroes & Generals', 0.2198

In [70]:
# Fin