In [3]:
%matplotlib inline

import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from sklearn.preprocessing import Imputer

def getSummaries(ratings, matrix, index):
    data = pd.DataFrame(np.array(ratings) * np.array(matrix.iloc[:,2:])
        , index=matrix[index], columns=matrix.iloc[:,2:].columns)

    #display(data.head(15))
    ids = pd.unique(matrix[index])
    packed = pd.DataFrame(index=ids,columns=data.columns)
    #display(packed.head(5))
    #display(packed.loc[272])
    summed = data.groupby([data.index]).max()
    
    # This Imputer is pretty naive. A KNN imputer would be much better.
    impute = Imputer(missing_values='NaN', strategy='mean', axis=0)
    X_imputed = pd.DataFrame(data=impute.fit_transform(summed.as_matrix()))
    X_imputed.index = summed.index
    X_imputed.columns = summed.columns
    display(X_imputed.head(5))
    return X_imputed

def findNeighbors(data, data_ibs):
    # Lets fill in those empty spaces with cosine similarities
    # Loop through the columns
    for i in range(0,len(data_ibs.columns)) :
        # Loop through the columns for each column
        if i % 100 == 0:
            display(i)
        for j in range(0,len(data_ibs.columns)) :
            # Fill in placeholder with cosine similarities
            data_ibs.iloc[i,j] = 1-cosine(data.iloc[:,i],data.iloc[:,j])
        
    #display(data_ibs.head(5))

    # Create a placeholder items for closes neighbours to an item
    data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,21))
 
    # Loop through our similarity dataframe and fill in neighbouring item names
    for i in range(0,len(data_ibs.columns)):
        data_neighbours.iloc[i,:20] = data_ibs.iloc[0:,i].sort_values(ascending=False)[:20].index
 
    return data_neighbours


rawdata = pd.read_csv('boardgame-frequent-users.csv')
rawdata=rawdata.rename(columns = {"Compiled from boardgamegeek.com by Matt Borthwick":'userID'})

details = pd.read_csv('boardgame-details.csv')
details = details.rename(columns = {"boardgamegeek.com game ID": 'gameID'})
titles = details.iloc[:,0:2]

gamedata = pd.merge(rawdata.copy(), titles, on='gameID')
X = pd.get_dummies(gamedata, columns=['title'])
X = X.replace(0, np.nan)
del X['gameID']
ratings = X.iloc[:,1:2].copy()

gsum = getSummaries(ratings, X,'userID')
display(gsum.head(6))
gsum.to_csv('gsum.csv', sep='\t')

data_ibs = pd.DataFrame(index=gsum.columns,columns=gsum.columns)
gneighbors = findNeighbors(gsum, data_ibs)
display(gneighbors.head(55))

gneighbors.to_csv('gneighbors.csv', sep='\t')


Unnamed: 0_level_0,title_1960: The Making of the President,title_6 nimmt!,title_7 Wonders,title_7 Wonders Duel,title_7 Wonders: Cities,title_7 Wonders: Leaders,title_A Feast for Odin,title_A Few Acres of Snow,title_A Game of Thrones (first edition),title_A Game of Thrones: The Board Game (Second Edition),...,title_Wizard,title_XCOM: The Board Game,title_YINSH,title_Yahtzee,title_Yspahan,title_Zombicide,title_Zombicide: Black Plague,title_Zombie Dice,title_Zombies!!!,title_Zooloretto
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
83,7.0,6.660236,9.0,9.0,7.471476,7.0,7.80902,7.065177,8.0,7.020574,...,6.489303,7.0,7.317442,4.737351,9.0,6.500865,7.003776,5.37743,4.685604,7.0
119,7.0,6.660236,7.0,7.720892,7.471476,6.0,7.80902,7.0,6.675523,7.5,...,10.0,6.640796,7.317442,4.737351,6.908843,6.500865,7.003776,5.37743,4.0,5.0
144,7.0,6.660236,7.0,8.0,7.471476,7.377796,8.0,7.5,6.675523,7.0,...,6.489303,7.5,7.317442,4.737351,6.908843,6.5,7.0,5.37743,4.685604,6.5
156,7.209443,6.660236,8.0,7.720892,7.471476,7.377796,7.5,7.065177,6.675523,7.020574,...,6.489303,7.0,7.317442,4.737351,8.0,7.75,7.003776,4.0,3.0,6.580351
186,7.0,6.660236,8.0,7.720892,7.471476,7.377796,7.80902,7.065177,7.0,7.020574,...,6.489303,6.640796,8.0,6.0,8.0,6.0,7.003776,6.0,5.0,7.0


Unnamed: 0_level_0,title_1960: The Making of the President,title_6 nimmt!,title_7 Wonders,title_7 Wonders Duel,title_7 Wonders: Cities,title_7 Wonders: Leaders,title_A Feast for Odin,title_A Few Acres of Snow,title_A Game of Thrones (first edition),title_A Game of Thrones: The Board Game (Second Edition),...,title_Wizard,title_XCOM: The Board Game,title_YINSH,title_Yahtzee,title_Yspahan,title_Zombicide,title_Zombicide: Black Plague,title_Zombie Dice,title_Zombies!!!,title_Zooloretto
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
83,7.0,6.660236,9.0,9.0,7.471476,7.0,7.80902,7.065177,8.0,7.020574,...,6.489303,7.0,7.317442,4.737351,9.0,6.500865,7.003776,5.37743,4.685604,7.0
119,7.0,6.660236,7.0,7.720892,7.471476,6.0,7.80902,7.0,6.675523,7.5,...,10.0,6.640796,7.317442,4.737351,6.908843,6.500865,7.003776,5.37743,4.0,5.0
144,7.0,6.660236,7.0,8.0,7.471476,7.377796,8.0,7.5,6.675523,7.0,...,6.489303,7.5,7.317442,4.737351,6.908843,6.5,7.0,5.37743,4.685604,6.5
156,7.209443,6.660236,8.0,7.720892,7.471476,7.377796,7.5,7.065177,6.675523,7.020574,...,6.489303,7.0,7.317442,4.737351,8.0,7.75,7.003776,4.0,3.0,6.580351
186,7.0,6.660236,8.0,7.720892,7.471476,7.377796,7.80902,7.065177,7.0,7.020574,...,6.489303,6.640796,8.0,6.0,8.0,6.0,7.003776,6.0,5.0,7.0
225,7.209443,6.660236,8.0,7.720892,7.471476,7.377796,7.80902,8.5,6.675523,9.0,...,7.0,6.640796,7.317442,4.737351,7.5,6.500865,7.003776,5.37743,6.0,7.0


0

100

200

300

400

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
title_1960: The Making of the President,title_1960: The Making of the President,title_Sushi Go Party!,title_Arkham Horror: The Card Game,title_Dixit Quest,title_Lords of Waterdeep: Scoundrels of Skullport,title_Ticket to Ride: USA 1910,title_Arkham Horror: Dunwich Horror Expansion,title_Mechs vs. Minions,title_Clank!: A Deck-Building Adventure,title_Mr. Jack Pocket,title_Targi,title_King of Tokyo: Power Up!,title_Zombicide: Black Plague,title_Viticulture Essential Edition,title_Citadels: The Dark City,title_Ticket to Ride: Nordic Countries,title_Above and Below,title_Fury of Dracula (third edition),title_Legendary Encounters: An Alien Deck Buil...,title_Quadropolis
title_6 nimmt!,title_6 nimmt!,title_Sushi Go Party!,title_Ticket to Ride: USA 1910,title_Mr. Jack Pocket,title_Arkham Horror: Dunwich Horror Expansion,title_Kingdomino,title_Fury of Dracula (third edition),title_Dixit Quest,title_Lords of Waterdeep: Scoundrels of Skullport,title_Citadels: The Dark City,title_Quadropolis,title_Ca$h 'n Guns (Second Edition),title_Arkham Horror: The Card Game,title_King of Tokyo: Power Up!,title_Labyrinth,title_7 Wonders: Cities,title_The Downfall of Pompeii,title_Carcassonne: Expansion 1 – Inns & Cathed...,title_Mechs vs. Minions,title_Above and Below
title_7 Wonders,title_7 Wonders,title_7 Wonders: Leaders,title_7 Wonders Duel,title_7 Wonders: Cities,title_Sushi Go Party!,title_Above and Below,title_Lords of Waterdeep: Scoundrels of Skullport,title_Fresco,title_Targi,title_Ticket to Ride: Nordic Countries,title_Orléans,title_Citadels: The Dark City,title_Abyss,title_Dixit Quest,title_Elysium,title_Viticulture Essential Edition,title_Clank!: A Deck-Building Adventure,title_Between Two Cities,title_Ticket to Ride: USA 1910,title_Forbidden Desert
title_7 Wonders Duel,title_7 Wonders Duel,title_Sushi Go Party!,title_Lords of Waterdeep: Scoundrels of Skullport,title_Patchwork,title_Viticulture Essential Edition,title_Mechs vs. Minions,title_Clank!: A Deck-Building Adventure,title_Targi,title_7 Wonders: Cities,title_Kingdomino,title_Zombicide: Black Plague,title_Mr. Jack Pocket,title_Arkham Horror: The Card Game,title_Quadropolis,title_A Feast for Odin,title_Dixit Quest,title_Above and Below,title_Santorini,title_Mombasa,title_Ticket to Ride: USA 1910
title_7 Wonders: Cities,title_7 Wonders: Cities,title_7 Wonders: Leaders,title_Sushi Go Party!,title_Lords of Waterdeep: Scoundrels of Skullport,title_Dixit Quest,title_Arkham Horror: Dunwich Horror Expansion,title_Citadels: The Dark City,title_Ticket to Ride: USA 1910,title_King of Tokyo: Power Up!,title_Quadropolis,title_Mr. Jack Pocket,title_Pandemic: On the Brink,title_Mechs vs. Minions,title_Arkham Horror: The Card Game,title_Clank!: A Deck-Building Adventure,title_Kingdomino,title_Zombicide: Black Plague,title_Munchkin 2: Unnatural Axe,title_Fury of Dracula (third edition),title_Ca$h 'n Guns (Second Edition)
title_7 Wonders: Leaders,title_7 Wonders: Leaders,title_7 Wonders: Cities,title_Lords of Waterdeep: Scoundrels of Skullport,title_Sushi Go Party!,title_Dixit Quest,title_Arkham Horror: Dunwich Horror Expansion,title_King of Tokyo: Power Up!,title_Ticket to Ride: USA 1910,title_Citadels: The Dark City,title_Mr. Jack Pocket,title_Kingdomino,title_Quadropolis,title_Pandemic: On the Brink,title_Mechs vs. Minions,title_Clank!: A Deck-Building Adventure,title_Carcassonne: Expansion 2 – Traders & Bui...,title_Ca$h 'n Guns (Second Edition),title_Viticulture Essential Edition,title_Dominion: Prosperity,title_Munchkin 2: Unnatural Axe
title_A Feast for Odin,title_A Feast for Odin,title_Sushi Go Party!,title_Mechs vs. Minions,title_Viticulture Essential Edition,title_Clank!: A Deck-Building Adventure,title_Quadropolis,title_Lords of Waterdeep: Scoundrels of Skullport,title_Targi,title_Great Western Trail,title_Zombicide: Black Plague,title_Ticket to Ride: USA 1910,title_Mombasa,title_Arkham Horror: The Card Game,title_Arkham Horror: Dunwich Horror Expansion,title_Dixit Quest,title_Kingdomino,title_Fury of Dracula (third edition),title_7 Wonders: Cities,title_Abyss,title_Above and Below
title_A Few Acres of Snow,title_A Few Acres of Snow,title_Mechs vs. Minions,title_Sushi Go Party!,title_Dixit Quest,title_Arkham Horror: The Card Game,title_Mr. Jack Pocket,title_Friday,title_Zombicide: Black Plague,title_Arkham Horror: Dunwich Horror Expansion,title_Clank!: A Deck-Building Adventure,title_Quadropolis,title_7 Wonders: Cities,title_Lords of Waterdeep: Scoundrels of Skullport,title_Fury of Dracula (third edition),title_King of Tokyo: Power Up!,title_Citadels: The Dark City,title_Viticulture Essential Edition,title_Mombasa,title_London,title_Targi
title_A Game of Thrones (first edition),title_A Game of Thrones (first edition),title_A Game of Thrones: The Board Game (Secon...,title_Space Hulk,title_Sushi Go Party!,title_Arkham Horror: Dunwich Horror Expansion,title_Zombicide: Black Plague,title_Arkham Horror: The Card Game,title_Citadels: The Dark City,title_Lords of Waterdeep: Scoundrels of Skullport,title_Fury of Dracula (third edition),title_Dixit Quest,title_Pandemic: The Cure,title_Mechs vs. Minions,title_7 Wonders: Cities,title_Above and Below,title_Small World Underground,title_Clank!: A Deck-Building Adventure,title_King of Tokyo: Power Up!,title_Mombasa,title_Quadropolis
title_A Game of Thrones: The Board Game (Second Edition),title_A Game of Thrones: The Board Game (Secon...,title_Sushi Go Party!,title_Zombicide: Black Plague,title_A Game of Thrones (first edition),title_Arkham Horror: The Card Game,title_Fury of Dracula (third edition),title_Lords of Waterdeep: Scoundrels of Skullport,title_Mechs vs. Minions,title_Arkham Horror: Dunwich Horror Expansion,title_Dixit Quest,title_Star Wars: Imperial Assault,title_Star Wars: Rebellion,title_Legendary Encounters: An Alien Deck Buil...,title_Clank!: A Deck-Building Adventure,title_Space Hulk,title_Quadropolis,title_Ticket to Ride: Nordic Countries,title_Above and Below,title_Ticket to Ride: USA 1910,title_A Feast for Odin


In [4]:
usum = gsum.transpose()
display(usum.head(6))
usum.to_csv('usum.csv', sep='\t')
data_ibs = pd.DataFrame(index=usum.columns,columns=usum.columns)
uneighbors = findNeighbors(usum, data_ibs)
display(uneighbors.head(6))
uneighbors.to_csv('uneighbors.csv', sep='\t')

userID,83,119,144,156,186,225,238,272,319,387,...,192302,192640,192681,193034,193103,193129,193184,193266,193339,193491
title_1960: The Making of the President,7.0,7.0,7.0,7.209443,7.0,7.209443,7.209443,1.0,7.5,7.209443,...,7.209443,7.209443,7.209443,7.209443,7.209443,7.209443,7.209443,7.209443,8.5,7.209443
title_6 nimmt!,6.660236,6.660236,6.660236,6.660236,6.660236,6.660236,6.0,1.0,6.660236,8.0,...,6.0,8.0,6.660236,6.0,6.660236,6.660236,6.660236,6.5,5.5,6.0
title_7 Wonders,9.0,7.0,7.0,8.0,8.0,8.0,7.0,1.0,7.0,8.0,...,4.5,7.0,8.0,7.481301,7.0,8.0,7.0,7.5,7.5,7.5
title_7 Wonders Duel,9.0,7.720892,8.0,7.720892,7.720892,7.720892,7.720892,7.720892,8.0,7.720892,...,7.5,7.720892,9.0,9.0,7.5,9.0,8.0,7.5,9.0,8.0
title_7 Wonders: Cities,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,...,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476,7.471476
title_7 Wonders: Leaders,7.0,6.0,7.377796,7.377796,7.377796,7.377796,7.377796,7.377796,8.0,7.0,...,7.377796,7.377796,7.377796,7.377796,7.377796,7.377796,7.377796,7.377796,8.0,6.0


0

100

200

300

400

500

KeyboardInterrupt: 