In [3]:
#!/usr/bin/env python3.7
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import timeit
from datetime import date
import seaborn as sns

import statsmodels.api as sm

from scipy import stats
import scipy as scipy

import svgutils.transform as sg
import sys

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import silhouette_score

pd.set_option('display.max_columns', None)  # Display every DataFrame column instead of truncating wide frames

In [4]:
def importCSV(filePath):
    """Load a CSV file into a pandas DataFrame.

    The first row of the file is used as the column header. ``low_memory=False``
    tells pandas not to parse the file in internal chunks, which avoids
    mixed-type inference within a column.

    Parameters
    ----------
    filePath : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame with the parsed contents.
    """
    return pd.read_csv(filePath, low_memory=False, header=0)

def reIndex(DF):
    """Replace DF's index with a fresh 0..n-1 range, modifying DF in place.

    The old index is discarded (it is not kept as a column).

    Returns
    -------
    int
        Always 0; the useful effect is the in-place change to ``DF``.
    """
    DF.reset_index(inplace=True, drop=True)
    return 0

def nullDrop(DF,col):
    """Return a new DataFrame without the rows that have a missing value in ``col``.

    Parameters
    ----------
    DF : pandas.DataFrame; left unmodified.
    col : column label(s) passed to ``dropna(subset=...)``.

    Returns
    -------
    pandas.DataFrame keeping the original index labels of the surviving rows.
    """
    withoutMissing = DF.dropna(subset=col)
    return withoutMissing

def dfDrop(DF,col,inPlace):
    """Drop the column(s) ``col`` from DF.

    Parameters
    ----------
    DF : pandas.DataFrame
    col : column label or list of labels to remove.
    inPlace : truthy to modify ``DF`` itself, falsy to leave it untouched.

    Returns
    -------
    0 when ``inPlace`` is truthy (``DF`` has been modified), otherwise a new
    DataFrame without the dropped columns.
    """
    if not inPlace:
        return DF.drop(col, axis=1)
    DF.drop(col, axis=1, inplace=True)
    return 0

def makeDict(value,key):
    """Build a ``{key: value}`` dictionary from two equally long Series.

    Entries are paired by position: the i-th element of ``key`` becomes the
    dictionary key for the i-th element of ``value``. If a key occurs more
    than once, the last occurrence wins.

    Parameters
    ----------
    value : pandas.Series supplying the dictionary values.
    key : Series or other list-like supplying the dictionary keys; must have
        the same length as ``value``.

    Returns
    -------
    dict
    """
    # Work on a copy: assigning to ``value.index`` directly would overwrite the
    # caller's Series index (and, for a DataFrame column, leave that column
    # out of step with the frame it came from).
    lookup = value.copy()
    lookup.index = key
    return lookup.to_dict()


def normaliseDF(DF):
    """Divide every column of DF by that column's maximum value.

    The columns are overwritten in ``DF`` itself, and the same object is
    returned for convenience.

    Parameters
    ----------
    DF : pandas.DataFrame whose columns support ``max()`` and division.

    Returns
    -------
    The same DataFrame object, with each column scaled by its own maximum.
    """
    for name in DF.columns.tolist():
        peak = DF[name].max()
        DF[name] = DF[name] / peak
    return DF

In [116]:
####################################### Dataframes ##########################################################
# Loads the CSV inputs for this notebook. The comment next to each load lists the columns the
# author recorded for that file. Commented-out loads are disabled inputs kept for reference.

# Per-product tables (disabled). Columns recorded for each of the six files:
#   appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks  playtime_forever  store_time_days
#paidGames = importCSV('data/processed/paidGames.csv')
#freeGames = importCSV('data/processed/freeGames.csv')

#paidDLC = importCSV('data/processed/paidDLC.csv')
#freeDLC = importCSV('data/processed/freeDLC.csv')

#paidMods = importCSV('data/processed/paidMods.csv')
#freeMods = importCSV('data/processed/freeMods.csv')

# Per-country tables.
gameLocPriceCount = importCSV('data/processed/gameLocPriceCount.csv') # columns: Country, Total_Revenue, Revenue_Per_Player
gameLocPriceCountTop = importCSV('data/processed/gameLocPriceCountTop.csv') # columns: Country, Total_Revenue, Revenue_Per_Player
playerCountriesCount = importCSV('data/processed/playerCountriesCount.csv') # columns: Country, Percentage of Players

# Disabled. Columns: steamid  daysOld  Number_Friends  Number_Groups  Mult_Play_2Week  Mult_Play_All  NonMult_Play_2Week  NonMult_Play_All
#socialData = importCSV('data/processed/socialData.csv')

# Disabled. Columns: steamid, appid, playtime_2weeks, playtime_forever, dateretrieved
#games = importCSV('data/processed/games.csv')

# Columns recorded by the author: appid, title, type (game, mod, dlc, hardware etc), Required_Age, Is_multiplayer, Price, Rating
# NOTE(review): a later cell reads appInfo['Title'] (capital T) -- confirm the header's exact spelling.
appInfo = importCSV("data/App_ID_Info.csv")

# Disabled. Columns: steamid  appid  playtime_2weeks  playtime_forever  genre
#gameWithGenre = importCSV('data/processed/gameWithGenre.csv')

# Columns: steamid  appid  playtime_forever  Friends  Rating
playerProfile = importCSV('data/processed/playerProfile.csv')

#############################################################################################################
#############################################################################################################
#############################################################################################################
#############################################################################################################

In [80]:
##### Name of the most popular game within each country
############ Should have done this originally... Plan approach better next time
#
# Steps: tag every playerProfile row with the player's country, total the playtime per
# (country, app), keep the app with the largest total for each country, and translate its
# appid into a title. The final product is topAppDict: country code -> game title.

# --- 1. Attach a country code to each profile row -------------------------------------------
# 0 is the placeholder for "country unknown". The guard keeps the cell re-runnable:
# DataFrame.insert raises ValueError if the column already exists.
if 'Location' not in playerProfile.columns:
    playerProfile.insert(5,'Location',0)
playerData = importCSV("data/Player_Summaries.csv")
playerData = nullDrop(playerData,['loccountrycode'])
steamidLocationDict = makeDict(playerData['loccountrycode'],playerData['steamid'])
playerProfile['Location'] = playerProfile.steamid.map(steamidLocationDict).fillna(playerProfile['Location'])
# Discard players whose country could not be resolved (still carrying the 0 placeholder).
playerProfile = playerProfile[playerProfile['Location']!=0]
reIndex(playerProfile)

# --- 2. Total playtime per (country, app) ---------------------------------------------------
# steamid / Friends / Rating are removed before summing: their per-group sums are meaningless
# and were thrown away immediately afterwards anyway.
mostPopularGames = dfDrop(playerProfile,['steamid','Friends','Rating'],0).groupby(['Location','appid']).sum()
mostPopularGames.reset_index(inplace=True)
uniqueCounteries = mostPopularGames.Location.unique()

# --- 3. Pick the app with the highest total playtime in each country ------------------------
# Driven by the countries actually present rather than a hard-coded count of 196, so the cell
# keeps working when the dataset gains or loses countries.
apps = []
for country in uniqueCounteries:
    temp = mostPopularGames[mostPopularGames.Location==country].sort_values(by=['playtime_forever'], ascending=False)
    apps.append(temp.appid.iloc[0])  # first row after the descending sort = most played app

# --- 4. Translate appids into titles --------------------------------------------------------
countryTopApp = pd.DataFrame(uniqueCounteries)  # one column, labelled 0, holding the country codes
countryTopApp['Apps'] = apps
appIDNameDict = makeDict(appInfo['Title'],appInfo['appid'])
# Fall back to the raw appid when no title is known for it.
countryTopApp['App_Name'] = countryTopApp.Apps.map(appIDNameDict).fillna(countryTopApp['Apps'])

topAppDict = makeDict(countryTopApp['App_Name'],countryTopApp[0])


In [127]:
# Look up each country's most played title; countries absent from topAppDict get 0.
playerCountriesCount['Most Popular Game'] = playerCountriesCount.Country.map(topAppDict).fillna(0)


In [133]:
# Add each country's most played title to both revenue tables; countries absent from
# topAppDict get 0.
gameLocPriceCount['Most Popular Game'] = gameLocPriceCount.Country.map(topAppDict).fillna(0)

gameLocPriceCountTop['Most Popular Game'] = gameLocPriceCountTop.Country.map(topAppDict).fillna(0)




In [128]:
# Preview the first rows, including the 'Most Popular Game' column added above.
playerCountriesCount.head()

Unnamed: 0,Country,Percentage of Players,Most Popular Game
0,US,19.228014,Team Fortress 2
1,RU,11.297254,Dota 2
2,DE,6.510147,Counter-Strike: Source
3,GB,4.9423,Team Fortress 2
4,BR,4.930362,Dota 2


In [129]:
# Print the first ten rows as plain text, then show (rows, columns) as the cell result.
print(playerCountriesCount[0:10])
playerCountriesCount.shape

  Country  Percentage of Players       Most Popular Game
0      US              19.228014         Team Fortress 2
1      RU              11.297254                  Dota 2
2      DE               6.510147  Counter-Strike: Source
3      GB               4.942300         Team Fortress 2
4      BR               4.930362                  Dota 2
5      FR               4.377238  Counter-Strike: Source
6      CA               3.688818  Counter-Strike: Source
7      PL               2.594509          Counter-Strike
8      UA               2.562674                  Dota 2
9      AU               2.423398                  Dota 2


(211, 3)

In [135]:
# Print the first ten rows as plain text, then show (rows, columns) as the cell result.
print(gameLocPriceCount[0:10])
gameLocPriceCount.shape


  Country  Total_Revenue  Revenue_Per_Player       Most Popular Game
0      US   2.996877e+06           13.511861         Team Fortress 2
1      GB   9.525183e+05           13.573277         Team Fortress 2
2      DE   6.325907e+05           14.014283  Counter-Strike: Source
3      CA   5.895755e+05           13.760017  Counter-Strike: Source
4      RU   4.747703e+05           13.807080                  Dota 2
5      AU   3.576973e+05           13.683384                  Dota 2
6      FR   3.425370e+05           13.504849  Counter-Strike: Source
7      SE   2.492805e+05           13.683198          Counter-Strike
8      BR   2.333884e+05           13.136061                  Dota 2
9      PL   2.058247e+05           12.466666          Counter-Strike


(196, 4)

In [134]:
# Print the first ten rows as plain text, then show (rows, columns) as the cell result.
print(gameLocPriceCountTop[0:10])
gameLocPriceCountTop.shape

  Country  Total_Revenue  Revenue_Per_Player       Most Popular Game
0      DE   6.325907e+05           14.014283  Counter-Strike: Source
1      RU   4.747703e+05           13.807080                  Dota 2
2      CA   5.895755e+05           13.760017  Counter-Strike: Source
3      AU   3.576973e+05           13.683384                  Dota 2
4      SE   2.492805e+05           13.683198          Counter-Strike
5      GB   9.525183e+05           13.573277         Team Fortress 2
6      US   2.996877e+06           13.511861         Team Fortress 2
7      FR   3.425370e+05           13.504849  Counter-Strike: Source
8      BR   2.333884e+05           13.136061                  Dota 2
9      PL   2.058247e+05           12.466666          Counter-Strike


(10, 4)