In [1]:
#!/usr/bin/env python3.7
import os
import gc

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import timeit
from datetime import date
import seaborn as sns

import statsmodels.api as sm

from scipy import stats
import scipy as scipy

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LassoCV
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

pd.set_option('display.max_columns', None)  # Give us all the columns without truncation    

In [2]:
def importCSV(filePath):
    dataFrame = pd.read_csv(filePath, header=0, low_memory=False)
    return dataFrame

def reIndex(DF):
    DF.reset_index(drop=True, inplace=True)
    return 0

def nullDrop(DF,col):
    return DF.dropna(subset=col)#DF[pd.notnull(DF[col])]

def dfDrop(DF,col,inPlace):
    if(inPlace):
        DF.drop(col, inplace=True, axis=1)
        return 0
    else:
        return DF.drop(col,axis=1)

def makeDict(value,key):
    myDict = value
    myDict.index = key
    return myDict.to_dict()

In [3]:
###  Games   #########################################
# steamid, appid, playtime_2weeks, playtime_forever, dateretrieved

games = pd.concat([importCSV("data/Games_1.csv"), 
                   importCSV("data/Games_2.csv")
                   #importCSV("data/Games_1-000000000001.csv"),
                   #importCSV("data/Games_2-000000000000.csv"),
                   #importCSV("data/Games_2-000000000001.csv"),
                   #importCSV("data/Games_2-000000000002.csv")
                  ])
reIndex(games)
games = nullDrop(games,['steamid','appid'])

##################################################################################

################### Game Info Lookup Data Files###################################
####  Games_Developers
# appid, Developer
####  Games_Genres
# appid, Genre
####  Games_Publishers
# appid, Publisher
####  App_ID_Info:
# appid, title, type (game, mod, dlc, hardware etc), Required_Age, Is_multiplayer, Price, Rating

appInfo = importCSV("data/App_ID_Info.csv")

###################################################################################

##################### Player Info Lookup #########################################
#### Player_Summaries
# steamid, personaname, profileurl, avatar, avatarmedium, avatarfull, personastate, communityvisibilitystate,
# profilestate, lastlogoff, commentpermission, realname, primaryclanid, timecreated, gameid, gameserverip,
# gameextrainfo, cityid, loccountrycode, locstatecode, loccityid, dateretrieved


playerData = importCSV("data/Player_Summaries.csv") #pd.concat([importCSV("data/Player_Summaries-000000000000.csv"), 
                        #importCSV("data/Player_Summaries-000000000001.csv"),
                        #importCSV("data/Player_Summaries-000000000002.csv"),
                        #importCSV("data/Player_Summaries-000000000003.csv"),
                        #importCSV("data/Player_Summaries-000000000004.csv"),
                        #importCSV("data/Player_Summaries-000000000005.csv")
                     #  ])
reIndex(playerData)
dfDrop(playerData,['personaname', 'avatar', 'avatarmedium', 'avatarfull', 'profileurl', 'primaryclanid'],1)

playerDataCountry = nullDrop(playerData,['loccountrycode'])
reIndex(playerDataCountry)
gc.collect()
###################################################################################

### Social Network ##############################
####  Friends
# steamid_a, steamid_b, friend_since
##############################################################

friendsData = importCSV("data/Friends.csv")#pd.concat([importCSV("data/Friends-000000000000.csv"), 
              #           importCSV("data/Friends-000000000001.csv")])
reIndex(friendsData)
dfDrop(friendsData,'dateretrieved',1)

####  Groups
# steamid, groupid, dateretrieved
groupsData = importCSV("data/Groups.csv")
dfDrop(groupsData,'dateretrieved',1)

####  Achievement_Percentages   #### steamid vs achievement would be useful...
# appid, Name, Percentage
############################################################################

gc.collect()

0

In [10]:
groupsData.shape

(144751, 2)

In [4]:
dropList = ['personastate','communityvisibilitystate','profilestate','lastlogoff','commentpermission','realname','gameid','gameserverip','gameextrainfo','cityid','loccountrycode','locstatecode','loccityid','dateretrieved']
socialData = dfDrop(playerData, dropList,0)

socialData.insert(2, "Number_Friends", 0)
socialData.insert(3, "Number_Groups", 0)
socialData.insert(4, "Mult_Play_2Week", np.nan)
socialData.insert(5, "Mult_Play_All", np.nan)
socialData.insert(6, "NonMult_Play_2Week", np.nan)
socialData.insert(7, "NonMult_Play_All", np.nan)
#socialData.insert(8, "Number_Common_Games", np.nan)      #TBA
#socialData.insert(9, "AppID_Most_Played_Common", np.nan) #TBA
reIndex(socialData)


friendships = pd.concat([friendsData.groupby(['steamid_a']).size().reset_index(name='count').rename(columns={"steamid_a": "steamid"}), 
               friendsData.groupby(['steamid_b']).size().reset_index(name='count').rename(columns={"steamid_b": "steamid"})])
reIndex(friendships)
friendCountDic = makeDict(friendships['count'],friendships['steamid'])
socialData['Number_Friends'] = socialData.steamid.map(friendCountDic).fillna(socialData['Number_Friends'])


groupCounts = groupsData.groupby(['steamid']).size().reset_index(name='count')
groupsCountDic = makeDict(groupCounts['count'],groupCounts['steamid'])
socialData['Number_Groups'] = socialData.steamid.map(groupsCountDic).fillna(socialData['Number_Groups'])

##############################

isMultDic = makeDict(appInfo['Is_Multiplayer'],appInfo['appid'])

gameMult = dfDrop(games,'dateretrieved',0)
gameMult.insert(4, "isMult", 0)

gameMult['isMult'] = gameMult.appid.map(isMultDic).fillna(gameMult['isMult'])

gameNonMultGroup = (gameMult[gameMult['isMult']==0].groupby(['steamid']).sum()).drop(['appid','isMult'],axis=1)
gameMultGroup = (gameMult[gameMult['isMult']!=0].groupby(['steamid']).sum()).drop(['appid','isMult'], axis=1)

gameNonMultGroup2Week_Dic = makeDict(gameNonMultGroup['playtime_2weeks'],gameNonMultGroup.index)
gameNonMultGroupForever_Dic = makeDict(gameNonMultGroup['playtime_forever'],gameNonMultGroup.index)

gameMultGroup2Week_Dic = makeDict(gameMultGroup['playtime_2weeks'],gameMultGroup.index)
gameMultGroupForever_Dic = makeDict(gameMultGroup['playtime_forever'],gameMultGroup.index)



socialData['Mult_Play_2Week'] = socialData.steamid.map(gameMultGroup2Week_Dic).fillna(socialData['Mult_Play_2Week'])
socialData['Mult_Play_All'] = socialData.steamid.map(gameMultGroupForever_Dic).fillna(socialData['Mult_Play_All'])
socialData['NonMult_Play_2Week'] = socialData.steamid.map(gameNonMultGroup2Week_Dic).fillna(socialData['NonMult_Play_2Week'])
socialData['NonMult_Play_All'] = socialData.steamid.map(gameNonMultGroupForever_Dic).fillna(socialData['NonMult_Play_All'])


socialData.rename(columns={"timecreated": "daysOld"}, inplace=True)
reIndex(socialData)

today = pd.Series([date.today()])
todaySocial = today.repeat(socialData.shape[0])
todaySocial.index = socialData.index

socialData['daysOld'] = (pd.to_datetime(todaySocial) - pd.to_datetime(socialData['daysOld']).dt.tz_localize(None)).dt.days



In [5]:
############## Player Demographics #########################################
###### Countries

colNames = ['Country', 'Percentage of Players']
playerCountriesCount = pd.DataFrame(columns=colNames)

reIndex(playerDataCountry)
playerDataCountryCounts = playerDataCountry.groupby(['loccountrycode']).size().reset_index(name='count')

playerCountriesCountTotal = playerDataCountryCounts['count'].sum()

playerCountriesCount['Country'] = playerDataCountryCounts['loccountrycode']
playerCountriesCount['Percentage of Players'] = (playerDataCountryCounts['count']*100) / playerCountriesCountTotal

playerCountriesCount = playerCountriesCount.sort_values(by=['Percentage of Players'], ascending=False)
reIndex(playerCountriesCount)

##################################################################################


0

In [6]:
############## Player Demographics #########################################
###### Money Spent
## Look at all games with appid and steamid -> morph steamid to country code, morph game to price

# games: df with all owned games: steamid, appid
# gameIDs = games[['steamid', 'appid']].copy()
# playerDataCountry: df with steamid and loccountrycode
# gameLocs = playerDataCountry[['steamid', 'loccountrycode']].copy()
# appInfo: appid, Price, Rating
# gameInfos = appInfo[['appid', 'Price']].copy()

#colNames = ['appid', 'steamid', 'loccountrycode', 'Price', 'Rating']
gameMoney = pd.DataFrame()#columns=colNames)

gameMoney.insert(0, "appid", games.appid)
gameMoney.insert(1, "steamid", games.steamid)
gameMoney.insert(2, "loccountrycode", np.nan)
gameMoney.insert(3, "Price", np.nan)
gameMoney.insert(4, "Rating", np.nan)

dictionary1 = makeDict(playerDataCountry['loccountrycode'],playerDataCountry['steamid'])
dictionary2 = makeDict(appInfo['Price'],appInfo['appid'])
dictionary3 = makeDict(appInfo['Rating'],appInfo['appid'])


gameMoney['loccountrycode'] = games.steamid.map(dictionary1).fillna(gameMoney['loccountrycode'])
gameMoney['Price'] = games.appid.map(dictionary2).fillna(gameMoney['Price'])
gameMoney['Rating'] = games.appid.map(dictionary3).fillna(gameMoney['Rating'])

gameLocPrice = dfDrop(gameMoney,['appid','steamid','Rating'],0)
gameLocPrice = nullDrop(gameLocPrice,['loccountrycode','Price'])
reIndex(gameLocPrice)

colNames = ['Country', 'Total_Revenue', 'Revenue_Per_Player']
gameLocPriceCount = pd.DataFrame(columns=colNames)

reIndex(gameLocPrice)
sumRev = gameLocPrice.groupby(['loccountrycode']).sum()
countRev = gameLocPrice.groupby(['loccountrycode']).size().reset_index(name='count')


reIndex(gameLocPrice)
countRev = gameLocPrice.groupby(['loccountrycode']).size().reset_index(name='count')
sumRev = gameLocPrice.groupby(['loccountrycode']).sum().reset_index()

gameLocPriceCount['Country'] = sumRev['loccountrycode']
gameLocPriceCount['Total_Revenue'] = sumRev['Price']
gameLocPriceCount['Revenue_Per_Player'] = sumRev['Price'] / countRev['count']
reIndex(gameLocPriceCount)

gameLocPriceCount = gameLocPriceCount.sort_values(by=['Total_Revenue'], ascending=False)

gameLocPriceCountTop = gameLocPriceCount[0:10].sort_values(by=['Revenue_Per_Player'], ascending=False)
reIndex(gameLocPriceCountTop)


0

In [7]:
############## Playing around with game genre searches and seeing what FTP is...

# Given FTP as genre, what is the total revenue?

#1. Profits from DLC/Mods vs One off
#      DLC from FTP vs One off w/o DLC vs One off with DLC
reIndex(appInfo)
ftpGames = appInfo[['appid','Rating']][appInfo['Price'] == 0.0]
otherGames = appInfo[['appid','Rating']][appInfo['Price'] != 0.0]

reIndex(ftpGames)
reIndex(otherGames)

#######################################

otherGames.insert(2, "type", np.nan)
ftpGames.insert(2, "type", np.nan)
otherGames.insert(3, "Price", np.nan)
ftpGames.insert(3, "Price", np.nan)
otherGames.insert(4, "Units_Sold", np.nan)
ftpGames.insert(4, "Units_Sold", np.nan)
otherGames.insert(5, "Total_Revenue", np.nan)
ftpGames.insert(5, "Total_Revenue", np.nan)
otherGames.insert(6, "playtime_2weeks", np.nan)
ftpGames.insert(6, "playtime_2weeks", np.nan)
otherGames.insert(7, "playtime_forever", np.nan)
ftpGames.insert(7, "playtime_forever", np.nan)
otherGames.insert(8, "store_time_days", np.nan)
ftpGames.insert(8, "store_time_days", np.nan)

######
##Could use regex/replace instead of dictionary but I prefer the latter
#DF['KEY'] = DF['KEY'].replace(to_replace={'.*OLD.*': 'NEW'}, regex=True)
######

########## Add count info here
gameIDCount = games.groupby(['appid']).size().reset_index(name='count')

gameCountDic = makeDict(gameIDCount['count'],gameIDCount['appid'])

otherGames['Units_Sold'] = otherGames.appid.map(gameCountDic).fillna(otherGames['Units_Sold'])
ftpGames['Units_Sold'] = ftpGames.appid.map(gameCountDic).fillna(ftpGames['Units_Sold'])

otherGames['Units_Sold'] = otherGames['Units_Sold'].fillna(0)
ftpGames['Units_Sold'] = ftpGames['Units_Sold'].fillna(0)
##############################

############# PlayTime
gamePlayCount = games.groupby(['appid']).sum().reset_index()
dfDrop(gamePlayCount,['steamid'],1)

gamePlay2weekCountDic = makeDict(gamePlayCount['playtime_2weeks'],gamePlayCount['appid'])
gamePlayForeverCountDic = makeDict(gamePlayCount['playtime_forever'],gamePlayCount['appid'])

otherGames['playtime_2weeks'] = otherGames.appid.map(gamePlay2weekCountDic).fillna(otherGames['playtime_2weeks'])
ftpGames['playtime_2weeks'] = ftpGames.appid.map(gamePlay2weekCountDic).fillna(ftpGames['playtime_2weeks'])
otherGames['playtime_forever'] = otherGames.appid.map(gamePlayForeverCountDic).fillna(otherGames['playtime_forever'])
ftpGames['playtime_forever'] = ftpGames.appid.map(gamePlayForeverCountDic).fillna(ftpGames['playtime_forever'])

otherGames['playtime_2weeks'] = otherGames['playtime_2weeks'].fillna(0)
otherGames['playtime_forever'] = otherGames['playtime_forever'].fillna(0)

ftpGames['playtime_2weeks'] = otherGames['playtime_2weeks'].fillna(0)
ftpGames['playtime_forever'] = otherGames['playtime_forever'].fillna(0)
###################


########## StoreTime

#dateretrieved from games       by   appid
#Release_Date from app_info     by   appid

# If I could ensure the same order then this would be easy. But for a fool proof approach
# I'll use a dictionary etc.

dateFromDic = makeDict(appInfo['Release_Date'],appInfo['appid'])

dateFromOG = otherGames.appid.map(dateFromDic).fillna(0)
dateFromftp = ftpGames.appid.map(dateFromDic).fillna(0)

today = pd.Series([date.today()])
todayOG = today.repeat(dateFromOG.size)
todayFTP = today.repeat(dateFromftp.size)

reIndex(dateFromOG)
reIndex(dateFromftp)

todayOG.index = dateFromOG.index
todayFTP.index = dateFromftp.index

otherGames['store_time_days'] = (pd.to_datetime(todayOG) - pd.to_datetime(dateFromOG).dt.tz_localize(None)).dt.days
ftpGames['store_time_days'] = (pd.to_datetime(todayFTP) - pd.to_datetime(dateFromftp).dt.tz_localize(None)).dt.days

#####################

apptypeDic = makeDict(appInfo['Type'],appInfo['appid'])
appPriceDic = makeDict(appInfo['Price'],appInfo['appid'])


otherGames['type'] = otherGames.appid.map(apptypeDic).fillna(otherGames['type'])
ftpGames['type'] = ftpGames.appid.map(apptypeDic).fillna(ftpGames['type'])
otherGames['Price'] = otherGames.appid.map(appPriceDic).fillna(otherGames['Price'])
ftpGames['Price'] = ftpGames.appid.map(appPriceDic).fillna(ftpGames['Price'])

############# Revenue
otherGames['Total_Revenue'] = otherGames['Units_Sold'] * otherGames['Price']
ftpGames['Total_Revenue'] = ftpGames['Units_Sold']

otherGames = otherGames.sort_values(by=['Total_Revenue'], ascending=False, na_position='first')
ftpGames = ftpGames.sort_values(by=['Total_Revenue'], ascending=False, na_position='first')
###################


###### Filter out demo, video, dlc and mod. ie just keep 'game'
paidGames = otherGames[otherGames['type'] == 'game']
freeGames = ftpGames[ftpGames['type'] == 'game']

paidDLC = otherGames[otherGames['type'] == 'dlc']
freeDLC = ftpGames[ftpGames['type'] == 'dlc']

paidMods = otherGames[otherGames['type'] == 'mod']
freeMods = ftpGames[ftpGames['type'] == 'mod']

reIndex(paidGames)
reIndex(freeGames)

reIndex(paidDLC)
reIndex(freeDLC)

reIndex(paidMods)
reIndex(freeMods)



0

In [8]:
freeGames[freeGames.appid==570]

Unnamed: 0,appid,Rating,type,Price,Units_Sold,Total_Revenue,playtime_2weeks,playtime_forever,store_time_days
1,570,90,game,0.0,29971.0,29971.0,684.0,6008.0,2372


In [9]:
#############   Quick sanity check on reciprocity 
#print(friendsData['steamid_a'][friendsData['steamid_b']==76561198061647456])
# (a) 76561198081638400 has (b) friends, 76561198061647456 and 76561198050245657 

#a = friendsData.groupby(['steamid_a']).size().reset_index(name='count')
#b = friendsData.groupby(['steamid_b']).size().reset_index(name='count')

#c = pd.concat([a.rename(columns={"steamid_a": "steamid"}), b.rename(columns={"steamid_b": "steamid"})])
#c.reset_index(drop=True, inplace=True)

#print(a.shape[0]+b.shape[0])
#print(c)
########################################################################################################

In [10]:
############## Playing around with game genre searches and seeing what FTP is...

####  Games_Genres
# appid, Genre

#gameGenreFile = "data/Games_Genres.csv"
#gameGenre = pd.read_csv(gameGenreFile, header=0, low_memory=False)
#drop_list = ['title', 'Required_Age']
#appInfo.drop(drop_list, inplace=True, axis=1)
#gameGenre = gameGenre[pd.notnull(gameGenre['appid'])]
#gameGenre = gameGenre[pd.notnull(gameGenre['Genre'])]

#print(gameGenre['Genre'].unique())

#appInfoFile = "data/App_ID_Info.csv"
#appInfo2 = pd.read_csv(appInfoFile, header=0, low_memory=False)
##ftp = gameGenre['appid'][gameGenre['Genre'] == 'Free to Play']
#ftp.reset_index(drop=True, inplace=True)
#for i in range(ftp.size-1):
#    print(appInfo2['Title'][appInfo2['appid']==ftp[i]])

#There are duplicates in game genre!! GOTCHA moment...
##################################################################################

In [11]:
#################### GENRES

gameWithGenre  = dfDrop(games,['dateretrieved'],0)
gameWithGenre.insert(4,'genre',np.nan)

gameGenreFile = "data/Games_Genres.csv"
gameGenre = pd.read_csv(gameGenreFile, header=0, low_memory=False)
gameGenre = gameGenre[pd.notnull(gameGenre['appid'])]
reIndex(gameGenre)
genreDict = makeDict(gameGenre['Genre'],gameGenre['appid'])
gameWithGenre['genre'] = gameWithGenre.appid.map(genreDict).fillna(gameWithGenre['genre'])

print(gameWithGenre.head())
################################################

             steamid   appid  playtime_2weeks  playtime_forever   genre
0  76561198001291264    8870              NaN            1392.0  Action
1  76561198001291264     400              NaN             239.0  Action
2  76561198001291264  212910              NaN             130.0     NaN
3  76561198001291264     550              NaN           17547.0  Action
4  76561198001291264     420              NaN             534.0  Action


In [12]:
paidGames.insert(9,'genre',np.nan)
freeGames.insert(9,'genre',np.nan)

paidDLC.insert(9,'genre',np.nan)
freeDLC.insert(9,'genre',np.nan)

paidMods.insert(9,'genre',np.nan)
freeMods.insert(9,'genre',np.nan)

paidGames['genre'] = paidGames.appid.map(genreDict).fillna(paidGames['genre'])
freeGames['genre'] = freeGames.appid.map(genreDict).fillna(freeGames['genre'])

paidDLC['genre'] = paidDLC.appid.map(genreDict).fillna(paidDLC['genre'])
freeDLC['genre'] = freeDLC.appid.map(genreDict).fillna(freeDLC['genre'])

paidMods['genre'] = paidMods.appid.map(genreDict).fillna(paidMods['genre'])
freeMods['genre'] = freeMods.appid.map(genreDict).fillna(freeMods['genre'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[ro

In [13]:
############# playerProfile # steamid   appid  playtime_forever  Friends  Rating

#get Friends from socialData
#get Rating from appInfo
#get steamid, appid, playtime_forever from games

playerProfile  = dfDrop(games,['playtime_2weeks','dateretrieved'],0)
playerProfile.insert(3,'Friends',np.nan)
playerProfile.insert(4,'Rating',np.nan)

friendDict = makeDict(socialData['Number_Friends'],socialData['steamid'])
ratingDict = makeDict(appInfo['Rating'],appInfo['appid'])


playerProfile['Friends'] = playerProfile.steamid.map(friendDict).fillna(playerProfile['Friends'])
playerProfile['Rating'] = playerProfile.appid.map(ratingDict).fillna(playerProfile['Rating'])

print(playerProfile.head())

             steamid   appid  playtime_forever  Friends  Rating
0  76561198001291264    8870            1392.0      1.0    94.0
1  76561198001291264     400             239.0      1.0    90.0
2  76561198001291264  212910             130.0      1.0     NaN
3  76561198001291264     550           17547.0      1.0    89.0
4  76561198001291264     420             534.0      1.0    90.0


In [14]:
#############################################################################################################
#############################################################################################################
############################# Data Loaded and Conditioned(ish) ##############################################
#############################################################################################################
#############################################################################################################

In [15]:
####################################### Dataframes ##########################################################

paidGames.to_csv('data/processed/paidGames.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days
freeGames.to_csv('data/processed/freeGames.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days

paidDLC.to_csv('data/processed/paidDLC.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days
freeDLC.to_csv('data/processed/freeDLC.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days

paidMods.to_csv('data/processed/paidMods.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days
freeMods.to_csv('data/processed/freeMods.csv',index= False) #appid  Rating  type  Price  Units_Sold  Total_Revenue  playtime_2weeks playtime_forever  store_time_days

gameLocPriceCount.to_csv('data/processed/gameLocPriceCount.csv',index= False) #Country  Total_Revenue  Revenue_Per_Player
gameLocPriceCountTop.to_csv('data/processed/gameLocPriceCountTop.csv',index= False) # Country  Total_Revenue  Revenue_Per_Player

playerCountriesCount.to_csv('data/processed/playerCountriesCount.csv',index= False) #Country Percentage of Players

socialData.to_csv('data/processed/socialData.csv',index= False) #steamid  daysOld  Number_Friends  Number_Groups  Mult_Play_2Week  Mult_Play_All  NonMult_Play_2Week  NonMult_Play_All 

#games.to_csv('data/processed/games.csv',index= False)

gameWithGenre.to_csv('data/processed/gameWithGenre.csv',index= False) #steamid   appid  playtime_2weeks  playtime_forever   genre

playerProfile.to_csv('data/processed/playerProfile.csv',index= False) #playerProfile # steamid   appid  playtime_forever  Friends  Rating

#############################################################################################################
#############################################################################################################
#############################################################################################################
#############################################################################################################

In [16]:
gc.collect()

60

In [18]:
playerCountriesCount

Unnamed: 0,Country,Percentage of Players
0,US,19.228014
1,RU,11.297254
2,DE,6.510147
3,GB,4.942300
4,BR,4.930362
...,...,...
206,TO,0.003979
207,BJ,0.003979
208,MV,0.003979
209,NU,0.003979
