# Waka
## An exploration of the best-selling videogames in history

### 1. Setup workspace

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys

#Append custom libraries from its folder
sys.path.append('libraries')

import shinypanda

vg_sales = pd.read_csv("data/vgsales.csv") 
vg_sales.head()

Unnamed: 0,Rank,Name,basename,Genre,ESRB_Rating,Platform,Publisher,Developer,VGChartz_Score,Critic_Score,...,NA_Sales,PAL_Sales,JP_Sales,Other_Sales,Year,Last_Update,url,status,Vgchartzscore,img_url
0,1,Wii Sports,wii-sports,Sports,E,Wii,Nintendo,Nintendo EAD,,7.7,...,,,,,2006.0,,http://www.vgchartz.com/game/2667/wii-sports/?...,1,,/games/boxart/full_2258645AmericaFrontccc.jpg
1,2,Super Mario Bros.,super-mario-bros,Platform,,NES,Nintendo,Nintendo EAD,,10.0,...,,,,,1985.0,,http://www.vgchartz.com/game/6455/super-mario-...,1,,/games/boxart/8972270ccc.jpg
2,3,Mario Kart Wii,mario-kart-wii,Racing,E,Wii,Nintendo,Nintendo EAD,,8.2,...,,,,,2008.0,11th Apr 18,http://www.vgchartz.com/game/6968/mario-kart-w...,1,8.7,/games/boxart/full_8932480AmericaFrontccc.jpg
3,4,PlayerUnknown's Battlegrounds,playerunknowns-battlegrounds,Shooter,,PC,PUBG Corporation,PUBG Corporation,,,...,,,,,2017.0,13th Nov 18,http://www.vgchartz.com/game/215988/playerunkn...,1,,/games/boxart/full_8052843AmericaFrontccc.jpg
4,5,Wii Sports Resort,wii-sports-resort,Sports,E,Wii,Nintendo,Nintendo EAD,,8.0,...,,,,,2009.0,,http://www.vgchartz.com/game/24656/wii-sports-...,1,8.8,/games/boxart/full_7295041AmericaFrontccc.jpg


### 2. Clean dataframe

#### 2.1 Remove VGChartz meta columns
*Last_Update*, *url*, *status*, *Vgchartzscore* and *img_url*

In [2]:
try:
    vg_sales = vg_sales.drop(["Last_Update", "url", "status", "Vgchartzscore", "img_url"], axis=1)
except:
    print("Columns could not be removed. Probably the cell had been executed before.")
    
vg_sales.head()

Unnamed: 0,Rank,Name,basename,Genre,ESRB_Rating,Platform,Publisher,Developer,VGChartz_Score,Critic_Score,User_Score,Total_Shipped,Global_Sales,NA_Sales,PAL_Sales,JP_Sales,Other_Sales,Year
0,1,Wii Sports,wii-sports,Sports,E,Wii,Nintendo,Nintendo EAD,,7.7,,82.86,,,,,,2006.0
1,2,Super Mario Bros.,super-mario-bros,Platform,,NES,Nintendo,Nintendo EAD,,10.0,,40.24,,,,,,1985.0
2,3,Mario Kart Wii,mario-kart-wii,Racing,E,Wii,Nintendo,Nintendo EAD,,8.2,9.1,37.14,,,,,,2008.0
3,4,PlayerUnknown's Battlegrounds,playerunknowns-battlegrounds,Shooter,,PC,PUBG Corporation,PUBG Corporation,,,,36.6,,,,,,2017.0
4,5,Wii Sports Resort,wii-sports-resort,Sports,E,Wii,Nintendo,Nintendo EAD,,8.0,8.8,33.09,,,,,,2009.0


#### 2.2 Remove columns with a significative percentage of empty values

In [3]:
#By default, 'wipe_empty_columns' remove columns with more than 70% of NaN
vg_sales = shinypanda.wipe_empty_columns(vg_sales)
vg_sales.head()

Unnamed: 0,Rank,Name,basename,Genre,ESRB_Rating,Platform,Publisher,Developer,Global_Sales,Year
0,1,Wii Sports,wii-sports,Sports,E,Wii,Nintendo,Nintendo EAD,,2006.0
1,2,Super Mario Bros.,super-mario-bros,Platform,,NES,Nintendo,Nintendo EAD,,1985.0
2,3,Mario Kart Wii,mario-kart-wii,Racing,E,Wii,Nintendo,Nintendo EAD,,2008.0
3,4,PlayerUnknown's Battlegrounds,playerunknowns-battlegrounds,Shooter,,PC,PUBG Corporation,PUBG Corporation,,2017.0
4,5,Wii Sports Resort,wii-sports-resort,Sports,E,Wii,Nintendo,Nintendo EAD,,2009.0


#### 2.3 Turn years into int

In [6]:
#Fill 'NaN' with 0 to avoid conversion error
vg_sales["Year"] = vg_sales["Year"].fillna(0)

#Convert values to 'int'
vg_sales["Year"] = vg_sales["Year"].astype("int")

#Drop zeros
vg_sales["Year"].replace(0, np.nan, inplace=True)

vg_sales.head()

Unnamed: 0,Rank,Name,basename,Genre,ESRB_Rating,Platform,Publisher,Developer,Global_Sales,Year
0,1,Wii Sports,wii-sports,Sports,E,Wii,Nintendo,Nintendo EAD,,2006.0
1,2,Super Mario Bros.,super-mario-bros,Platform,,NES,Nintendo,Nintendo EAD,,1985.0
2,3,Mario Kart Wii,mario-kart-wii,Racing,E,Wii,Nintendo,Nintendo EAD,,2008.0
3,4,PlayerUnknown's Battlegrounds,playerunknowns-battlegrounds,Shooter,,PC,PUBG Corporation,PUBG Corporation,,2017.0
4,5,Wii Sports Resort,wii-sports-resort,Sports,E,Wii,Nintendo,Nintendo EAD,,2009.0
