# Video Game Data Analysis

## Joyce Liao. Dec 17, 2018

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Load the raw sales table from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer='./vgsales.csv')

In [3]:
# Keep only the rows whose Global_Sales value is strictly greater than 0.1.
df = df.loc[df['Global_Sales'].gt(0.1)]

In [4]:
# df.head(10)

## The most common video game publisher

In [5]:
# Tally rows per publisher (value_counts sorts descending) and show the top entry.
df['Publisher'].value_counts().iloc[:1]

Electronic Arts    1161
Name: Publisher, dtype: int64

In [6]:
# METHOD 2: take the mode of the Publisher column directly.

In [7]:
# mode() returns every most-frequent value; label 0 is the first of them
# (.get yields None instead of raising when there is no such label).
publisher_modes = df['Publisher'].mode()
most_common_publisher = publisher_modes.get(0)
most_common_publisher

'Electronic Arts'

## The most common platform

In [8]:
# Tally rows per platform (value_counts sorts descending) and show the top entry.
df['Platform'].value_counts().iloc[:1]

PS2    1520
Name: Platform, dtype: int64

In [9]:
# METHOD 2: take the mode of the Platform column directly.

In [10]:
# mode() returns every most-frequent value; label 0 is the first of them
# (.get yields None instead of raising when there is no such label).
platform_modes = df['Platform'].mode()
most_common_platform = platform_modes.get(0)
most_common_platform

'PS2'

## The most common genre

In [11]:
# Tally rows per genre (value_counts sorts descending) and show the top entry.
df['Genre'].value_counts().iloc[:1]

Action    2201
Name: Genre, dtype: int64

In [12]:
# METHOD 2: count rows per genre with groupby, then sort the counts.

In [13]:
# Group the Global_Sales column by genre and count the non-null entries in each group.
popular_genre = df.groupby('Genre')['Global_Sales']
series = popular_genre.count()

# Largest count first; show only the leading genre.
series.sort_values(ascending=False).iloc[:1]

Genre
Action    2201
Name: Global_Sales, dtype: int64

## The top 20 highest grossing games

In [14]:
# Project down to the two columns of interest, order by global sales
# (largest first) and display the first 20 rows.
sales_and_names = df.loc[:, ['Global_Sales', 'Name']]
sales_and_names.sort_values('Global_Sales', ascending=False).iloc[:20]

Unnamed: 0,Global_Sales,Name
0,82.74,Wii Sports
1,40.24,Super Mario Bros.
2,35.82,Mario Kart Wii
3,33.0,Wii Sports Resort
4,31.37,Pokemon Red/Pokemon Blue
5,30.26,Tetris
6,30.01,New Super Mario Bros.
7,29.02,Wii Play
8,28.62,New Super Mario Bros. Wii
9,28.31,Duck Hunt


## North American video game sales median

In [15]:
# na_sales_median = df.NA_Sales.median()
# na_sales_median
# print(f'NA median sales is ${na_sales_median}')

In [16]:
# METHOD 2: median over games with NA sales above zero only.

In [17]:
# Drop rows with no North American sales before taking the median,
# so zero-sale rows do not pull the value down.
na_sales = df.loc[df['NA_Sales'].gt(0)]
median = na_sales['NA_Sales'].median()
median


0.22

## Ten games whose NA_Sales equals the median

In [18]:
# Order the frame by North American sales, then show the first ten rows
# whose NA_Sales is exactly the median computed in the previous cell.
by_na_sales = df.sort_values('NA_Sales')
at_median = by_na_sales['NA_Sales'].eq(median)
by_na_sales.loc[at_median].iloc[:10]


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
6665,6667,Spider-Man: Web of Shadows,Wii,2008.0,Action,Activision,0.22,0.01,0.0,0.02,0.25
6009,6011,Area 51,XB,2005.0,Shooter,Midway Games,0.22,0.06,0.0,0.01,0.29
4110,4112,Crash: Mind Over Mutant,X360,2008.0,Platform,Vivendi Games,0.22,0.21,0.0,0.05,0.48
2421,2423,Dragon Ball Z: Burst Limit,PS3,2008.0,Fighting,Atari,0.22,0.34,0.18,0.12,0.86
3232,3234,Wet,PS3,2009.0,Shooter,Bethesda Softworks,0.22,0.27,0.01,0.11,0.63
6027,6029,The New Tetris,N64,1999.0,Puzzle,Nintendo,0.22,0.05,0.02,0.0,0.29
6658,6660,NHL 08,X360,2007.0,Action,Electronic Arts,0.22,0.01,0.0,0.02,0.25
6010,6012,The Smurfs: Dance Party,Wii,2011.0,Misc,Ubisoft,0.22,0.04,0.0,0.02,0.29
4099,4101,Dark Sector,X360,2008.0,Shooter,D3Publisher,0.22,0.2,0.01,0.05,0.48
6195,6197,"The Chronicles of Narnia: The Lion, The Witch ...",GC,,Action,Disney Interactive Studios,0.22,0.06,0.0,0.01,0.28


## Standard Deviation

In [19]:
# Spread of North American sales across the (already filtered) dataset.
games_na_sales = df

# First row of the frame; presumably the all-time best seller because the CSV
# appears to be ordered by Rank -- TODO confirm against the data file.
top_selling_all_time = df.head(1)

# Pull the scalar out with .iloc[0]: calling float() on a one-element Series
# is deprecated in recent pandas releases.
top_selling_all_time_na_sales = float(top_selling_all_time.NA_Sales.iloc[0])

# Aggregate the NA_Sales column directly. DataFrame.mean()/std() over the whole
# frame also tries the text columns (Name, Platform, ...), which raises
# TypeError on pandas >= 2.0 where numeric_only defaults to False.
mean_na_sales = games_na_sales.NA_Sales.mean()
std_dev = games_na_sales.NA_Sales.std()

std_dev

1.0029493033681236

## Nintendo Wii & Other Games On the Same Platform

In [20]:
# Add a 0/1 indicator column 'WII': 1 when the platform is 'Wii', else 0.
# A vectorised comparison replaces the per-row lambda; missing platforms
# compare unequal and therefore still map to 0.
df['WII'] = df.Platform.eq('Wii').astype('int64')

# Split the rows into two frames: Wii titles and everything else.
list_wii = df[df.WII == 1]
list_not_wii = df[df.WII == 0]

# Row counts per value of the WII flag.
wii = df.WII.groupby(df.WII)
series = wii.count()

# Mean global sales of each group, rounded to two decimals.
wii_avg = round(list_wii.Global_Sales.mean(), 2)
not_wii_avg = round(list_not_wii.Global_Sales.mean(), 2)

print(f'The average sale for Wii is {wii_avg}; the average sale for non-Wii is {not_wii_avg}.')



The average sale for Wii is 0.99; the average sale for non-Wii is 0.81.


## Additional Question (1): What are the top five best-selling Wii games?

In [21]:
# Sort Wii titles by global sales (ascending) and keep the last five rows,
# i.e. the five largest, shown smallest-to-largest.
wii_sorted_by_sales = list_wii.sort_values('Global_Sales')
wii_top_five = wii_sorted_by_sales.iloc[-5:]
wii_top_five

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,WII
8,9,New Super Mario Bros. Wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62,1
7,8,Wii Play,Wii,2006.0,Misc,Nintendo,14.03,9.2,2.93,2.85,29.02,1
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0,1
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82,1
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74,1


## Additional Question (2): What are the average sales of the Wii's top five games?

In [22]:
# Average global sales of the five Wii titles selected above, to two decimals.
wii_top_five_sales = wii_top_five['Global_Sales']
wii_top_five_mean = round(wii_top_five_sales.mean(), 2)
wii_top_five_mean

41.84

## Additional Question (3): What are the average sales of the top five non-Wii games?

In [23]:
# Sort non-Wii titles by global sales (ascending) and keep the last five rows,
# i.e. the five largest.
not_wii_sorted_by_sales = list_not_wii.sort_values('Global_Sales')
not_wii_top_five = not_wii_sorted_by_sales.iloc[-5:]

# Average global sales of those five titles, to two decimals.
not_wii_top_five_sales = not_wii_top_five['Global_Sales']
not_wii_top_five_mean = round(not_wii_top_five_sales.mean(), 2)
not_wii_top_five_mean

32.04