# Video Game Sales Analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('./vgsales.csv')

# Sales are in millions

## Which company is the most common video game publisher?

In [3]:
df.Publisher.mode()

0    Electronic Arts
dtype: object

## What’s the most common platform?

In [4]:
df.Platform.mode()

0    DS
dtype: object

## What about the most common genre?

In [5]:
df.Genre.mode()

0    Action
dtype: object

## What are the top 20 highest grossing games?

In [6]:
df[['Name','Global_Sales']].nlargest(20,'Global_Sales')

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


## For North American video game sales, what’s the median?

In [7]:
df.NA_Sales.median()

0.08

## Provide a secondary output showing ‘about’ ten games surrounding the median sales output

In [8]:
criteria = df['NA_Sales'].between(0.0799999, 0.0800001, inclusive=False)

In [9]:
df[criteria]


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
446,447,Dragon Warrior IV,NES,1990.0,Role-Playing,Enix Corporation,0.08,0.00,3.03,0.01,3.12
497,498,World Soccer Winning Eleven 7 International,PS2,2003.0,Sports,Konami Digital Entertainment,0.08,1.24,1.13,0.45,2.90
1617,1619,Farming Simulator 2015,PC,2014.0,Simulation,Focus Home Interactive,0.08,1.02,0.00,0.13,1.23
1926,1928,Pro Evolution Soccer 2008,X360,2007.0,Sports,Konami Digital Entertainment,0.08,0.90,0.04,0.05,1.07
2067,2069,Winning Eleven: Pro Evolution Soccer 2007 (All...,X360,2006.0,Sports,Konami Digital Entertainment,0.08,0.90,0.02,0.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...
11403,11405,My English Coach: Para Hispanoparlantes,DS,2009.0,Misc,Ubisoft,0.08,0.00,0.00,0.01,0.08
11431,11433,Little League World Series Baseball: Double Play,DS,2010.0,Sports,Activision,0.08,0.00,0.00,0.01,0.08
11432,11434,DanceDanceRevolution,X360,2011.0,Simulation,Konami Digital Entertainment,0.08,0.00,0.00,0.01,0.08
11455,11457,The Hidden,3DS,,Adventure,Unknown,0.08,0.00,0.00,0.00,0.08


## For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?


In [10]:
df[['Name','NA_Sales']].nlargest(20,'NA_Sales').std()

NA_Sales    8.500318
dtype: float64

## The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?


In [11]:
df.groupby('Platform').describe().Global_Sales['mean']

Platform
2600    0.729925
3DO     0.033333
3DS     0.486169
DC      0.307115
DS      0.380254
GB      2.606633
GBA     0.387470
GC      0.358561
GEN     1.050370
GG      0.040000
N64     0.686144
NES     2.561939
NG      0.120000
PC      0.269604
PCFX    0.030000
PS      0.610920
PS2     0.581046
PS3     0.720722
PS4     0.827679
PSP     0.244254
PSV     0.149952
SAT     0.194162
SCD     0.311667
SNES    0.837029
TG16    0.080000
WS      0.236667
Wii     0.699404
WiiU    0.572448
X360    0.774672
XB      0.313422
XOne    0.662254
Name: mean, dtype: float64

## What's the best-selling game in Japan?

In [12]:
df[['Name','JP_Sales']].nlargest(1,'JP_Sales')

Unnamed: 0,Name,JP_Sales
4,Pokemon Red/Pokemon Blue,10.22


## How many games came out each year?

In [13]:
df.groupby('Year').describe().Rank['count']

Year
1980.0       9.0
1981.0      46.0
1982.0      36.0
1983.0      17.0
1984.0      14.0
1985.0      14.0
1986.0      21.0
1987.0      16.0
1988.0      15.0
1989.0      17.0
1990.0      16.0
1991.0      41.0
1992.0      43.0
1993.0      60.0
1994.0     121.0
1995.0     219.0
1996.0     263.0
1997.0     289.0
1998.0     379.0
1999.0     338.0
2000.0     349.0
2001.0     482.0
2002.0     829.0
2003.0     775.0
2004.0     763.0
2005.0     941.0
2006.0    1008.0
2007.0    1202.0
2008.0    1428.0
2009.0    1431.0
2010.0    1259.0
2011.0    1139.0
2012.0     657.0
2013.0     546.0
2014.0     582.0
2015.0     614.0
2016.0     344.0
2017.0       3.0
2020.0       1.0
Name: count, dtype: float64

## What is the least common genre?

In [14]:
df.groupby('Genre').describe().Rank['count'].nsmallest(1)

Genre
Puzzle    582.0
Name: count, dtype: float64