# Data Analysis With Pandas

In [1]:
import pandas as pd

In [2]:
# Load the video game sales CSV (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='./vgsales.csv')

In [3]:
# Preview the first ten rows to see which columns were loaded.
df.head(n=10)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37
5,6,Tetris,GB,1989.0,Puzzle,Nintendo,23.2,2.26,4.22,0.58,30.26
6,7,New Super Mario Bros.,DS,2006.0,Platform,Nintendo,11.38,9.23,6.5,2.9,30.01
7,8,Wii Play,Wii,2006.0,Misc,Nintendo,14.03,9.2,2.93,2.85,29.02
8,9,New Super Mario Bros. Wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62
9,10,Duck Hunt,NES,1984.0,Shooter,Nintendo,26.93,0.63,0.28,0.47,28.31


### Which company is the most common video game publisher?

In [80]:
# mode() returns a Series holding the most frequent value(s);
# the entry labelled 0 is the first of them.
common_publisher = df.loc[:, 'Publisher'].mode()
most_common_publisher = common_publisher.loc[0]
most_common_publisher



'Electronic Arts'

### What’s the most common platform?

In [29]:
# Most frequent platform: mode() yields a Series, so pick the entry labelled 0.
com_plat = df.loc[:, 'Platform'].mode()
most_common_platform = com_plat.loc[0]
most_common_platform

'DS'

### What about the most common genre?

In [31]:
# Most frequent genre: mode() yields a Series, so pick the entry labelled 0.
common_genre = df.loc[:, 'Genre'].mode()
most_common_genre = common_genre.loc[0]
most_common_genre

'Action'

### What are the top 20 highest-grossing games?

In [32]:
# Sort ascending on Rank (Rank 1 comes first) and keep the first twenty rows.
twenty_highest_grossing_games = (
    df.loc[:, ['Name', 'Rank']]
    .sort_values(by='Rank', ascending=True)
    .head(n=20)
)
# Only the game names are needed for the answer; the original row labels are kept.
top_twenty_highest_grossing_games = twenty_highest_grossing_games['Name']
top_twenty_highest_grossing_games

0                                       Wii Sports
1                                Super Mario Bros.
2                                   Mario Kart Wii
3                                Wii Sports Resort
4                         Pokemon Red/Pokemon Blue
5                                           Tetris
6                            New Super Mario Bros.
7                                         Wii Play
8                        New Super Mario Bros. Wii
9                                        Duck Hunt
10                                      Nintendogs
11                                   Mario Kart DS
12                     Pokemon Gold/Pokemon Silver
13                                         Wii Fit
14                                    Wii Fit Plus
15                              Kinect Adventures!
16                              Grand Theft Auto V
17                   Grand Theft Auto: San Andreas
18                               Super Mario World
19    Brain Age: Train Your Bra

### For North American video game sales, what’s the median?

In [8]:
# Median of the North American sales column.
na_median_sales = df.loc[:, 'NA_Sales'].median()
na_median_sales

0.08

### Provide a secondary output showing ten games surrounding the median sales output

In [35]:
# Games whose North American sales equal the median computed earlier
# (na_median_sales), ordered from the highest Rank number down.
median_na_seller_names = (
    df.loc[df['NA_Sales'] == na_median_sales, ['Name', 'NA_Sales', 'Rank']]
    .sort_values('Rank', ascending=False)
)
# Every row in this frame has NA_Sales equal to the median, so read the value
# by position instead of through a hard-coded row label, which would raise a
# KeyError as soon as the dataset changes.
# NOTE: despite its name, this variable holds a single NA_Sales value.
ten_median_na_seller_names = median_na_seller_names['NA_Sales'].iloc[0]
ten_median_na_seller_names

0.08

### For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [10]:
# Column-wise mean over the single Rank == 1 row, i.e. that game's own numeric values.
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises a TypeError on the text columns (Name, Platform, ...)
# instead of silently dropping them as older versions did.
# NOTE: despite the name, this is not a standard deviation.
stan_dev = df[df['Rank'] == 1].mean(numeric_only=True)
stan_dev

Rank               1.00
Year            2006.00
NA_Sales          41.49
EU_Sales          29.02
JP_Sales           3.77
Other_Sales        8.46
Global_Sales      82.74
dtype: float64

In [11]:
# Standard deviation of North American sales (ddof=1 is the pandas default, spelled out here).
na_std = df.loc[:, 'NA_Sales'].std(ddof=1)
na_std




0.8166830292988796

In [12]:
# Mean of North American sales across every game in the dataset.
na_sales_mean = df.loc[:, 'NA_Sales'].mean()
na_sales_mean

0.26466742981082064

In [13]:
# North American sales of the Rank 1 game, kept as a one-element Series.
is_top_ranked = df['Rank'] == 1
number_one = df.loc[is_top_ranked, 'NA_Sales']
number_one

0    41.49
Name: NA_Sales, dtype: float64

In [14]:
# z-score: distance of the top game's NA sales from the mean, in units of standard deviation.
best_na_std = number_one.sub(na_sales_mean).div(na_std)
best_na_std

0    50.478988
Name: NA_Sales, dtype: float64

### The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [15]:
# Average global sales of games released on the Wii, rounded to two decimals.
is_wii = df['Platform'] == "Wii"
wii_mean = df.loc[is_wii, "Global_Sales"].mean().round(2)
wii_mean

0.7

In [16]:
# Average global sales over every row in the dataset, rounded to two decimals.
# NOTE(review): this mean includes the Wii rows as well; filter them out if the
# comparison is meant to be against the *other* platforms only.
all_plats = df.loc[:, "Global_Sales"].mean().round(2)
all_plats

0.54

- The Wii's average global sales per game (0.70) are above the average across all platforms (0.54).

### What is the least popular video game?

In [78]:
# The last row of the frame is taken as the least popular game.
# NOTE(review): this assumes the file is ordered by Rank, as the first rows suggest.
worst_game = df['Name'].tail(1)
# Read the single value by position rather than by the hard-coded row label
# 16597, so the cell keeps working if the number of rows changes.
name_worst = worst_game.iloc[0]
name_worst

'Spirits & Spells'

### What platform is that game on?

In [18]:
# Show the name and platform of the last row in the dataset.
df.loc[:, ['Name', 'Platform']].tail(n=1)

Unnamed: 0,Name,Platform
16597,Spirits & Spells,GBA


### What are the 10 highest-grossing games?

In [19]:
# Select the ten largest Global_Sales values explicitly instead of relying on
# the row order of the CSV; rows come back sorted from highest to lowest.
high_gross = df[['Name', 'Global_Sales']].nlargest(10, 'Global_Sales')
high_gross

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


In [81]:
def test():
    """Check the answers computed in the cells above against their expected values.

    Prints "Success!!!" when every check passes.

    Raises:
        AssertionError: if an answer differs from its expected value; the
            message names the failing check and shows both values.
    """

    def assert_equal(actual, expected, label):
        """Assert that ``actual == expected``, reporting ``label`` on failure."""
        assert actual == expected, "{}: expected {!r}, got {!r}".format(
            label, expected, actual
        )

    assert_equal(most_common_publisher, 'Electronic Arts', 'most common publisher')
    assert_equal(most_common_platform, 'DS', 'most common platform')
    assert_equal(most_common_genre, 'Action', 'most common genre')
    # The Series keeps the original row labels, so 0 and 19 address its first
    # and last entries.
    assert_equal(top_twenty_highest_grossing_games[0], 'Wii Sports', 'top game')
    assert_equal(
        top_twenty_highest_grossing_games[19],
        'Brain Age: Train Your Brain in Minutes a Day',
        'twentieth game',
    )
    assert_equal(na_median_sales, 0.08, 'NA median sales')
    assert_equal(ten_median_na_seller_names, 0.08, 'NA sales of a median seller')
    assert_equal(name_worst, 'Spirits & Spells', 'least popular game')

    print("Success!!!")

test()
# Configuration

Success!!!
