#  Data Analysis with Pandas
## Video Game Sales
## By Sara Zwairi
## 9th of aug 2021

In [295]:
import pandas as pd

In [296]:
df = pd.read_csv("vg-stats/vgsales.csv")
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


### Which company is the most common video game publisher?


In [297]:
most_common_publisher=df["Publisher"].value_counts().idxmax()
most_common_publisher

'Electronic Arts'

### What’s the most common platform?

In [298]:
most_common_platform=df["Platform"].value_counts().idxmax()
most_common_platform

'DS'

### What about the most common genre?


In [299]:
most_common_genre=df["Genre"].value_counts().idxmax()
most_common_genre

'Action'

### What are the top 20 highest grossing games?

In [300]:
top_twenty_highest_grossing_games=df[["Name","Global_Sales"]].sort_values("Global_Sales",ascending=False)
top_twenty_highest_grossing_games.head(20)

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


### For North American video game sales, what’s the median? 

In [301]:
na_median_sales=df["NA_Sales"].median()
na_median_sales

0.08

### * Provide a secondary output showing ten games surrounding the median sales output

### * assume that games with same median value are sorted in descending order


In [302]:
north_american_ten_vg_sales_median=df[df["NA_Sales"]==na_median_sales]
north_american_ten_vg_sales_median[["Rank","Name","Platform"]].sort_values("Rank",ascending=False).head(10)

Unnamed: 0,Rank,Name,Platform
11492,11494,Ultimate Shooting Collection,Wii
11455,11457,The Hidden,3DS
11432,11434,DanceDanceRevolution,X360
11431,11433,Little League World Series Baseball: Double Play,DS
11403,11405,My English Coach: Para Hispanoparlantes,DS
11390,11392,Super Robot Taisen OG Saga: Endless Frontier,DS
11386,11388,Sushi Academy,DS
11376,11378,Face Racers: Photo Finish,3DS
11375,11377,Dream Day: Wedding Destinations,DS
11371,11373,Death Jr. and the Science Fair of Doom,DS


### For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [303]:
standard_deviations=(df["NA_Sales"].head(1)-df["NA_Sales"].mean())/df["NA_Sales"].std()
standard_deviations

0    50.478988
Name: NA_Sales, dtype: float64

In [304]:
# above=df[df["NA_Sales"]>standard_deviations][["Name","NA_Sales"]]
# above

ValueError: Can only compare identically-labeled Series objects

In [None]:
# bellow=df[df["NA_Sales"]<standard_deviations][["Name","NA_Sales"]]
# bellow

### The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [None]:
#wii avg
wii_avg_global_sales=df[df["Platform"]=="Wii"]["Global_Sales"].mean()
print(wii_avg_global_sales)

#other platforms
others_avg_global_sales_without_wii=df[df["Platform"]!="Wii"]["Global_Sales"].mean()
others_avg_global_sales_without_wii

### Come up with 3 more questions that can be answered with this data set.

### year of highest global sales of vg

In [None]:
highest_year =df.groupby(["Year"])["Global_Sales"].sum().idxmax()
highest_year

### how many was the higest global sales?

In [None]:
highest_sales = df[df["Year"]==highest_year]["Global_Sales"].sum()
highest_sales

### sports vg only

In [None]:
sports_vg=df[df["Genre"]=="Sports"].head(10)
sports_vg

In [None]:
def test():

    def assert_equal(actual,expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

    assert_equal(most_common_publisher, "Electronic Arts")
    assert_equal(most_common_platform, "DS")
    assert_equal(most_common_genre, "Action")
    assert_equal(top_twenty_highest_grossing_games.iloc[0].Name, "Wii Sports")
    assert_equal(top_twenty_highest_grossing_games.iloc[19].Name, "Brain Age: Train Your Brain in Minutes a Day")
    assert_equal(na_median_sales, 0.08)

    print("Success!!!")

test()