# Data Analysis with Pandas - Video Game Sales dataset - Trad Almelhem 15-11-2021

In [1]:
import pandas as pd
df = pd.read_csv('vgsales.csv')


In [2]:
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


## Which company is the most common video game publisher?

In [3]:
def most_common_publisher():
    return df['Publisher'].value_counts().sort_values(ascending=False).index[0]
most_common_publisher()

'Electronic Arts'

## What’s the most common platform?

In [4]:
def most_common_platform():
    return df['Platform'].mode()[0]
most_common_platform()

'DS'

## What about the most common genre?

In [5]:
def most_common_genre():
    return df['Genre'].mode()[0]
most_common_genre()

'Action'

## What are the top 20 highest grossing games?

In [6]:
def top_twenty_grossing_games():
    return df.sort_values(by='Global_Sales',ascending=False).head(20)[['Name','Global_Sales']]
top_twenty_grossing_games()

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


## For North American video game sales, what’s the median?

In [7]:
def NA_median():
    return df['NA_Sales'].median()
NA_median()

0.08

## Provide a secondary output showing ten games surrounding the median sales output - assume that games with same median value are sorted in descending order

In [8]:
def ten_games():
    return df[df['NA_Sales'] == df['NA_Sales'].median()].sample(10)
ten_games()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
11316,11318,Major League Baseball 2K12,DS,2012.0,Sports,Take-Two Interactive,0.08,0.0,0.0,0.01,0.08
11085,11087,Remington Super Slam Hunting: Alaska,Wii,2011.0,Sports,Mastiff,0.08,0.0,0.0,0.01,0.09
10432,10434,UFC: Tapout 2,XB,2003.0,Fighting,Capcom,0.08,0.02,0.0,0.0,0.1
9124,9126,Destruction Derby Raw,PS,2000.0,Racing,Sony Computer Entertainment,0.08,0.05,0.0,0.01,0.14
10287,10289,The Polar Express,GC,2004.0,Adventure,THQ,0.08,0.02,0.0,0.0,0.11
10730,10732,Harvest Moon: Frantic Farming,DS,2009.0,Puzzle,Rising Star Games,0.08,0.01,0.0,0.01,0.1
10616,10618,SNK vs. Capcom: SVC Chaos,XB,2004.0,Fighting,Ignition Entertainment,0.08,0.02,0.0,0.0,0.1
10580,10582,World Series of Poker 2008: Battle for the Bra...,PS3,2007.0,Misc,Activision,0.08,0.01,0.0,0.01,0.1
10574,10576,Conflict: Global Terror,XB,2005.0,Shooter,SCi,0.08,0.02,0.0,0.0,0.1
10181,10183,Smash Court Tennis 3,X360,2007.0,Sports,Atari,0.08,0.02,0.0,0.01,0.11


## For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [9]:
def z_score():
    high_score = df[['Name', 'NA_Sales']].sort_values('NA_Sales', ascending=False)['NA_Sales'][0]
    standard_deviation = df.NA_Sales.std()
    mean = df.NA_Sales.mean()
    return ((high_score - mean)/standard_deviation)

z_score()

50.47898767479108

## The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [10]:
Wii = df[df['Platform'] == 'Wii' ]['Global_Sales'].mean()
print('Wii',Wii)

not_Wii = df[df['Platform'] != 'Wii' ]['Global_Sales'].mean()
print('Not Wii',not_Wii)

Wii 0.6994037735849057
Not Wii 0.5233896418516336


# Come up with 3 more questions that can be answered with this data set.


## Show the least common 10 video games according to the Global sales

In [11]:
def least_common_game():
    return df[['Name','Global_Sales']].sort_values(by='Global_Sales',ascending=False).head(10)['Name']
least_common_game()

0                   Wii Sports
1            Super Mario Bros.
2               Mario Kart Wii
3            Wii Sports Resort
4     Pokemon Red/Pokemon Blue
5                       Tetris
6        New Super Mario Bros.
7                     Wii Play
8    New Super Mario Bros. Wii
9                    Duck Hunt
Name: Name, dtype: object

## Show every unsold video game according to north america sales?

In [12]:
def Unsold_game_in_NA():
    return df[['Name','NA_Sales']].loc[df.NA_Sales==0]
Unsold_game_in_NA()

Unnamed: 0,Name,NA_Sales
214,Monster Hunter Freedom 3,0.0
338,Friend Collection,0.0
383,Monster Hunter 4,0.0
402,English Training: Have Fun Improving Your Skills!,0.0
426,Dragon Quest VI: Maboroshi no Daichi,0.0
...,...,...
16587,Mezase!! Tsuri Master DS,0.0
16589,Chou Ezaru wa Akai Hana: Koi wa Tsuki ni Shiru...,0.0
16590,Eiyuu Densetsu: Sora no Kiseki Material Collec...,0.0
16595,SCORE International Baja 1000: The Official Game,0.0


## What is the year that have the biggest number of video games released?

In [13]:
def most_common_year():
    return df['Year'].mode()[0]
most_common_year()

2009.0

## Test

In [14]:
def test():
    
    def assert_equal(actual,expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

    assert_equal(most_common_publisher(), "Electronic Arts")
    assert_equal(most_common_platform(), "DS")
    assert_equal(most_common_genre(), "Action")
    assert_equal(top_twenty_grossing_games().iloc[0].Name, "Wii Sports")
    assert_equal(top_twenty_grossing_games().iloc[19].Name, "Brain Age: Train Your Brain in Minutes a Day")
    assert_equal(NA_median(), 0.08)
    # assert_equal(Ten_values_surrounding_median().iloc[0].Name, "Bladestorm: The Hundred Years' War")
    assert_equal(z_score(), 50.47898767479108)
    assert_equal(least_common_game().iloc[0], "Wii Sports")
    assert_equal(most_common_year(), 2009.0)
    assert_equal(Unsold_game_in_NA().iloc[0].Name, "Monster Hunter Freedom 3")
    print("Success!!!")

test()

AssertionError: Expected Spirits & Spells but got Wii Sports