# Data Analysis with Pandas

## Sales data from more than 16,500 games

> Mohammad Ali Khalil

- 15/11/2021

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt

import pandas as pd

In [2]:
# Load the video-game sales table (sales figures for more than 16,500 games).
VGSALES_CSV = './assets/vgsales.csv'
df = pd.read_csv(VGSALES_CSV)

In [3]:
# Column labels of the loaded table. Only the final expression of a cell is
# rendered, so this cell shows exactly one thing: the column Index.
df.columns

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

# 1- Which company is the most common video game publisher?

In [4]:
# Most frequent publisher: the first entry of the Publisher column's mode.
publisher_modes = df['Publisher'].mode()
most_common_publisher = publisher_modes[0]
most_common_publisher

'Electronic Arts'

# 2- What's the most common platform?

In [5]:
# Most frequent platform: the first entry of the Platform column's mode.
platform_modes = df['Platform'].mode()
most_common_platform = platform_modes[0]
most_common_platform

'DS'

# 3- What about the most common genre?


In [6]:
# Most frequent genre: the first entry of the Genre column's mode.
genre_modes = df['Genre'].mode()
most_common_genre = genre_modes[0]
most_common_genre

'Action'

# 4- What are the top 20 highest grossing games?

In [7]:
# Select the 20 rows with the largest Global_Sales by value, rather than
# relying on the CSV already being ordered by sales, then keep only the
# game name and its global sales figure.
top_twenty_highest_grossing_games = df.nlargest(20, 'Global_Sales')[['Name', 'Global_Sales']]
top_twenty_highest_grossing_games

Unnamed: 0,Name,Global_Sales
0,Wii Sports,82.74
1,Super Mario Bros.,40.24
2,Mario Kart Wii,35.82
3,Wii Sports Resort,33.0
4,Pokemon Red/Pokemon Blue,31.37
5,Tetris,30.26
6,New Super Mario Bros.,30.01
7,Wii Play,29.02
8,New Super Mario Bros. Wii,28.62
9,Duck Hunt,28.31


# 5- For North American sales, what's the median? Also list ten games whose sales equal that median.

In [8]:
# Median of the North America sales column.
na_sales_column = df.loc[:, 'NA_Sales']
na_median_sales = na_sales_column.median()
na_median_sales

0.08

In [9]:
# Rows whose NA_Sales is exactly the column median.
is_at_median = df['NA_Sales'] == df['NA_Sales'].median()
rows_at_median = df[is_at_median]

# Take ten of those rows (positions 225 through 234) and order them by Rank,
# largest Rank first.
ten_median_Games = rows_at_median.iloc[225:235].sort_values(by='Rank', ascending=False)

# Show just the game names of that ten-row selection.
ten_median_Games['Name']

10029              Tony Hawk's Pro Skater 5
10026                          NBA Jam 2002
10024    Family Game Night 4: The Game Show
10022                 LEGO Knights' Kingdom
10020              Phantasy Star Collection
10012                     Nickelodeon Dance
10000                        A Witch's Tale
9996             GT Advance 2: Rally Racing
9957                               Deadpool
9934                       Turok: Evolution
Name: Name, dtype: object

# 6- For the top-selling game of all time, how many standard deviations above/below the mean are its sales for North America?

In [10]:
# Z-score of the best-selling game's North America sales. The game is located
# by its maximum Global_Sales instead of assuming it sits in the first row,
# so the result does not depend on how the CSV happens to be ordered.
na_sales = df['NA_Sales']
top_seller_na_sales = na_sales.loc[df['Global_Sales'].idxmax()]
mean_deviations = (top_seller_na_sales - na_sales.mean()) / na_sales.std()
mean_deviations

50.47898767479108

# 7- The Nintendo Wii seems to have outdone itself with games. How does its average number of sales compare with all of the other platforms?

In [11]:
# Mean Global_Sales for two groups: rows whose Platform is 'Wii' (group key
# True) and every other row (group key False). The grouping is computed once
# and the two means are looked up by their boolean labels; integer keys on a
# boolean-labelled Series rely on a positional fallback that pandas has
# deprecated.
is_wii = df['Platform'] == 'Wii'
mean_sales_by_is_wii = df.groupby(is_wii)['Global_Sales'].mean()
data = (mean_sales_by_is_wii.loc[False], mean_sales_by_is_wii.loc[True])
Not_Wii = f'Not Wii {data[0]}'
Wii = f'Wii {data[1]}'
(Not_Wii, Wii)

('Not Wii 0.5233896418516336', 'Wii 0.6994037735849057')

In [12]:
def test():
    """Compare the notebook's computed answers with their expected values.

    The checks run in order and stop at the first mismatch with an
    AssertionError naming the expected and actual values. A success line is
    printed when every check passes.
    """

    def assert_equal(actual, expected):
        # Build the failure message first so the assert line stays short.
        failure_message = f"Expected {expected} but got {actual}"
        assert actual == expected, failure_message

    # Most frequent publisher, platform and genre.
    assert_equal(most_common_publisher, 'Electronic Arts')
    assert_equal(most_common_platform, 'DS')
    assert_equal(most_common_genre, 'Action')

    # First and last rows of the top-20 table.
    first_game = top_twenty_highest_grossing_games.iloc[0]
    assert_equal(first_game.Name, 'Wii Sports')
    last_game = top_twenty_highest_grossing_games.iloc[19]
    assert_equal(last_game.Name, 'Brain Age: Train Your Brain in Minutes a Day')

    # North America median sales.
    assert_equal(na_median_sales, 0.08)

    # Formatted average-sales strings for non-Wii and Wii platforms.
    assert_equal(Not_Wii, 'Not Wii 0.5233896418516336')
    assert_equal(Wii, 'Wii 0.6994037735849057')

    print("Success!!! 🟢")

test()

Success!!! 🟢
