In [21]:
# Dependencies and Setup

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [5]:
# Load the sales workbook into a DataFrame

raw_game_data = "Video Games Sales.xlsx"  # resolved relative to the current working directory
game_data_df = pd.read_excel(io=raw_game_data)
game_data_df.head(5)

Unnamed: 0,Rank,Game Title,Platform,Year,Genre,Publisher,North America,Europe,Japan,Rest of World,Global,Review
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,40.43,28.39,3.77,8.54,81.12,76.28
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,91.0
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,14.5,12.22,3.63,3.21,33.55,82.07
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,14.82,10.51,3.18,3.01,31.52,82.65
4,5,Tetris,GB,1989.0,Puzzle,Nintendo,23.2,2.26,4.22,0.58,30.26,88.0


In [7]:
# Count the rows (games) listed under each publisher

publisher_names = game_data_df["Publisher"]
# NOTE: despite the _df suffix this is a Series of counts, most frequent publisher first
game_qty_df = publisher_names.value_counts()

game_qty_df.head(5)

Publisher
Electronic Arts                341
Nintendo                       296
Sony Computer Entertainment    156
Activision                     141
Ubisoft                         93
Name: count, dtype: int64

In [8]:
# Total the sales columns per publisher

# Columns that get summed within each publisher group
sales_columns = ['North America', 'Europe', 'Japan', 'Rest of World', 'Global']
publisher_groups = game_data_df.groupby('Publisher')
sum_by_publisher = publisher_groups[sales_columns].sum()

sum_by_publisher.head(5)

Unnamed: 0_level_0,North America,Europe,Japan,Rest of World,Global
Publisher,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3DO,1.57,0.46,0.0,0.1,2.13
505 Games,11.22,7.68,0.23,2.23,21.37
989 Studios,6.99,1.98,0.02,0.39,9.39
ASC Games,0.73,0.5,0.0,0.09,1.31
ASCII Entertainment,0.0,0.0,0.9,0.0,0.9


In [9]:
# Combine the per-publisher game counts with the per-publisher sales totals

# Inner join keyed on "Publisher": counts on the left, sales sums on the right
merged_df = pd.merge(
    left=game_qty_df,
    right=sum_by_publisher,
    how="inner",
    on="Publisher",
)

merged_df.head(5)

Unnamed: 0_level_0,count,North America,Europe,Japan,Rest of World,Global
Publisher,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Electronic Arts,341,352.35,204.0,8.73,68.36,633.36
Nintendo,296,688.47,341.8,338.04,80.58,1448.84
Sony Computer Entertainment,156,167.59,119.19,50.27,40.51,377.61
Activision,141,219.21,113.52,3.66,35.07,371.42
Ubisoft,93,103.23,69.61,1.79,21.69,196.32


In [10]:
# Calculate Rankings

# Move Publisher out of the index into an ordinary column so the cells below
# can select it by name. The guard keeps this cell safe to re-run: once the
# index has been reset there is nothing left to move, and no extra
# "index" / "Unnamed: 0" column is ever created.
if merged_df.index.name == "Publisher":
    merged_df = merged_df.reset_index()

# Per-game average for each region: the region's sales column divided by the
# 'count' column. Adds one "<region> Ave/Game" column per region, in this order.
merged_df = merged_df.assign(**{
    f"{region} Ave/Game": merged_df[region] / merged_df['count']
    for region in ['North America', 'Europe', 'Japan', 'Rest of World', 'Global']
})

# Export the table. Publisher is now a regular column, so the positional index
# is left out; the file keeps the "Publisher,count,..." layout. The in-memory
# frame is used as-is instead of being re-read from disk, which avoids pushing
# every float through a text round trip.
merged_df.to_csv("./merged.csv", index=False)
merged_df.head()



Unnamed: 0,Publisher,count,North America,Europe,Japan,Rest of World,Global,North America Ave/Game,Europe Ave/Game,Japan Ave/Game,Rest of World Ave/Game,Global Ave/Game
0,Electronic Arts,341,352.35,204.0,8.73,68.36,633.36,1.033284,0.59824,0.025601,0.200469,1.857361
1,Nintendo,296,688.47,341.8,338.04,80.58,1448.84,2.325912,1.15473,1.142027,0.27223,4.89473
2,Sony Computer Entertainment,156,167.59,119.19,50.27,40.51,377.61,1.074295,0.764038,0.322244,0.259679,2.420577
3,Activision,141,219.21,113.52,3.66,35.07,371.42,1.554681,0.805106,0.025957,0.248723,2.634184
4,Ubisoft,93,103.23,69.61,1.79,21.69,196.32,1.11,0.748495,0.019247,0.233226,2.110968


In [26]:
# North America: publishers ranked by average sales per game

# Keep only the publisher name and the North America per-game average
NA_data = merged_df.loc[:, ["Publisher", "North America Ave/Game"]]
# Highest average first
NA_data2 = NA_data.sort_values("North America Ave/Game", ascending=False)
# Renumber rows 0..n-1 in ranked order; the previous row labels land in an
# "index" column (assumes merged_df carries an unnamed index), which is then dropped
NA_data3 = NA_data2.reset_index()
NA_data4 = NA_data3.drop(columns="index")

# Row 0 is the top of the ranking and the final row is the bottom.
# NOTE(review): when several publishers tie on the lowest value, which one ends up last is arbitrary.
print(f"The best sales per game is {NA_data4.loc[0, 'Publisher']}")
print(f"The worst sales per game is {NA_data4['Publisher'].iat[-1]}")
NA_data4.head(5)



The best sales per game is Red Orb
The worst sales per game is Pinnacle


Unnamed: 0,Publisher,North America Ave/Game
0,Red Orb,3.215
1,Havas Interactive,2.78
2,Microsoft Game Studios,2.354894
3,Nintendo,2.325912
4,RedOctane,2.133333


In [29]:
# Europe: publishers ranked by average sales per game

# Keep only the publisher name and the Europe per-game average
Europe_data = merged_df.loc[:, ["Publisher", "Europe Ave/Game"]]
# Highest average first
Europe_data2 = Europe_data.sort_values("Europe Ave/Game", ascending=False)
# Renumber rows 0..n-1 in ranked order; the previous row labels land in an
# "index" column (assumes merged_df carries an unnamed index), which is then dropped
Europe_data3 = Europe_data2.reset_index()
Europe_data4 = Europe_data3.drop(columns="index")

# Row 0 is the top of the ranking and the final row is the bottom.
# NOTE(review): when several publishers tie on the lowest value, which one ends up last is arbitrary.
print(f"The best sales per game is {Europe_data4.loc[0, 'Publisher']}")
print(f"The worst sales per game is {Europe_data4['Publisher'].iat[-1]}")
Europe_data4.head(5)

The best sales per game is Red Orb
The worst sales per game is ASCII Entertainment


Unnamed: 0,Publisher,Europe Ave/Game
0,Red Orb,1.805
1,505 Games,1.28
2,Nintendo,1.15473
3,id Software,1.12
4,Russel,1.11


In [31]:
# Rest of World: publishers ranked by average sales per game

# Keep only the publisher name and the Rest of World per-game average
ROW_data = merged_df.loc[:, ["Publisher", "Rest of World Ave/Game"]]
# Highest average first
ROW_data2 = ROW_data.sort_values("Rest of World Ave/Game", ascending=False)
# Renumber rows 0..n-1 in ranked order; the previous row labels land in an
# "index" column (assumes merged_df carries an unnamed index), which is then dropped
ROW_data3 = ROW_data2.reset_index()
ROW_data4 = ROW_data3.drop(columns="index")

# Row 0 is the top of the ranking and the final row is the bottom.
# NOTE(review): when several publishers tie on the lowest value, which one ends up last is arbitrary.
print(f"The best sales per game is {ROW_data4.loc[0, 'Publisher']}")
print(f"The worst sales per game is {ROW_data4['Publisher'].iat[-1]}")
ROW_data4.head(5)

The best sales per game is 505 Games
The worst sales per game is Westwood Studios


Unnamed: 0,Publisher,Rest of World Ave/Game
0,505 Games,0.371667
1,Universal Interactive,0.3275
2,Take-Two Interactive,0.326667
3,RedOctane,0.3
4,Microsoft Game Studios,0.284468


In [32]:
# Japan: publishers ranked by average sales per game

# Keep only the publisher name and the Japan per-game average
Japan_data = merged_df.loc[:, ["Publisher", "Japan Ave/Game"]]
# Highest average first
Japan_data2 = Japan_data.sort_values("Japan Ave/Game", ascending=False)
# Renumber rows 0..n-1 in ranked order; the previous row labels land in an
# "index" column (assumes merged_df carries an unnamed index), which is then dropped
Japan_data3 = Japan_data2.reset_index()
Japan_data4 = Japan_data3.drop(columns="index")

# Row 0 is the top of the ranking and the final row is the bottom.
# NOTE(review): when several publishers tie on the lowest value, which one ends up last is arbitrary.
print(f"The best sales per game is {Japan_data4.loc[0, 'Publisher']}")
print(f"The worst sales per game is {Japan_data4['Publisher'].iat[-1]}")
Japan_data4.head(5)

The best sales per game is Enix Corporation
The worst sales per game is Pinnacle


Unnamed: 0,Publisher,Japan Ave/Game
0,Enix Corporation,2.5
1,Banpresto,1.69
2,Square,1.54875
3,Nintendo,1.142027
4,Sammy Corporation,1.0


In [33]:
# Global: publishers ranked by average sales per game

# Keep only the publisher name and the Global per-game average
Global_data = merged_df.loc[:, ["Publisher", "Global Ave/Game"]]
# Highest average first
Global_data2 = Global_data.sort_values("Global Ave/Game", ascending=False)
# Renumber rows 0..n-1 in ranked order; the previous row labels land in an
# "index" column (assumes merged_df carries an unnamed index), which is then dropped
Global_data3 = Global_data2.reset_index()
Global_data4 = Global_data3.drop(columns="index")

# Row 0 is the top of the ranking and the final row is the bottom.
# NOTE(review): when several publishers tie on the lowest value, which one ends up last is arbitrary.
print(f"The best sales per game is {Global_data4.loc[0, 'Publisher']}")
print(f"The worst sales per game is {Global_data4['Publisher'].iat[-1]}")
Global_data4.head(5)

The best sales per game is Red Orb
The worst sales per game is Pinnacle


Unnamed: 0,Publisher,Global Ave/Game
0,Red Orb,5.23
1,Nintendo,4.89473
2,Microsoft Game Studios,3.611277
3,505 Games,3.561667
4,RedOctane,2.87
