# Plotting

In [1]:
import pandas as pd
import plotly.express as px
from pyprojroot import here

In [2]:
# Ask IPython to render matplotlib figures inline in the notebook.
# NOTE(review): every chart visible in this notebook is drawn with plotly
# express, so this magic may be unnecessary — confirm before removing.
%matplotlib inline

In [3]:
# Load the cleaned sales table; `here` builds the path from the project root.
sales_csv_path = here("./clean_data/sales-jp.csv")
sales_jp = pd.read_csv(sales_csv_path)

In [4]:
# Inspect the rows whose release year is 1985 (the earliest year in the counts below).
released_in_1985 = sales_jp["year"] == 1985
sales_jp.loc[released_in_1985]

Unnamed: 0,rank,name,genre,esrb_rating,platform,publisher,developer,global_sales,na_sales,pal_sales,jp_sales,other_sales,year,critic_score,user_score
339,347,Excitebike,Racing,,NES,Nintendo,Nintendo R&D1,4.16,,,,,1985,,
361,369,Golf,Sports,,NES,Nintendo,Nintendo,4.01,,,,,1985,,
445,453,Kung Fu,Action,,NES,Nintendo,Irem Software Engineering,3.5,,,,,1985,,
503,511,Baseball,Sports,,NES,Nintendo,Nintendo,3.2,,,,,1985,,
894,904,Tennis,Sports,,NES,Nintendo,Nintendo,2.17,,,,,1985,,
1106,1120,Pinball,Misc,,NES,Nintendo,Nintendo,1.85,,,,,1985,,
1460,1479,Ice Climber,Platform,,NES,Nintendo,Nintendo R&D1,1.5,,,,,1985,,
1715,1738,Gyromite,Puzzle,,NES,Nintendo,Nintendo R&D1,1.32,,,,,1985,,
1795,1821,Hogan's Alley,Shooter,,NES,Nintendo,Intelligent Systems,1.27,,,,,1985,,
2873,2913,Clu Clu Land,Puzzle,,NES,Nintendo,Nintendo R&D1,0.82,0.42,0.1,0.28,0.02,1985,,


In [5]:
# Number of rows per release year, most frequent year first.
sales_jp["year"].value_counts()

2009    1641
2008    1564
2010    1433
2011    1314
2007    1268
2006    1050
2005     990
2002     842
2003     828
2004     811
2017     784
2015     755
2014     753
2016     746
2012     746
2018     714
2013     629
2001     521
2000     425
1999     366
1998     352
1996     299
1997     265
1995     194
1994      99
1993      63
1992      41
1991      34
1990      21
1989      19
1986      19
1988      17
1987      16
1985      13
Name: year, dtype: int64

In [6]:
# Number of rows per genre, most frequent genre first.
sales_jp["genre"].value_counts()

Action              2910
Sports              2544
Misc                2201
Adventure           1841
Role-Playing        1782
Shooter             1524
Racing              1414
Simulation          1130
Platform            1105
Fighting             942
Strategy             856
Puzzle               709
Action-Adventure     330
Music                144
Visual Novel         108
MMO                   44
Party                 37
Sandbox                6
Education              3
Board Game             2
Name: genre, dtype: int64

In [7]:
# Number of rows per platform, most frequent platform first.
sales_jp["platform"].value_counts()

Nintendo DS             2262
PlayStation 2           2216
PC                      2123
PlayStation 3           1370
Nintendo Wii            1357
PlayStation Portable    1313
Xbox 360                1307
PlayStation             1206
PlayStation 4            922
Game Boy Advance         857
Xbox                     836
PlayStation Vita         644
Nintendo 3DS             621
GameCube                 563
Xbox One                 539
Nintendo 64              322
Nintendo Switch          274
SNES                     241
Sega Saturn              174
Nintendo Wii U           161
NES                       96
Game Boy                  94
Sega Dreamcast            52
Sega Genesis              31
Atari 2600                13
Neo Geo                   12
PlayStation Network       10
Game Boy Color             9
Xbox Live                  7
Name: platform, dtype: int64

## Overview

In [8]:
# List the column labels available in the loaded sales table.
sales_jp.columns

Index(['rank', 'name', 'genre', 'esrb_rating', 'platform', 'publisher',
       'developer', 'global_sales', 'na_sales', 'pal_sales', 'jp_sales',
       'other_sales', 'year', 'critic_score', 'user_score'],
      dtype='object')

In [36]:
# Ten best-selling rows by global sales.
# DataFrame.nlargest returns exactly ten rows (on a tie at the cut-off it keeps
# the first occurrences). Matching values with isin() instead would let through
# every row that shares a sales figure with one of the top ten.
top_10_all_time = sales_jp.nlargest(10, "global_sales")

# px.histogram sums global_sales per name, so entries sharing a name are stacked.
fig = px.histogram(
    top_10_all_time,
    x="name",
    y="global_sales",
    color="genre",
    title="Top 10 entries by global sales",
)
fig.update_xaxes(tickangle=45)  # slant the long game names so they stay readable
fig.show()

In [10]:
# Row count per genre, largest first, limited to the 15 most frequent genres.
# The count column is named explicitly; left unnamed it becomes the integer
# label 0, which produces an axis titled "sum of 0".
top_genres = (
    sales_jp.groupby("genre")
    .size()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="count")
)

# The counts are already aggregated, so draw them directly as bars.
fig = px.bar(top_genres, x="genre", y="count", title="Number of entries per genre")
fig.update_xaxes(tickangle=45)
fig.show()

In [11]:
# Average global sales of the rows in each genre.
mean_sales_by_genre = (
    sales_jp.groupby("genre")["global_sales"]
    .mean()
    .reset_index(name="mean global sales")
)

fig = px.bar(
    data_frame=mean_sales_by_genre,
    x="genre",
    y="mean global sales",
).update_xaxes(tickangle=45)  # update_xaxes returns the same figure
fig.show()

In [12]:
# Number of rows recorded for each genre, most frequent first.
sales_jp["genre"].value_counts()

Action              2910
Sports              2544
Misc                2201
Adventure           1841
Role-Playing        1782
Shooter             1524
Racing              1414
Simulation          1130
Platform            1105
Fighting             942
Strategy             856
Puzzle               709
Action-Adventure     330
Music                144
Visual Novel         108
MMO                   44
Party                 37
Sandbox                6
Education              3
Board Game             2
Name: genre, dtype: int64

In [13]:
# Display the mean_sales_by_genre frame: one row per genre with its mean global sales.
mean_sales_by_genre

Unnamed: 0,genre,mean global sales
0,Action,0.50356
1,Action-Adventure,0.712424
2,Adventure,0.273628
3,Board Game,0.155
4,Education,0.14
5,Fighting,0.542781
6,MMO,0.899091
7,Misc,0.408287
8,Music,0.431736
9,Party,0.766216


In [35]:
# Global sales summed per release year, with one coloured segment per genre.
fig = px.histogram(
    data_frame=sales_jp,
    x="year",
    y="global_sales",
    color="genre",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [15]:
# Total global sales for every (year, genre) pair.
# The aggregation is given as the string "sum" rather than the Python builtin:
# newer pandas releases warn when a builtin callable is passed to agg and plan
# to change how it is applied. reset_index() already returns a new frame, so no
# extra .copy() is needed.
sales_year_genre = (
    sales_jp.groupby(["year", "genre"])
    .agg(sales=pd.NamedAgg(column="global_sales", aggfunc="sum"))
    .reset_index()
)

# One smoothed line per genre showing how its yearly total evolves.
fig = px.line(sales_year_genre, x="year", y="sales", color="genre", line_shape="spline")
fig.update_xaxes(tickangle=45)
fig.show()

In [16]:
# Row count per publisher, largest first, limited to the 15 most frequent publishers.
# The count column is named explicitly; left unnamed it becomes the integer
# label 0, which produces an axis titled "sum of 0".
top_publishers = (
    sales_jp.groupby("publisher")
    .size()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="count")
)

# The counts are already aggregated, so draw them directly as bars.
fig = px.bar(
    top_publishers,
    x="publisher",
    y="count",
    title="Number of entries per publisher (top 15)",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [17]:
# The 15 publishers with the highest average global sales per row.
mean_sales_by_publisher = (
    sales_jp.groupby("publisher")["global_sales"]
    .mean()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="mean global sales")
)

fig = px.bar(
    data_frame=mean_sales_by_publisher,
    x="publisher",
    y="mean global sales",
).update_xaxes(tickangle=45)  # update_xaxes returns the same figure
fig.show()

In [18]:
# Global sales summed per platform across the whole table.
fig = px.histogram(
    data_frame=sales_jp,
    x="platform",
    y="global_sales",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [19]:
# Global sales summed per genre across the whole table.
fig = px.histogram(
    data_frame=sales_jp,
    x="genre",
    y="global_sales",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [20]:
# Critic score against global sales, one point per row.
fig = px.scatter(
    data_frame=sales_jp,
    x="critic_score",
    y="global_sales",
)
fig.show()

In [21]:
# User score against global sales, one point per row.
fig = px.scatter(
    data_frame=sales_jp,
    x="user_score",
    y="global_sales",
)
fig.show()

## PC/PS4/Xbox One

In [22]:
# Keep only the rows for the three platforms this section looks at.
ppx_platforms = ["PC", "PlayStation 4", "Xbox One"]
on_ppx_platform = sales_jp["platform"].isin(ppx_platforms)
sales_ppx = sales_jp.loc[on_ppx_platform]

In [23]:
# Drop rows released before 1990 from the platform subset.
from_1990_onward = sales_ppx["year"] >= 1990
sales_ppx = sales_ppx.loc[from_1990_onward]

In [24]:
# Display the PC / PlayStation 4 / Xbox One subset after the year filter.
# NOTE(review): this echoes the whole frame and relies on pandas truncating the
# rendering; sales_ppx.head() would be a lighter preview.
sales_ppx

Unnamed: 0,rank,name,genre,esrb_rating,platform,publisher,developer,global_sales,na_sales,pal_sales,jp_sales,other_sales,year,critic_score,user_score
1,4,PlayerUnknown's Battlegrounds,Shooter,,PC,PUBG Corporation,PUBG Corporation,36.60,,,,,2017,,
7,10,Minecraft,Misc,,PC,Mojang,Mojang AB,30.01,,,,,2010,,
16,21,Grand Theft Auto V,Action,M,PlayStation 4,Rockstar Games,Rockstar North,19.39,6.06,9.71,0.60,3.02,2014,97.0,8.3
18,23,Garry's Mod,Misc,,PC,Unknown,Team Garry,18.58,,,,,2004,,
30,35,Call of Duty: Black Ops 3,Shooter,M,PlayStation 4,Activision,Treyarch,15.09,6.18,6.05,0.41,2.44,2015,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19615,19846,Super Meat Boy,Platform,,PlayStation 4,Team Meat,Team Meat,0.01,,0.00,,0.00,2016,,
19617,19848,BC Kings,Strategy,,PC,Strategy First,Mascot Entertainment,0.01,,0.00,,0.00,2008,,
19620,19851,This Is The Police,Simulation,M,Xbox One,THQ Nordic,Weappy Studio,0.01,0.00,,,0.00,2017,,
19627,19858,FirePower for Microsoft Combat Flight Simulator 3,Simulation,T,PC,GMX Media,Shockwave Productions,0.01,,0.00,,0.00,2004,,


In [25]:
# Ten best-selling rows by global sales within the PC/PS4/Xbox One subset.
# DataFrame.nlargest returns exactly ten rows (on a tie at the cut-off it keeps
# the first occurrences). Matching values with isin() instead would let through
# every row that shares a sales figure with one of the top ten.
top_10_ppx = sales_ppx.nlargest(10, "global_sales")

# px.histogram sums global_sales per name, so the same game on several
# platforms is stacked into one bar.
fig = px.histogram(
    top_10_ppx,
    x="name",
    y="global_sales",
    color="genre",
    opacity=0.8,
    title="Top 10 entries by global sales (PC / PS4 / Xbox One)",
)
fig.update_xaxes(tickangle=45)  # slant the long game names so they stay readable
fig.show()

In [26]:
# Row count per genre in the PC/PS4/Xbox One subset, largest first, top 15.
# The count column is named explicitly; left unnamed it becomes the integer
# label 0, which produces an axis titled "sum of 0".
top_genres_ppx = (
    sales_ppx.groupby("genre")
    .size()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="count")
)

# The counts are already aggregated, so draw them directly as bars.
fig = px.bar(
    top_genres_ppx,
    x="genre",
    y="count",
    title="Number of entries per genre (PC / PS4 / Xbox One)",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [27]:
# Average global sales of the rows in each genre, PC/PS4/Xbox One subset only.
mean_sales_by_genre_ppx = (
    sales_ppx.groupby("genre")["global_sales"]
    .mean()
    .reset_index(name="mean global sales")
)

fig = px.bar(
    data_frame=mean_sales_by_genre_ppx,
    x="genre",
    y="mean global sales",
    template="simple_white",
).update_xaxes(tickangle=45)  # update_xaxes returns the same figure
fig.show()

In [28]:
# Inspect the individual MMO rows in the PC/PS4/Xbox One subset.
is_mmo = sales_ppx["genre"] == "MMO"
sales_ppx.loc[is_mmo]

Unnamed: 0,rank,name,genre,esrb_rating,platform,publisher,developer,global_sales,na_sales,pal_sales,jp_sales,other_sales,year,critic_score,user_score
277,283,World of Warcraft: Cataclysm,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,4.7,,,,,2010,90.0,5.6
368,376,World of Warcraft: Wrath of the Lich King,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,4.0,,,,,2008,91.0,7.5
447,455,World of Warcraft: The Burning Crusade,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,3.5,,,,,2007,91.0,7.9
466,474,World of Warcraft: Battle for Azeroth,MMO,,PC,Blizzard Entertainment,Blizzard Entertainment,3.4,,,,,2018,,
487,495,World of Warcraft: Legion,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,3.3,,,,,2016,88.0,7.5
488,496,World of Warcraft: Warlords of Draenor,MMO,T,PC,Activision Blizzard,Blizzard Entertainment,3.3,,,,,2014,87.0,5.9
659,667,World of Warcraft: Mists of Pandaria,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,2.7,,,,,2012,82.0,4.8
964,976,The Elder Scrolls Online: Tamriel Unlimited,MMO,M,PlayStation 4,Bethesda Softworks,ZeniMax Online Studios,2.05,0.74,0.98,,0.33,2015,,
1362,1380,Final Fantasy XIV Online,MMO,T,PC,Square Enix,Square Enix,1.58,0.93,0.48,,0.16,2010,,
1628,1650,The Elder Scrolls Online: Tamriel Unlimited,MMO,M,Xbox One,Bethesda Softworks,ZeniMax Online Studios,1.38,0.86,0.39,,0.13,2015,,


In [29]:
# Restrict the subset to ten hand-picked genres before plotting sales by year.
# Note: this rebinds top_genres_ppx, which the genre-count cell also assigns.
genres_to_plot = [
    "Action", "Shooter", "Role-Playing", "Strategy",
    "Adventure", "Simulation", "Sports", "Racing",
    "Misc", "Action-Adventure",
]
top_genres_ppx = sales_ppx.loc[sales_ppx["genre"].isin(genres_to_plot)]

# Global sales summed per release year, with one coloured segment per genre.
fig = px.histogram(
    data_frame=top_genres_ppx,
    x="year",
    y="global_sales",
    color="genre",
    template="simple_white",
    opacity=0.8,
)
# simple_white hides grid lines by default; switch them back on for both axes.
fig.update_xaxes(showgrid=True)
fig.update_yaxes(showgrid=True)
fig.show()

In [30]:
# Inspect the rows of the PC/PS4/Xbox One subset released in 2004.
released_in_2004 = sales_ppx["year"] == 2004
sales_ppx.loc[released_in_2004]

Unnamed: 0,rank,name,genre,esrb_rating,platform,publisher,developer,global_sales,na_sales,pal_sales,jp_sales,other_sales,year,critic_score,user_score
18,23,Garry's Mod,Misc,,PC,Unknown,Team Garry,18.58,,,,,2004,,
32,37,Counter-Strike: Source,Shooter,M,PC,VU Games,Valve Software,15.0,,,,,2004,88.0,8.9
82,87,RollerCoaster Tycoon 3,Strategy,E,PC,Atari,Frontier Developments,10.0,,,,,2004,81.0,5.0
99,104,Half-Life 2,Shooter,M,PC,VU Games,Valve Software,8.88,,,,,2004,96.0,9.1
725,734,Far Cry,Shooter,M,PC,Ubisoft,Crytek,2.5,,,,,2004,89.0,8.1
1514,1534,Grim Dawn,Role-Playing,,PC,Unknown,Crate Entertainment,1.46,,,,,2004,,
1739,1763,Who's Your Daddy,Action,,PC,Unknown,Evil Tortilla Games,1.31,,,,,2004,,
1993,2020,World of Warcraft,MMO,T,PC,Blizzard Entertainment,Blizzard Entertainment,1.16,,,,,2004,93.0,7.3
2363,2399,Doom 3,Shooter,M,PC,Activision,id Software,1.0,,,,,2004,87.0,7.6
3991,4055,Football Manager 2005,Sports,,PC,Sega,Sports Interactive,0.57,,,,,2004,,


In [31]:
# Row count per publisher in the PC/PS4/Xbox One subset, largest first, top 15.
# The count column is named explicitly; left unnamed it becomes the integer
# label 0, which produces an axis titled "sum of 0".
top_publishers_ppx = (
    sales_ppx.groupby("publisher")
    .size()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="count")
)

# The counts are already aggregated, so draw them directly as bars.
fig = px.bar(
    top_publishers_ppx,
    x="publisher",
    y="count",
    title="Number of entries per publisher (PC / PS4 / Xbox One, top 15)",
)
fig.update_xaxes(tickangle=45)
fig.show()

In [32]:
# The 15 publishers with the highest average global sales per row,
# PC/PS4/Xbox One subset only.
mean_sales_by_publisher_ppx = (
    sales_ppx.groupby("publisher")["global_sales"]
    .mean()
    .sort_values(ascending=False)
    .head(15)
    .reset_index(name="mean global sales")
)

fig = px.bar(
    data_frame=mean_sales_by_publisher_ppx,
    x="publisher",
    y="mean global sales",
).update_xaxes(tickangle=45)  # update_xaxes returns the same figure
fig.show()

In [33]:
# Inspect the rows published by Rockstar Games in the PC/PS4/Xbox One subset.
by_rockstar = sales_ppx["publisher"] == "Rockstar Games"
sales_ppx.loc[by_rockstar]

Unnamed: 0,rank,name,genre,esrb_rating,platform,publisher,developer,global_sales,na_sales,pal_sales,jp_sales,other_sales,year,critic_score,user_score
16,21,Grand Theft Auto V,Action,M,PlayStation 4,Rockstar Games,Rockstar North,19.39,6.06,9.71,0.6,3.02,2014,97.0,8.3
41,46,Red Dead Redemption 2,Action-Adventure,M,PlayStation 4,Rockstar Games,Rockstar Games,13.94,5.26,6.21,0.21,2.26,2018,,
56,61,Grand Theft Auto V,Action,M,PC,Rockstar Games,Rockstar North,12.6,,,,,2015,96.0,7.9
103,108,Grand Theft Auto V,Action,M,Xbox One,Rockstar Games,Rockstar North,8.72,4.7,3.25,0.01,0.76,2014,97.0,7.9
183,189,Red Dead Redemption 2,Action-Adventure,M,Xbox One,Rockstar Games,Rockstar Games,5.77,3.76,1.47,,0.54,2018,,
1191,1205,Max Payne 3,Shooter,M,PC,Rockstar Games,Rockstar Vancouver,1.74,,,,,2012,87.0,7.6
1351,1368,L.A. Noire: The Complete Edition,Adventure,M,PC,Rockstar Games,Rockstar Leeds,1.6,,,,,2011,83.0,7.9
2461,2498,Grand Theft Auto: San Andreas,Action,M,PC,Rockstar Games,Rockstar North,0.97,0.0,0.93,,0.04,2005,93.0,8.9
2729,2768,Grand Theft Auto IV,Action,M,PC,Rockstar Games,Rockstar Toronto,0.87,0.01,0.8,,0.06,2008,90.0,6.6
3775,3836,L.A. Noire,Adventure,M,PlayStation 4,Rockstar Games,Team Bondi,0.6,0.18,0.32,,0.1,2017,,


In [34]:
# Critic score against global sales for the PC/PS4/Xbox One subset.
fig = px.scatter(
    data_frame=sales_ppx,
    x="critic_score",
    y="global_sales",
)
fig.show()