In [1]:
# Import Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [2]:
from plotly.subplots import make_subplots
from plotly import data

from matplotlib.patches import ConnectionPatch


In [3]:
# Load the video-game sales table from a path relative to this notebook,
# then print its schema summary (column names, non-null counts, dtypes).
dataset = pd.read_csv("../Dataset/vgsales.csv")
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16287 entries, 0 to 16286
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Rank          16287 non-null  int64  
 1   Name          16287 non-null  object 
 2   Platform      16287 non-null  object 
 3   Year          16287 non-null  float64
 4   Genre         16287 non-null  object 
 5   Publisher     16287 non-null  object 
 6   NA_Sales      16287 non-null  float64
 7   EU_Sales      16287 non-null  float64
 8   JP_Sales      16287 non-null  float64
 9   Other_Sales   16287 non-null  float64
 10  Global_Sales  16287 non-null  float64
dtypes: float64(6), int64(1), object(4)
memory usage: 1.4+ MB


In [4]:
# Display the loaded frame (pandas truncates the middle rows of long frames).
dataset

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16282,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16283,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16284,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16285,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [5]:
# Total sales per region, reshaped into a two-column (Region, Sales) table.
df_sales = (
    dataset[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"]]
    .sum()
    .rename_axis("Region")
    .reset_index(name="Sales")
)
df_sales

Unnamed: 0,Region,Sales
0,NA_Sales,4327.38
1,EU_Sales,2406.69
2,JP_Sales,1284.22
3,Other_Sales,788.89


In [6]:
# Bar chart of total sales per region, one palette colour per bar.
fig = (
    px.bar(df_sales, x="Region", y="Sales", text_auto=".4s")
    .update_traces(marker_color=px.colors.sequential.algae)
    .update_layout(
        title="Total Penjualan Setiap Regional",
        xaxis_title="",
        yaxis_title="",
    )
)
fig.show()

In [7]:
# Donut chart (hole=0.5) of each region's share of total sales.
fig = px.pie(
    df_sales,
    names="Region",
    values="Sales",
    title="Persentase Penjualan Video Games",
    hole=0.5,
)
# Pull the fourth slice away from the ring for emphasis.
fig.update_traces(pull=[0, 0, 0, 0.2], selector=dict(type='pie'))
fig.show()

In [8]:
# Regional and global sales summed per year; keep the five years with the
# highest global sales, ordered from highest to lowest.
df_yearsales_region = (
    dataset.groupby("Year")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    .reset_index()
)

In [9]:
# Alias the top-5-years table as `df` and display it.
df = df_yearsales_region
df

Unnamed: 0,Year,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,2008.0,351.44,184.4,60.26,82.39,678.9
1,2009.0,338.85,191.59,61.89,74.77,667.3
2,2007.0,311.18,160.18,60.29,77.58,609.92
3,2010.0,304.24,176.57,59.49,59.9,600.29
4,2006.0,263.12,129.24,73.73,54.43,521.04


In [10]:
# Stacked bar chart: regional sales for the five best-selling years.
df = df_yearsales_region

# One stacked trace per region: (source column, legend label, hatch pattern).
region_traces = [
    ("Other_Sales", "Others", "/"),
    ("NA_Sales", "North America", "x"),
    ("EU_Sales", "Europe", "+"),
    ("JP_Sales", "Japanese", "|"),
]

fig = go.Figure()
for column, label, pattern in region_traces:
    fig.add_trace(
        go.Bar(x=df["Year"], y=df[column], name=label, marker_pattern_shape=pattern)
    )

fig.update_layout(
    title="Region Tersering dari angka Penjualan Global",
    xaxis_title="",
    yaxis_title="",
    # Previously assigned to an unused `layout` variable, so the rounded
    # corners were never applied to the figure.
    barcornerradius=15,
)

# Keep the years in ranking order (highest global sales first) on the x axis.
fig.update_layout(barmode='stack', xaxis={'categoryorder':'array', 'categoryarray':df["Year"]})
fig.show()

In [11]:
# Total sales per region as a (Region, Sales) table; same computation as
# the earlier `df_sales` cell.
df_region = (
    dataset[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"]]
    .sum()
    .rename_axis("Region")
    .reset_index(name="Sales")
)
df_region

Unnamed: 0,Region,Sales
0,NA_Sales,4327.38
1,EU_Sales,2406.69
2,JP_Sales,1284.22
3,Other_Sales,788.89


In [12]:
# Recompute the top-5 years by global sales (same result as the earlier
# `df_yearsales_region` cell), alias it as `df`, and display it.
df_yearsales_region = (
    dataset.groupby("Year")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    .reset_index()
)
df = df_yearsales_region
df

Unnamed: 0,Year,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,2008.0,351.44,184.4,60.26,82.39,678.9
1,2009.0,338.85,191.59,61.89,74.77,667.3
2,2007.0,311.18,160.18,60.29,77.58,609.92
3,2010.0,304.24,176.57,59.49,59.9,600.29
4,2006.0,263.12,129.24,73.73,54.43,521.04


In [13]:
# Stacked bar chart of per-year sales split by region, with custom styling.
# Bind `df` explicitly so the cell does not depend on an alias left over
# from a previous cell.
df = df_yearsales_region

# Each trace must plot its own region's per-year column. The earlier version
# plotted `df_region["Sales"]` (four all-time region totals) for every trace,
# which made the four traces identical and unrelated to the years on the x axis.
region_traces = [
    ("Other_Sales", "Others", "/"),
    ("NA_Sales", "North America", "x"),
    ("EU_Sales", "Europe", "+"),
    ("JP_Sales", "Japanese", "|"),
]

fig = go.Figure()
for column, label, pattern in region_traces:
    fig.add_trace(
        go.Bar(x=df["Year"], y=df[column], name=label, marker_pattern_shape=pattern)
    )

fig.update_layout(
    title="Trafik Penjualan dari 4 Region per Total",
    xaxis=dict(
        title="Tahun",
        tickangle=-45,
        tickfont=dict(size=12, family="Courier New, monospace"),
    ),
    yaxis=dict(
        title="Jumlah Penjualan (jutaan unit)",
        tickfont=dict(size=12, family="Courier New, monospace"),
    ),
    showlegend=True,
    paper_bgcolor='lightgrey',
    plot_bgcolor='rgba(128, 128, 128, 1)',
    # Previously assigned to an unused `layout` variable and never applied.
    barcornerradius=15,
)

# Keep the years in ranking order (highest global sales first) on the x axis.
fig.update_layout(barmode='stack', xaxis={'categoryorder':'array', 'categoryarray':df["Year"]})
fig.show()

In [14]:
# Two bar traces over the top-5 years: the first is drawn starting from
# `base=df_sales.Sales`, the second from a base of 0.
# NOTE(review): x comes from df_yearsales_region (5 rows after head(5)) while
# y and base come from df_sales (one row per region, 4 rows), so the bars pair
# years with regional totals by position only. The 'ascend'/'descent' names do
# not match anything computed in this notebook -- confirm the intent of this
# cell or remove it.
fig = go.Figure()
fig.add_trace(go.Bar(x=df_yearsales_region.Year, y=df_sales.Sales,
                base=df_sales.Sales,
                marker_color='crimson',
                name='ascend'))
fig.add_trace(go.Bar(x=df_yearsales_region.Year, y=df_sales.Sales,
                base=0,
                marker_color='lightslategrey',
                name='descent'))

fig.show()

In [15]:
# Global sales summed per game title; keep the five best-selling titles.
df_global_sales = (
    dataset.groupby("Name")["Global_Sales"]
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)

In [16]:
# Bar chart of the five best-selling games by global sales.
fig = (
    px.bar(df_global_sales, y="Global_Sales", x="Name", text_auto=".4s")
    .update_traces(marker_color=px.colors.sequential.algae)
    .update_layout(
        title="5 Urutan Tertinggi Penjualan Game secara Global",
        xaxis_title="",
        yaxis_title="",
    )
)
fig.show()

In [17]:
# Regional and global sales summed per game title; keep the ten titles with
# the highest global sales.
df_name_region = (
    dataset.groupby("Name")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(10)
    .reset_index()
)

In [18]:
# Alias the top-10-games table as `df` and display it.
df = df_name_region
df

Unnamed: 0,Name,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Wii Sports,41.49,29.02,3.77,8.46,82.74
1,Grand Theft Auto V,23.46,23.04,1.39,8.03,55.92
2,Super Mario Bros.,32.48,4.88,6.96,0.99,45.31
3,Tetris,26.17,2.95,6.03,0.69,35.84
4,Mario Kart Wii,15.85,12.88,3.79,3.31,35.82
5,Wii Sports Resort,15.75,11.01,3.28,2.96,33.0
6,Pokemon Red/Pokemon Blue,11.27,8.89,10.22,1.0,31.37
7,Call of Duty: Modern Warfare 3,15.58,11.29,0.62,3.35,30.83
8,New Super Mario Bros.,11.38,9.23,6.5,2.9,30.01
9,Call of Duty: Black Ops II,14.08,11.05,0.72,3.88,29.72


In [19]:
# Four side-by-side bar panels (one per region) for the top-10 games,
# sharing a single y axis.
df = df_name_region

# (source column, legend label, bar corner radius), in left-to-right panel order.
panels = [
    ("NA_Sales", "North America", 30),
    ("EU_Sales", "Europe", 30),
    ("JP_Sales", "Japanese", "40%"),
    ("Other_Sales", "Others", "40%"),
]

fig = make_subplots(rows=1, cols=4, shared_yaxes=True)
for panel_col, (column, label, radius) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df["Name"], y=df[column], name=label, marker=dict(cornerradius=radius)),
        row=1,
        col=panel_col,
    )

# Same per-bar colour sequence in every panel.
fig.update_traces(marker_color=px.colors.sequential.algae_r)
fig.update_layout(
    title="10 Game Tersering dari angka Penjualan Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [20]:
# Grouped bar chart: regional sales for the top-10 games.
df = df_name_region

# Build the figure from its traces and layout in one call. The earlier version
# assigned the add_trace() results to `df` (overwriting the DataFrame with a
# tuple of figures) and left `layout` as an unused variable, so the rounded
# corners were never applied.
fig = go.Figure(
    data=[
        go.Bar(x=df["Name"], y=df["NA_Sales"], name="North America"),
        go.Bar(x=df["Name"], y=df["EU_Sales"], name="Europe"),
        go.Bar(x=df["Name"], y=df["JP_Sales"], name="Japanese"),
        go.Bar(x=df["Name"], y=df["Other_Sales"], name="Others"),
    ],
    layout=dict(barcornerradius=15),
)

# NOTE(review): the colour list is applied per bar within every trace, so the
# four regions share the same colours and the legend cannot tell them apart --
# confirm this is intended.
fig.update_traces(marker_color=px.colors.sequential.algae_r)

fig.update_layout(
    title="10 Game Tersering dari angka Penjualan Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [21]:
# Global sales summed per platform; keep the five best-selling platforms.
df_platform = (
    dataset.groupby("Platform")["Global_Sales"]
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)

In [22]:
# Bar chart of the five best-selling platforms by global sales.
fig = (
    px.bar(df_platform, y="Global_Sales", x="Platform", text_auto=".4s")
    .update_traces(marker_color=px.colors.sequential.algae)
    .update_layout(
        title="5 Platform Tersering dari angka Penjualan Global",
        xaxis_title="",
        yaxis_title="",
    )
)
fig.show()

In [23]:
# Regional and global sales summed per platform; keep the ten platforms with
# the highest global sales.
df_platform_region = (
    dataset.groupby("Platform")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(10)
    .reset_index()
)

In [24]:
# Alias the top-10-platforms table as `df` and display it.
df = df_platform_region
df

Unnamed: 0,Platform,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,PS2,572.92,332.63,137.54,190.47,1233.46
1,X360,594.33,278.0,12.3,84.67,969.6
2,PS3,388.9,340.47,79.21,140.81,949.35
3,Wii,497.37,264.35,68.28,79.2,909.81
4,DS,388.26,194.05,175.02,60.27,818.62
5,PS,334.71,212.38,139.78,40.69,727.39
6,GBA,178.43,72.49,46.56,7.51,305.62
7,PSP,107.09,67.16,75.89,41.52,291.71
8,PS4,96.8,123.7,14.27,43.36,278.07
9,PC,92.04,137.35,0.17,24.33,254.7


In [25]:
# Four side-by-side bar panels (one per region) for the top-10 platforms,
# sharing a single y axis.
df = df_platform_region

# (source column, legend label, bar corner radius), in left-to-right panel order.
panels = [
    ("NA_Sales", "North America", 30),
    ("EU_Sales", "Europe", 30),
    ("JP_Sales", "Japanese", "40%"),
    ("Other_Sales", "Others", "40%"),
]

fig = make_subplots(rows=1, cols=4, shared_yaxes=True)
for panel_col, (column, label, radius) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df["Platform"], y=df[column], name=label, marker=dict(cornerradius=radius)),
        row=1,
        col=panel_col,
    )

# Same per-bar colour sequence in every panel.
fig.update_traces(marker_color=px.colors.sequential.algae_r)
fig.update_layout(
    title="10 Platform Tersering dari angka Penjualan Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [26]:
# Grouped bar chart: regional sales for the top-10 platforms.
df = df_platform_region

# Build the figure from its traces and layout in one call. The earlier version
# assigned the add_trace() results to `df` (overwriting the DataFrame with a
# tuple of figures) and left `layout` as an unused variable, so the rounded
# corners were never applied.
fig = go.Figure(
    data=[
        go.Bar(x=df["Platform"], y=df["NA_Sales"], name="North America"),
        go.Bar(x=df["Platform"], y=df["EU_Sales"], name="Europe"),
        go.Bar(x=df["Platform"], y=df["JP_Sales"], name="Japanese"),
        go.Bar(x=df["Platform"], y=df["Other_Sales"], name="Others"),
    ],
    layout=dict(barcornerradius=15),
)

# NOTE(review): the colour list is applied per bar within every trace, so the
# four regions share the same colours and the legend cannot tell them apart --
# confirm this is intended.
fig.update_traces(marker_color=px.colors.sequential.algae_r)

fig.update_layout(
    title="10 Platform Tersering dari angka Penjualan Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [27]:
# Global sales summed per genre; keep the five best-selling genres.
df_genre = (
    dataset.groupby("Genre")["Global_Sales"]
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)

In [28]:
# Bar chart of the five best-selling genres by global sales.
fig = (
    px.bar(df_genre, y="Global_Sales", x="Genre", text_auto=".4s")
    .update_traces(marker_color=px.colors.sequential.algae)
    .update_layout(
        title="5 Genre Game Terpopuler",
        xaxis_title="",
        yaxis_title="",
    )
)
fig.show()

In [29]:
# Regional and global sales summed per genre; keep the five genres with the
# highest global sales.
df_genre_region = (
    dataset.groupby("Genre")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    .reset_index()
)

In [30]:
# Alias the top-5-genres table as `df` and display it.
df = df_genre_region
df

Unnamed: 0,Genre,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Action,861.77,516.48,158.64,184.92,1722.83
1,Sports,670.09,371.34,134.76,132.65,1309.24
2,Shooter,575.16,310.45,38.18,101.9,1026.2
3,Role-Playing,326.5,187.57,350.25,59.38,923.79
4,Platform,445.99,200.65,130.65,51.51,829.13


In [31]:
# Four side-by-side bar panels (one per region) for the top-5 genres,
# sharing a single y axis.
df = df_genre_region

# (source column, legend label, bar corner radius), in left-to-right panel order.
panels = [
    ("NA_Sales", "North America", 30),
    ("EU_Sales", "Europe", 30),
    ("JP_Sales", "Japanese", "40%"),
    ("Other_Sales", "Others", "40%"),
]

fig = make_subplots(rows=1, cols=4, shared_yaxes=True)
for panel_col, (column, label, radius) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df["Genre"], y=df[column], name=label, marker=dict(cornerradius=radius)),
        row=1,
        col=panel_col,
    )

# Same per-bar colour sequence in every panel.
fig.update_traces(marker_color=px.colors.sequential.algae_r)
fig.update_layout(
    title="5 Genre Game Terpopuler",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [32]:
# Grouped bar chart: regional sales for the top-5 genres.
df = df_genre_region

# Build the figure from its traces and layout in one call. The earlier version
# assigned the add_trace() results to `df` (overwriting the DataFrame with a
# tuple of figures) and left `layout` as an unused variable, so the rounded
# corners were never applied.
fig = go.Figure(
    data=[
        go.Bar(x=df["Genre"], y=df["NA_Sales"], name="North America"),
        go.Bar(x=df["Genre"], y=df["EU_Sales"], name="Europe"),
        go.Bar(x=df["Genre"], y=df["JP_Sales"], name="Japanese"),
        go.Bar(x=df["Genre"], y=df["Other_Sales"], name="Others"),
    ],
    layout=dict(barcornerradius=15),
)

# NOTE(review): the colour list is applied per bar within every trace, so the
# four regions share the same colours and the legend cannot tell them apart --
# confirm this is intended.
fig.update_traces(marker_color=px.colors.sequential.algae_r)

fig.update_layout(
    title="5 Genre Game Terpopuler",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [33]:
# Global sales summed per publisher; keep the five best-selling publishers.
df_publisher = (
    dataset.groupby("Publisher")["Global_Sales"]
    .sum()
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)

In [34]:
# Bar chart of the five best-selling publishers by global sales.
fig = (
    px.bar(df_publisher, y="Global_Sales", x="Publisher", text_auto=".4s")
    .update_traces(marker_color=px.colors.sequential.algae)
    .update_layout(
        title="5 Publisher Game dengan Penjualan Game Terbanyak secara Global",
        xaxis_title="",
        yaxis_title="",
    )
)
fig.show()

In [35]:
# Regional and global sales summed per publisher; keep the five publishers
# with the highest global sales.
df_publisher_region = (
    dataset.groupby("Publisher")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]
    .sum()
    .sort_values(by="Global_Sales", ascending=False)
    .head(5)
    .reset_index()
)

In [36]:
# Alias the top-5-publishers table as `df` and display it.
df = df_publisher_region
df

Unnamed: 0,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Nintendo,815.75,418.3,454.99,95.19,1784.43
1,Electronic Arts,584.22,367.38,13.98,127.63,1093.39
2,Activision,426.01,213.72,6.54,74.79,721.41
3,Sony Computer Entertainment,265.22,187.55,74.1,80.4,607.28
4,Ubisoft,252.54,163.03,7.33,50.14,473.25


In [37]:
# Four side-by-side bar panels (one per region) for the top-5 publishers,
# sharing a single y axis.
df = df_publisher_region

# (source column, legend label, bar corner radius), in left-to-right panel order.
panels = [
    ("NA_Sales", "North America", 30),
    ("EU_Sales", "Europe", 30),
    ("JP_Sales", "Japanese", "40%"),
    ("Other_Sales", "Others", "40%"),
]

fig = make_subplots(rows=1, cols=4, shared_yaxes=True)
for panel_col, (column, label, radius) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df["Publisher"], y=df[column], name=label, marker=dict(cornerradius=radius)),
        row=1,
        col=panel_col,
    )

# Same per-bar colour sequence in every panel.
fig.update_traces(marker_color=px.colors.sequential.algae_r)
fig.update_layout(
    title="5 Publisher Game dengan Penjualan Game Terbanyak secara Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [38]:
# Grouped bar chart: regional sales for the top-5 publishers.
df = df_publisher_region

# Build the figure from its traces and layout in one call. The earlier version
# assigned the add_trace() results to `df` (overwriting the DataFrame with a
# tuple of figures) and left `layout` as an unused variable, so the rounded
# corners were never applied.
fig = go.Figure(
    data=[
        go.Bar(x=df["Publisher"], y=df["NA_Sales"], name="North America"),
        go.Bar(x=df["Publisher"], y=df["EU_Sales"], name="Europe"),
        go.Bar(x=df["Publisher"], y=df["JP_Sales"], name="Japanese"),
        go.Bar(x=df["Publisher"], y=df["Other_Sales"], name="Others"),
    ],
    layout=dict(barcornerradius=15),
)

# NOTE(review): the colour list is applied per bar within every trace, so the
# four regions share the same colours and the legend cannot tell them apart --
# confirm this is intended.
fig.update_traces(marker_color=px.colors.sequential.algae_r)

fig.update_layout(
    title="5 Publisher Game dengan Penjualan Game Terbanyak secara Global",
    xaxis_title="",
    yaxis_title="",
    barmode='group',
)
fig.show()

In [39]:
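# NOTE(review): unused scratch code. If re-enabled as written, the first line of each pair would fail: the grouped sum is a DataFrame, and DataFrame.sort_values() requires a `by` argument.
# df = dataset.groupby("Platform")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]].aggregate("sum").sort_values(ascending=True).reset_index()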
# df = df.sort_values(by=["Global_Sales"]).reset_index().tail(5)

# df = dataset.groupby("Genre")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]].aggregate("sum").sort_values(ascending=True).reset_index()
# df = df.sort_values(by=["Global_Sales"]).reset_index().tail(5)

# df = dataset.groupby("Publisher")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]].aggregate("sum").sort_values(ascending=True).reset_index()
# df = df.sort_values(by=["Global_Sales"]).reset_index().tail(5)

# df = dataset.groupby("Name")[["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]].aggregate("sum").sort_values(ascending=True).reset_index()
# df = df.sort_values(by=["Global_Sales"]).reset_index().tail(5)

In [40]:
pip show nbformat


Name: nbformat
Version: 5.9.2
Summary: The Jupyter Notebook format
Home-page: https://jupyter.org
Author: 
Author-email: Jupyter Development Team <jupyter@googlegroups.com>
License: BSD 3-Clause License

- Copyright (c) 2001-2015, IPython Development Team
- Copyright (c) 2015-, Jupyter Development Team

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written

In [41]:
# Environment check: print the nbformat version importable from this kernel.
import nbformat
print(nbformat.__version__)


5.9.2
