In [1]:
import pandas as pd
# Load the raw sales table, then report its (rows, columns) shape and
# preview the first 10 rows.
df = pd.read_csv(filepath_or_buffer='vgsales.csv')
print(df.shape)
df.head(n=10)

(16598, 11)


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37
5,6,Tetris,GB,1989.0,Puzzle,Nintendo,23.2,2.26,4.22,0.58,30.26
6,7,New Super Mario Bros.,DS,2006.0,Platform,Nintendo,11.38,9.23,6.5,2.9,30.01
7,8,Wii Play,Wii,2006.0,Misc,Nintendo,14.03,9.2,2.93,2.85,29.02
8,9,New Super Mario Bros. Wii,Wii,2009.0,Platform,Nintendo,14.59,7.06,4.7,2.26,28.62
9,10,Duck Hunt,NES,1984.0,Shooter,Nintendo,26.93,0.63,0.28,0.47,28.31


In [2]:
# Discard every row that has a missing value in any column, then report the
# new shape and preview the result.
df = df.dropna(axis=0, how='any')

print(df.shape)
df.head(5)

(16291, 11)


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [3]:
# Keep only rows whose Year lies in 2000..2015 (both ends included).
df = df[df['Year'].between(2000, 2015)]
df.shape

(13971, 11)

In [4]:
# Year was parsed as float (e.g. 2006.0); store it as an integer.
# `assign` returns a new frame instead of writing a column into `df`, which
# at this point is the result of a boolean filter -- item assignment on such
# a frame is what triggers pandas' SettingWithCopyWarning.
df = df.assign(Year=df['Year'].astype(int))

df.dtypes

Rank              int64
Name             object
Platform         object
Year              int64
Genre            object
Publisher        object
NA_Sales        float64
EU_Sales        float64
JP_Sales        float64
Other_Sales     float64
Global_Sales    float64
dtype: object

In [13]:
# Save the processed frame. index=True is the pandas default, spelled out
# here because it also writes the row index as a leading, unnamed column.
df.to_csv('video-game-data-processed.csv', index=True)

In [16]:
# Preview the processed frame. A bare last expression uses the notebook's
# rich table display, whereas print() falls back to wrapped plain text.
df.head()

   Rank                   Name Platform  Year     Genre Publisher  NA_Sales  \
0     1             Wii Sports      Wii  2006    Sports  Nintendo     41.49   
2     3         Mario Kart Wii      Wii  2008    Racing  Nintendo     15.85   
3     4      Wii Sports Resort      Wii  2009    Sports  Nintendo     15.75   
6     7  New Super Mario Bros.       DS  2006  Platform  Nintendo     11.38   
7     8               Wii Play      Wii  2006      Misc  Nintendo     14.03   

   EU_Sales  JP_Sales  Other_Sales  Global_Sales  
0     29.02      3.77         8.46         82.74  
2     12.88      3.79         3.31         35.82  
3     11.01      3.28         2.96         33.00  
6      9.23      6.50         2.90         30.01  
7      9.20      2.93         2.85         29.02  


In [15]:
# Summarise the processed frame: index range, per-column non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13971 entries, 0 to 16597
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Rank          13971 non-null  int64  
 1   Name          13971 non-null  object 
 2   Platform      13971 non-null  object 
 3   Year          13971 non-null  int64  
 4   Genre         13971 non-null  object 
 5   Publisher     13971 non-null  object 
 6   NA_Sales      13971 non-null  float64
 7   EU_Sales      13971 non-null  float64
 8   JP_Sales      13971 non-null  float64
 9   Other_Sales   13971 non-null  float64
 10  Global_Sales  13971 non-null  float64
dtypes: float64(5), int64(2), object(4)
memory usage: 1.3+ MB


### Tree Map

In [12]:
# Treemap dependencies (run once in the kernel's environment if missing): %pip install plotly ipywidgets

In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
from ipywidgets import Output
from IPython.display import display, clear_output
from ipywidgets import Dropdown, HTML, VBox, HBox, Output
import plotly.graph_objects as go

In [7]:
# Sales columns offered for charting: the four regional columns followed by
# Global_Sales.
region_cols = [f"{region}_Sales" for region in ("NA", "EU", "JP", "Other", "Global")]

# Distinct values of the Year column, ascending, as plain Python values.
distinct_years = df['Year'].unique()
available_years = sorted(distinct_years.tolist())

In [8]:
# Dropdown entry that stands for the whole period rather than a single year.
year_range_label = "2000-2015"
year_options = [year_range_label] + list(available_years)


# Year-selection dropdown
year_dropdown = Dropdown(
    options = year_options,
    value = year_range_label,
    description = 'Year:',
    layout = {'width': '180px'}
)


# "Global_Sales" leads the list. region_cols already contains it, so it is
# filtered out of the tail; otherwise the dropdown would show it twice.
region_options = ["Global_Sales"] + [col for col in region_cols if col != "Global_Sales"]


# Region-selection dropdown
region_dropdown = Dropdown(
    options = region_options,
    value = "Global_Sales",
    description = 'Region:',
    layout = {'width': '180px'}
)


# Dashboard title
title_html = HTML("<b>Interactive Treemap: Publisher → Platform Market Share</b>")



def build_treemap(year_selection, region_col: str):
    """Build a Publisher -> Platform treemap of sales for a year selection and region.

    Reads the notebook-level ``df``, ``region_cols`` and ``year_range_label``.

    Parameters
    ----------
    year_selection : int, float or str
        * ``year_range_label``: keep rows with 2000 <= Year <= 2015.
        * a ``"start-end"`` string: keep rows with start <= Year <= end. If the
          string does not split into exactly two integers, no filter is applied.
        * an int or float: keep rows whose Year equals ``int(year_selection)``.
        * anything else: no filter is applied; the value is only used as label.
    region_col : str
        Sales column to aggregate. ``"Global_Sales"`` is computed as the sum of
        the individual regional columns listed in ``region_cols``; any other
        value is summed directly from the column of that name.

    Returns
    -------
    The figure produced by ``px.treemap``. When the selection contains no rows,
    the figure carries only a "No data ..." title.
    """
    dfx = df.copy()

    if isinstance(year_selection, str) and year_selection == year_range_label:
        start, end = 2000, 2015
        dfx = dfx[(dfx["Year"] >= start) & (dfx["Year"] <= end)]
        year_label = year_range_label

    elif isinstance(year_selection, str) and "-" in year_selection:
        try:
            start, end = map(int, year_selection.split("-"))
            dfx = dfx[(dfx["Year"] >= start) & (dfx["Year"] <= end)]
            year_label = f"{start}-{end}"
        except ValueError:
            # Not a parsable "start-end" pair: leave the data unfiltered and
            # show the raw selection in the title.
            year_label = str(year_selection)

    elif isinstance(year_selection, (int, float)):
        dfx = dfx[dfx["Year"] == int(year_selection)]
        year_label = str(int(year_selection))

    else:
        # Unrecognised selection: data stays unfiltered.
        year_label = str(year_selection)

    # Individual regions only. region_cols also lists "Global_Sales" itself,
    # and adding the total to its own components would count every sale twice.
    component_cols = [col for col in region_cols if col != "Global_Sales"]

    # Defining global sales as sum of all region sales
    if region_col == "Global_Sales" and component_cols:
        grouped = (
            dfx.groupby(['Publisher', 'Platform'], as_index = False)[component_cols]
               .sum()
        )
        grouped["Sales"] = grouped[component_cols].sum(axis=1)
        grouped = grouped[["Publisher", "Platform", "Sales"]]
    else:
        # A single column (or Global_Sales when no regional columns are
        # configured) is summed directly.
        grouped = (
            dfx.groupby(['Publisher', 'Platform'], as_index = False)[region_col]
               .sum()
               .rename(columns={region_col: 'Sales'})
        )

    if grouped.empty:
        fig = px.treemap(title=f"No data for Year = {year_label} and Region = {region_col}")
        return fig

    # Creating the treemap chart
    fig = px.treemap(
        grouped,
        path = ['Publisher', 'Platform'],
        values = 'Sales',
        color ='Publisher',
        hover_data = {'Sales': ':.2f'},
        title = f"Publisher and its platforms for {year_label} in {region_col.replace('_',' ')}"
    )

    # Customising hover and labels
    # NOTE(review): customdata[0] is presumed to be the 'Sales' column requested
    # through hover_data -- confirm against the rendered hover text.
    fig.update_traces(
        hovertemplate=(
            "<b>%{label}</b><br>"
            "Path: %{currentPath}<br>"
            "Sales: %{customdata[0]:,.2f} M"
        ),
        texttemplate = "<b>%{label}</b><br>%{value:,.2f} M",
        textinfo = "label+value",
        insidetextfont = dict(size = 13)
    )
    fig.update_layout(uniformtext = dict(minsize = 8, mode = "hide"))
    fig.update_layout(
        margin = dict(t = 60, l = 8, r = 8, b = 8),
        height = 650
    )

    return fig
# Container widget in which the current treemap is drawn.
out = Output()


def render(*_):
    """Redraw the treemap inside ``out`` for the current dropdown selection.

    Any positional arguments (such as the change notification passed by
    ``observe``) are accepted and ignored.
    """
    treemap = build_treemap(year_dropdown.value, region_dropdown.value)
    with out:
        clear_output(wait=True)
        display(treemap)


# Redraw whenever the value of either dropdown changes.
region_dropdown.observe(render, names='value')
year_dropdown.observe(render, names='value')

# Initial draw, before any user interaction.
render()

ui = VBox(children=[title_html, HBox(children=[year_dropdown, region_dropdown]), out])
ui

VBox(children=(HTML(value='<b>Interactive Treemap: Publisher → Platform Market Share</b>'), HBox(children=(Dro…

In [9]:
# Detach the handler attached by an earlier run of this wiring (a previous
# cell, or a re-run of this one). The dropdowns are reused, so without this
# every change of a dropdown would trigger one redraw per registration.
if "render" in globals():
    for _dropdown in (year_dropdown, region_dropdown):
        try:
            _dropdown.unobserve(render, names='value')
        except ValueError:
            pass  # that handler was not attached to this widget

# Output widget for treemap
out = Output()

def render(*_):
    """Redraw the treemap inside ``out`` for the current dropdown selection.

    Any positional arguments (such as the change notification passed by
    ``observe``) are accepted and ignored.
    """
    fig = build_treemap(year_dropdown.value, region_dropdown.value)
    with out:
        clear_output(wait=True)
        display(fig)


# Rendering the treemap when year and region changes
year_dropdown.observe(render, names='value')
region_dropdown.observe(render, names='value')

# Initial draw, before any user interaction.
render()

ui = VBox([title_html, HBox([year_dropdown, region_dropdown]), out])
ui

VBox(children=(HTML(value='<b>Interactive Treemap: Publisher → Platform Market Share</b>'), HBox(children=(Dro…

In [18]:
# Export the treemap for whatever the two dropdowns currently select to an
# HTML file.
fig = build_treemap(
    year_selection=year_dropdown.value,
    region_col=region_dropdown.value,
)
fig.write_html(file='treemap.html')