In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [8]:
# Load the video game sales data. The path is relative, so vgsales.csv must be
# in the notebook's working directory.
df = pd.read_csv('vgsales.csv')

In [9]:
# Preview the first rows to check the columns and value formats.
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,259,Asteroids,2600,1980,Shooter,Atari,4.0,0.26,0.0,0.05,4.31
1,545,Missile Command,2600,1980,Shooter,Atari,2.56,0.17,0.0,0.03,2.76
2,1768,Kaboom!,2600,1980,Misc,Activision,1.07,0.07,0.0,0.01,1.15
3,1971,Defender,2600,1980,Misc,Atari,0.99,0.05,0.0,0.01,1.05
4,2671,Boxing,2600,1980,Fighting,Activision,0.72,0.04,0.0,0.01,0.77


In [10]:
# Count missing values in each column.
df.isnull().sum()

Rank             0
Name             0
Platform         0
Year             0
Genre            0
Publisher       36
NA_Sales         0
EU_Sales         0
JP_Sales         0
Other_Sales      0
Global_Sales     0
dtype: int64

In [11]:
# Replace missing publishers with an explicit 'Unknown' label.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form
# emits a FutureWarning in pandas 2.x and leaves `df` unchanged once
# copy-on-write is enabled.
df['Publisher'] = df['Publisher'].fillna('Unknown')

In [12]:
# Re-count missing values per column to confirm the Publisher gaps were filled.
df.isnull().sum()

Rank            0
Name            0
Platform        0
Year            0
Genre           0
Publisher       0
NA_Sales        0
EU_Sales        0
JP_Sales        0
Other_Sales     0
Global_Sales    0
dtype: int64

In [13]:
# Dimensions of the frame as (rows, columns).
df.shape

(16324, 11)

In [14]:
# Number of rows that repeat an earlier row across all columns.
df.duplicated().sum()

0

In [15]:
# List the column labels available for the analysis.
df.columns

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

In [16]:
# Display the rows ordered by ascending Rank. The sorted result is only shown,
# not assigned, so df keeps its original row order.
df.sort_values(by='Rank')

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
6112,1,Wii Sports,Wii,2006,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
122,2,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
8322,3,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
9750,4,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
705,5,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
3632,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
4406,16597,Men in Black II: Alien Escape,GC,2003,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
9749,16598,SCORE International Baja 1000: The Official Game,PS2,2008,Racing,Activision,0.00,0.00,0.00,0.00,0.01
12441,16599,Know How 2,DS,2010,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


# Year-wise analysis

In [21]:
# Total Global Sales Over Time (Line Chart)
# Sum Global_Sales within each Year, then draw the totals as a single red line.
yearly_global_sales = df.groupby('Year', as_index=False)['Global_Sales'].sum()
fig = px.line(
    yearly_global_sales,
    x='Year',
    y='Global_Sales',
    title='Total Global Sales Over Time',
)
fig.update_layout(yaxis_title='Global Sales')
fig.update_traces(line_color='red')
fig.show()

In [18]:
# Column labels again, as a reference for the regional sales columns used in the next cell.
df.columns

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

In [40]:
# Sales by Region Over Time (Line Chart)
# Map each regional sales column to the name shown in the legend.
region_names = {
    'NA_Sales': 'North America',
    'EU_Sales': 'Europe',
    'JP_Sales': 'Japan',
    'Other_Sales': 'Other',
}
region_columns = list(region_names)

# Yearly sales totals, one column per region.
region_sales_over_time = df.groupby('Year')[region_columns].sum().reset_index()

# Wide-form input: Plotly Express draws one trace per column listed in `y`.
# No color_discrete_sequence is passed, because an empty list gives Plotly no
# colours to cycle through; the default palette is used instead.
# `labels` replaces the generic wide-form titles "value" and "variable".
fig = px.line(
    region_sales_over_time,
    x='Year',
    y=region_columns,
    title='Sales by Region Over Time',
    labels={'value': 'Sales', 'variable': 'Region'},
)

# Each trace starts out named after its column; swap in the readable region name.
fig.for_each_trace(
    lambda trace: trace.update(name=region_names.get(trace.name, trace.name))
)
fig.show()


In [41]:
# Genre Trends Over Time (Line Chart)
# One row per (Year, Genre) pair with the number of rows in that pair.
genre_trends_over_time = (
    df.groupby(['Year', 'Genre'])
    .size()
    .rename('Count')
    .reset_index()
)
fig = px.line(
    genre_trends_over_time,
    x='Year',
    y='Count',
    color='Genre',
    title='Genre Trends Over Time',
)
fig.show()

In [57]:
# Count the rows for every (Year, Platform) pair and show the resulting table.
platform_popularity_over_time = (
    df.groupby(['Year', 'Platform'])
    .size()
    .rename('Count')
    .reset_index()
)
platform_popularity_over_time

Unnamed: 0,Year,Platform,Count
0,1980,2600,9
1,1981,2600,46
2,1982,2600,36
3,1983,2600,11
4,1983,NES,6
...,...,...,...
233,2016,PS4,107
234,2016,PSV,60
235,2016,WiiU,10
236,2016,X360,8


In [42]:

# Platform Popularity Over Time (Line Chart)
# One line per platform, tracing its yearly row count.
fig = px.line(
    platform_popularity_over_time,
    x='Year',
    y='Count',
    color='Platform',
    title='Platform Popularity Over Time',
)
fig.show()

In [43]:
# Average Sales Per Game Over Time (Line Chart)
# Mean Global_Sales across the rows of each Year.
average_sales_over_time = df.groupby('Year', as_index=False)['Global_Sales'].mean()
fig = px.line(
    average_sales_over_time,
    x='Year',
    y='Global_Sales',
    title='Average Sales Per Game Over Time',
)
fig.show()

In [56]:
# Number of non-null Name entries per Year, stored in a column called 'count'.
games_released_over_time = (
    df.groupby('Year')['Name']
    .count()
    .rename('count')
    .reset_index()
)
games_released_over_time.head()

Unnamed: 0,Year,count
0,1980,9
1,1981,46
2,1982,36
3,1983,17
4,1984,14


In [66]:

# Number of Games Released Over Time (Bar Chart)
fig = px.bar(
    games_released_over_time,
    x='Year',
    y='count',
    title='Number of Games Released Over Time',
    color_discrete_sequence=['#0E21A0'],
)

# Linear ticks with a step of 1, so every year gets its own tick.
fig.update_xaxes(title_text='Year', tickmode='linear', tick0=2010, dtick=1)
fig.update_yaxes(title_text='Count')

# Figure size, margins, background and grid lines set in one layout update.
fig.update_layout(
    title_font_size=24,
    width=800,
    height=400,
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
    plot_bgcolor='white',  # Background color
    xaxis={'showgrid': False},  # Hide x-axis grid lines
    yaxis={'showgrid': True, 'gridcolor': 'lightgray'},  # Light gray y-axis grid lines
)

fig.show()