# Video Games Sales - Exploratory Data Analysis


<b>CONTENTS:</b>
   <li><b>1. Overall Statistics</b>
   <li><b>2. Publisher Wise Analysis</b>
   <li><b>3. Genre Wise Analysis</b>
   <li><b>4. Platform Wise Analysis</b>
   <li><b>5. Global & Regional Sales Wise Analysis</b>
<p style="text-indent: 5px;">  
<p style="text-indent: 5px;">
This dataset contains a list of video games with sales greater than 100,000 copies. It was generated by a scrape of <a href= "https://www.vgchartz.com/gamedb/">vgchartz.com </a>
    
<p style="text-indent: 5px;">          
</p>
        <li><b>Rank</b> - Ranking of overall sales</li>
        <li><b>Name</b> - The game's name</li>
        <li><b>Platform</b> - Platform of the game's release (e.g. PC, PS4, etc.)</li>
        <li><b>Year</b> - Year of the game's release</li>
        <li><b>Genre</b> - Genre of the game</li>
        <li><b>Publisher</b> - Publisher of the game</li>
        <li><b>NA_Sales</b> - Sales in North America (in millions)</li>
        <li><b>EU_Sales</b> - Sales in Europe (in millions)</li>
        <li><b>JP_Sales</b> - Sales in Japan (in millions)</li>
        <li><b>Other_Sales</b> - Sales in the rest of the world (in millions)</li>
        <li><b>Global_Sales</b> - Total worldwide sales (in millions)</li>
    
</span>

# Load Data

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import missingno as mn

import plotly.express as px
from plotly.offline import init_notebook_mode,iplot
init_notebook_mode(connected=True)
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

In [None]:
# Read the scraped sales table into `df`, which every later cell works from,
# and preview its first rows.
csv_path = '/kaggle/input/videogamesales/vgsales.csv'
df = pd.read_csv(csv_path)
df.head()

# 1. Overall Statistics

## 1.1 Descriptive Statistics

In [None]:
# pandas' default summary statistics for the loaded sales table.
df.describe()

## 1.2 Null values

In [None]:
# missingno matrix plot of the table, used in this section to inspect null values.
mn.matrix(df)

## 1.3 Data Count

In [None]:
# Headline figures for the dataset, shown as a 2x4 grid of plotly "number"
# indicators: distinct games / publishers / platforms / genres, total global
# sales, and the first and last release year.
games = df['Name'].value_counts()
publisher = df['Publisher'].value_counts()
platforms = df['Platform'].value_counts()
genres = df['Genre'].value_counts()
minyear = df['Year'].min()
maxyear = df['Year'].max()
# Computed from the data (previously a hand-typed 8920) so the tile always
# matches the file that was actually loaded.
total_global_sales = int(round(df['Global_Sales'].sum()))

# One entry per tile: (value, title, title colour, number colour, row, column).
indicator_specs = [
    (len(games), "Games", 'gold', 'gold', 0, 0),
    (len(publisher), "Publishers", 'green', 'green', 0, 1),
    (len(platforms), 'Platforms', 'blue', 'green', 0, 2),
    (len(genres), 'Genres', 'blue', 'green', 0, 3),
    (total_global_sales, 'Global Sales ($millions)', 'blue', 'green', 1, 0),
    (minyear, 'From Year', 'blue', 'green', 1, 1),
    (maxyear, 'To Year', 'blue', 'green', 1, 2),
]

fig = go.Figure()
for value, label, title_color, number_color, grid_row, grid_col in indicator_specs:
    fig.add_trace(go.Indicator(
        mode="number",
        value=value,
        title={'text': label, 'font': {'color': title_color, 'size': 20}},
        number={'font': {'color': number_color, 'size': 40}},
        domain={'row': grid_row, 'column': grid_col},
    ))
fig.update_layout(
    grid={'rows': 2, 'columns': 4, 'pattern': "independent"})
fig.show()

## 1.4 Games Count & Sales per Year

In [None]:
# Number of catalogue entries and total global sales per release year, drawn on
# twin y-axes that share the year axis.
# Only the two needed columns are aggregated, so string columns are never summed.
yearly = df.groupby('Year').agg(Count=('Rank', 'count'), Sales=('Global_Sales', 'sum'))

# The x values come from the years actually present in the data. A hard-coded
# range would mislabel points (or fail on a length mismatch) whenever the data
# has gaps or extends beyond that range.
x = yearly.index
y = yearly['Count']
y2 = yearly['Sales']

fig = plt.figure(figsize=(15, 7))
ax11 = fig.add_subplot(111)
ax11.plot(x, y, label='Count')

# Second y-axis for sales, which is on a different scale from the counts.
ax12 = ax11.twinx()
ax12.plot(x, y2, '--', label='Sales')

ax11.legend(loc=2, fontsize=14)
ax12.legend(loc=0, fontsize=14)
ax11.grid()
ax11.set_xlabel("Year", fontsize=14)
ax11.set_ylabel("Count", fontsize=14)
ax12.set_ylabel('Sales ($millions)', fontsize=14)
ax12.set_title('Games Count & Sales per Year', fontsize=20)

## 1.5 Top Games Sales

In [None]:
# Five best-selling titles by total global sales.
# aggfunc='sum' is explicit: pivot_table defaults to the mean, which would
# average a multi-platform title over its platform rows instead of totalling it.
Games_table = (
    pd.pivot_table(df, values='Global_Sales', index=['Name'], aggfunc='sum')
    .sort_values(by='Global_Sales', ascending=False)
    .head()
)
ax12 = Games_table.plot(kind='barh', figsize=(15, 8), fontsize=14)
ax12.set_title('Top 5 Game Sales ($millions)', fontdict={'fontsize': 20})
ax12.set_xlabel('Sales ($millions)', fontsize=14)

## 1.6 Top Platforms

In [None]:
# Horizontal bars for the five platforms with the most rows in the dataset.
df_Platform = df['Platform'].value_counts()
top_five_platforms = df_Platform.head(5)
plt.figure(figsize=(16, 5))
ax20 = top_five_platforms.plot.barh(fontsize=14)
ax20.set_xlabel('Frequency', fontsize=14)
ax20.set_ylabel('Platforms', fontsize=14)
ax20.set_title('Top 5 Platforms', fontsize=20)

## 1.7 Genre Frequency

In [None]:
# Vertical bars showing how many rows each genre has in the dataset.
df_Genre = df['Genre'].value_counts()
plt.figure(figsize=(16, 5))
ax21 = df_Genre.plot.bar(color='green', fontsize=12)
ax21.set_xlabel('Genre', fontsize=14)
ax21.set_ylabel('Frequency', fontsize=14)
ax21.set_title('Genre Frequency', fontsize=20)

## 1.8 Publishers Game Count

In [None]:

# Rows per publisher, largest first, as an interactive bar chart with a range
# slider for scrolling through the long tail of publishers.
publisher_count = (
    df.groupby(['Publisher'])['Name']
    .count()
    .reset_index()
    .sort_values(by='Name', ascending=False)
)

ax22 = px.bar(publisher_count, x=publisher_count['Publisher'], y=publisher_count['Name'])
ax22.update_layout(
    title='Publishers Count',
    xaxis_title="Publisher",
    yaxis_title="Game Count",
    autosize=False,
    width=2000,
    height=700,
)
ax22.update_xaxes(rangeslider_visible=True)
ax22.show()

# 2. Publisher Wise Analysis

## 2.1 Publishers Game Count 

In [None]:
# Number of rows per publisher, most frequent first.
df['Publisher'].value_counts()

In [None]:
# Each publisher's share of total global sales, drawn as a pie chart.
df_Publishers = (
    df.pivot_table(values='Global_Sales', index=['Publisher'], aggfunc='sum')
    .sort_values(by='Global_Sales', ascending=False)
)
fig = px.pie(
    df_Publishers,
    values='Global_Sales',
    names=df_Publishers.index,
    title='Publishers Global Market Share (%)',
)
fig.update_traces(textposition='inside', textinfo='percent+label', textfont_size=20)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(autosize=False, width=1000, height=1000)

In [None]:
# Total global sales per publisher, largest first; the ten largest are plotted.
df_Publisher_sorted = (
    df.pivot_table(values='Global_Sales', index=['Publisher'], aggfunc='sum')
    .sort_values(by='Global_Sales', ascending=False)
)
top_ten_publishers = df_Publisher_sorted.head(10)
ax2 = top_ten_publishers.plot.bar(
    y='Global_Sales',
    align='center',
    alpha=0.5,
    figsize=(15, 8),
    fontsize=14,
)
ax2.set_title('Global Sales for Top 10 Publishers ($millions)', fontsize=20)
ax2.set_ylabel('$millions', fontsize=14)
ax2.grid()

## 2.3 Top Publishers per Region

In [None]:
# Top 10 publishers by total sales in each region, one bar panel per region.
# One entry per panel: (sales column, trace name, subplot title, row, col).
region_panels = [
    ('NA_Sales', "North America", "North America Sales (millions $)", 1, 1),
    ('EU_Sales', "Europe", "Europe Sales (millions $)", 1, 2),
    ('JP_Sales', "Japan", "Japan Sales (millions $)", 2, 1),
    ('Other_Sales', "Other", "Other Sales (millions $)", 2, 2),
]

# Initialize figure
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=[panel[2] for panel in region_panels],
    column_widths=[0.5, 0.5],
    row_heights=[0.5, 0.5],
    specs=[[{"type": "bar"}, {"type": "bar"}],
           [{"type": "bar"}, {"type": "bar"}]])

# Add Traces
for sales_col, trace_name, _subplot_title, panel_row, panel_col in region_panels:
    # Rank by total regional sales. The previous default (mean per row) favoured
    # publishers with a single hit and disagreed with the global ranking in the
    # preceding cells.
    top_publishers = df.groupby('Publisher')[sales_col].sum().nlargest(10)
    # x and y both come from the same Series, so labels cannot drift from values.
    # The "Other" panel previously took its x labels from the global ranking.
    fig.add_trace(
        go.Bar(y=top_publishers.values,
               x=top_publishers.index,
               name=trace_name,
               marker={'color': top_publishers.values, 'colorscale': 'Portland'}),
        row=panel_row, col=panel_col)

fig.update_layout(height=700, showlegend=False)
fig.update_layout(title="Top 10 Publishers per Region", title_font_size=20)
fig.show()

## 2.4 Sales by Publisher and Genre

In [None]:
# Heatmap of global sales by genre (rows) and publisher (columns), restricted to
# the ten publishers with the highest total global sales.
df_PublisherGenre2 = df[['Genre', 'Publisher', 'Global_Sales']]

# Only the numeric column is summed, so the string Genre column is left alone.
df_PublisherGenre3 = (
    df_PublisherGenre2.groupby(by="Publisher")[['Global_Sales']]
    .sum()
    .sort_values(by='Global_Sales', ascending=False)
    .head(10)
)

# The filter uses the computed top 10 rather than a hand-typed publisher list,
# so it stays correct if the data changes.
# df_PublisherGenre4 is reused by the section 3.3 cell.
df_PublisherGenre4 = df_PublisherGenre2.loc[
    df_PublisherGenre2['Publisher'].isin(df_PublisherGenre3.index)
]

df_PublisherGenre = pd.pivot_table(df_PublisherGenre4, values='Global_Sales', index=['Genre'],
                                   columns='Publisher', aggfunc='sum', fill_value=0)
plt.figure(figsize=(16, 5))
ax9 = sns.heatmap(df_PublisherGenre)
ax9.set_title('Sales by Genre and Publisher ($millions)', fontsize=20)

# 3. Genre Wise Analysis

## 3.1 Game Count per Genre

In [None]:
# Number of rows per genre, most frequent first.
df['Genre'].value_counts()

In [None]:
# Pie chart of each genre's share of rows (count of Name per Genre).
df_GenreName = df.pivot_table(values='Name', columns='Genre', aggfunc='count')
# Transposed so genres form the index and 'Name' is the single count column.
df_GenreName_transposed = df_GenreName.transpose()
ax7 = df_GenreName_transposed.plot(
    kind='pie',
    y='Name',
    labels=df_GenreName_transposed.index,
    autopct='%1.1f%%',
    figsize=(12, 12),
    textprops={'fontsize': 14},
)
# The wedge labels already name each genre, so the legend is removed.
ax7.get_legend().remove()
ax7.set_title('Game Count per Genre (%)', fontsize=20)

## 3.2 Genre Sales per Region

In [None]:
# Stacked horizontal bars: regional sales totals per genre, ordered by
# North American sales.
regional_columns = ('NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales')
df_RegionGenre = (
    pd.pivot_table(df, values=regional_columns, index=['Genre'], aggfunc='sum')
    .sort_values(by='NA_Sales', ascending=True)
)
ax6 = df_RegionGenre.plot.barh(stacked=True, figsize=(18, 7), fontsize=14)
ax6.set_title('Genre Sales per Region ($millions)', fontsize=20)
ax6.set_xlabel('Sales ($millions)', fontsize=14)

## 3.3 Sales by Genre and Publisher

In [None]:
# Genre x publisher heatmap, rebuilt from df_PublisherGenre4 (defined in the
# section 2.4 cell, which must have been run first).
df_PublisherGenre = df_PublisherGenre4.pivot_table(
    values='Global_Sales',
    index=['Genre'],
    columns='Publisher',
    aggfunc='sum',
    fill_value=0,
)
plt.figure(figsize=(16, 5))
ax9 = sns.heatmap(df_PublisherGenre)
ax9.set_title('Sales by Genre and Publisher ($millions)', fontsize=20)

## 3.4 Sales by Genre and Platform

In [None]:
# Heatmap of total global sales with platforms as rows and genres as columns;
# combinations with no rows are filled with 0.
df_PlatformGenre = df.pivot_table(
    index='Platform',
    columns='Genre',
    values='Global_Sales',
    aggfunc='sum',
    fill_value=0,
)
plt.figure(figsize=(20, 10))
ax23 = sns.heatmap(df_PlatformGenre)
ax23.set_title('Sales by Platform and Genre ($millions)', fontsize=20)

## 3.5 Top Games per Genre

In [None]:
# Top 5 titles by total global sales within each genre, one bar panel per genre.
# 'Role-Playing' was missing from this chart although the 4x3 grid has room for
# twelve panels. It is appended last so the existing panels keep their positions.
genre_panels = ['Action', 'Sports', 'Misc', 'Shooter', 'Adventure', 'Racing',
                'Platform', 'Simulation', 'Fighting', 'Strategy', 'Puzzle',
                'Role-Playing']
n_panel_cols = 3
n_panel_rows = -(-len(genre_panels) // n_panel_cols)  # ceiling division

# Initialize figure
fig = make_subplots(
    rows=n_panel_rows, cols=n_panel_cols, subplot_titles=genre_panels,
    column_widths=[2] * n_panel_cols,
    row_heights=[2] * n_panel_rows,
    specs=[[{"type": "bar"} for _ in range(n_panel_cols)]
           for _ in range(n_panel_rows)])

# Add Traces
for position, genre in enumerate(genre_panels):
    # Sum over platforms so a multi-platform title is ranked by its total sales.
    # The pivot_table default (mean) averaged it over its platform rows.
    top_games = (
        df.loc[df['Genre'] == genre]
        .groupby('Name')['Global_Sales']
        .sum()
        .nlargest(5)
    )
    # Panels are filled row by row; plotly rows and columns are 1-based.
    panel_row, panel_col = divmod(position, n_panel_cols)
    fig.add_trace(
        go.Bar(y=top_games.values,
               x=top_games.index,
               name=genre,
               marker={'color': top_games.values, 'colorscale': 'Portland'}),
        row=panel_row + 1, col=panel_col + 1)

fig.update_layout(height=1500, showlegend=False)
fig.update_layout(title="Top 5 Games per Genre ($millions)", title_font_size=20)

fig.show()

# 4. Platform Wise Analysis

In [None]:
# Number of rows per platform, most frequent first.
df['Platform'].value_counts()

## 4.1 Top Platforms Word Cloud

In [None]:
from wordcloud import WordCloud, ImageColorGenerator

# Word cloud of platform names, sized by each platform's total global sales.
# The previous version fed str(list(set(...))) to the text tokenizer. That gave
# every platform the same weight, depended on set ordering, and dropped purely
# numeric names such as "2600". The per-platform sales it computed were unused.
platform_sales = (
    df.groupby(by="Platform")['Global_Sales']
    .sum()
    .sort_values(ascending=False)
)

# random_state fixes the layout so re-running the cell gives the same image.
wordcloud = WordCloud(max_font_size=50,
                      max_words=100,
                      background_color="black",
                      random_state=42).generate_from_frequencies(platform_sales.to_dict())

# A local figure size replaces the global rcParams change, so later plots are
# not affected by this cell.
plt.figure(figsize=(15, 15))
plt.imshow(wordcloud, interpolation="bilinear")
plt.title('Top Platforms', fontsize=20)
plt.axis("off")
plt.show()

## 4.2 Game Count per Platform

In [None]:
# Pie chart of each platform's share of rows in the dataset.
df_Platform = df['Platform'].value_counts()
ax10 = df_Platform.plot.pie(
    figsize=(17, 10),
    autopct='%1.1f%%',
    textprops={'fontsize': 12},
)
ax10.set_title('Game Count per Platform (%)', fontdict={'fontsize': 20})

 ## 4.3 Platform Sales per Region

In [None]:
# Bubble chart of regional sales for the 15 platforms with the highest North
# American sales. Bubble height and bubble area both encode the regional total.
# aggfunc is the string 'sum' because newer pandas deprecates passing np.sum.
df_RegionPlatform = (
    pd.pivot_table(df,
                   values=['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
                   index=['Platform'], aggfunc='sum')
    .sort_values(by='NA_Sales', ascending=False)
    .head(15)
)

# Every region is scaled by the same reference value (the largest platform's
# global total) so bubble sizes are comparable across regions.
largest_global_total = df_RegionPlatform['Global_Sales'].max()

plt.figure(figsize=(15, 10))

# One entry per region: (sales column, bubble colour); drawn in this order.
region_colours = [
    ('EU_Sales', 'darkblue'),
    ('JP_Sales', 'red'),
    ('NA_Sales', 'green'),
    ('Other_Sales', 'orange'),
]
for sales_col, colour in region_colours:
    plt.scatter(df_RegionPlatform.index, df_RegionPlatform[sales_col],
                color=colour,
                alpha=0.5,
                s=df_RegionPlatform[sales_col] / largest_global_total * 5000)

plt.xlabel("Platform", size=14)
plt.ylabel("Sales ($millions)", size=14)
plt.title('Platform Sales per Region ($millions)', fontsize=20)
# Hand-placed colour key in data coordinates.
plt.text(12, 600, 'Green - North America')
plt.text(12, 550, 'Blue - Europe')
plt.text(12, 500, 'Red - Japan')
plt.text(12, 450, 'Orange - Other')
# Call show(); the bare `plt.show` only referenced the function.
plt.show()

## 4.4 Sales According to Platform and Genre

In [None]:
# Stacked bars of global sales per platform, with one stacked trace per genre.
# One entry per trace, in stacking order: (genre, bar opacity, bar colour).
genre_styles = [
    ('Action', 0.75, 'green'),
    ('Sports', 0.5, 'blue'),
    ('Misc', 0.75, 'yellow'),
    ('Role-Playing', 0.6, 'red'),
    ('Adventure', 0.75, 'white'),
    ('Racing', 0.5, 'black'),
    ('Platform', 0.75, 'orange'),
    ('Simulation', 0.75, 'pink'),
    ('Fighting', 0.75, 'purple'),
    ('Strategy', 0.75, 'grey'),
    ('Puzzle', 0.75, 'rgb(119,172,238)'),
    ('Shooter', 0.5, 'rgb(20,100,80)'),
]

data = []
for genre, bar_opacity, bar_colour in genre_styles:
    # Each genre is aggregated once; the result supplies both x and y.
    platform_sales = df[df.Genre == genre].groupby('Platform')['Global_Sales'].sum()
    # The legend label is the genre value itself, so "Fighting" is no longer
    # abbreviated to "Fight".
    data.append(go.Bar(x=platform_sales.index, y=platform_sales.values,
                       opacity=bar_opacity, name=genre,
                       marker=dict(color=bar_colour)))

layout = go.Layout(barmode='stack', title='Global Sales According to Platform and Genre',
                   xaxis=dict(title='Platform'), yaxis=dict(title='Global Sales ($millions)'),
                   paper_bgcolor='beige', plot_bgcolor='beige')
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 4.5 Top Games per Platform

In [None]:
# Bubble chart of the five best-selling rows on every platform. Marker height
# and marker size both encode global sales; hovering shows the game name.
# The loop covers every platform in the data, most rows first. The hand-written
# version built traces for GB, NES and DC but left them out of `data`, never
# plotted several other platforms, and labelled XB as "xXB".
data = []
for platform in df['Platform'].value_counts().index:
    top_games = (
        df[df.Platform == platform]
        .sort_values(by='Global_Sales', ascending=False)
        .head(5)
    )
    data.append(go.Scatter(x=top_games.Platform, y=top_games.Global_Sales,
                           mode='markers', marker_size=top_games.Global_Sales,
                           name=platform, text=top_games.Name))

layout = dict(title='Top 5 Game Sales per Platform', autosize=False, width=1500, height=750,
              xaxis=dict(title='Platform', ticklen=5, zerolinewidth=1, gridcolor="white"),
              yaxis=dict(title='Global Sales ($millions)', ticklen=5, zeroline=False,
                         zerolinewidth=1, gridcolor="white"))

fig = dict(data=data, layout=layout)

iplot(fig)


# 5. Global & Regional Sales Wise Analysis

## 5.1 Global Sales per Year

In [None]:
# Total global sales per release year as a bar chart.
# Only Global_Sales is summed. Summing every column would also try to add up
# the string columns, which pandas >= 2.0 no longer drops silently.
df_Year = df.groupby(by="Year")[['Global_Sales']].sum()
# Years are shown as whole numbers on the tick labels. Rows with a missing
# year are already excluded by groupby.
df_Year.index = df_Year.index.astype(int)
ax5 = df_Year.plot.bar(y='Global_Sales', align='center', alpha=0.5, figsize=(15, 5), fontsize=12)
ax5.set_xlabel('Year', fontsize=12)
ax5.set_ylabel('Sales ($millions)', fontsize=12)
ax5.set_title('Global Sales per Year', fontsize=20)

## 5.2 Sales per Region

In [None]:
# Line chart of total sales per region for each release year.
# aggfunc is the string 'sum' because newer pandas deprecates passing np.sum.
# Year is the index, so the table is ordered with sort_index().
df_RegionSales = (
    pd.pivot_table(df, values=['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'],
                   index=['Year'], aggfunc='sum')
    .sort_index()
)
ax8 = df_RegionSales.plot(kind='line', figsize=(15, 8), fontsize=18)
ax8.set_title('Sales per Region over Time ($millions)', fontsize=20)
ax8.grid()
# The pivot index (Year) is on the x axis and sales are on the y axis; the two
# labels were previously swapped.
ax8.set_xlabel('Year', fontsize=16)
ax8.set_ylabel('Sales ($millions)', fontsize=16)

## 5.3 Top Game Sales per Region

In [None]:
# Top 5 titles by total sales in each region, one bar panel per region.
# One entry per panel: (sales column, trace name, subplot title, row, col).
region_panels = [
    ('EU_Sales', "EU", 'Europe', 1, 1),
    ('NA_Sales', "NA", 'North America', 1, 2),
    ('JP_Sales', "JP", 'Japan', 2, 1),
    ('Other_Sales', "Other", 'Other Sales', 2, 2),
]

fig = make_subplots(
    rows=2, cols=2, subplot_titles=[panel[2] for panel in region_panels],
    column_widths=[2, 2],
    row_heights=[2, 2],
    specs=[[{"type": "bar"}, {"type": "bar"}],
           [{"type": "bar"}, {"type": "bar"}]])

# Add Traces
for sales_col, trace_name, _subplot_title, panel_row, panel_col in region_panels:
    # Sum over platforms so a multi-platform title is ranked by its total sales.
    # The pivot_table default (mean) averaged it over its platform rows.
    top_games = df.groupby('Name')[sales_col].sum().nlargest(5)
    fig.add_trace(
        go.Bar(y=top_games.values,
               x=top_games.index,
               name=trace_name,
               marker={'color': top_games.values, 'colorscale': 'Portland'}),
        row=panel_row, col=panel_col)

fig.update_layout(height=500, width=1500, showlegend=False)
# The panels are split by region, so the title now says "per Region".
fig.update_layout(title="Top 5 Game Sales per Region ($millions)", title_font_size=20)

fig.show()

## 5.4 Regional and Global Sales

In [None]:
# Lower-triangle pairplot of the table with the Rank column left out.
pairplot_frame = df.drop(['Rank'], axis=1)
g = sns.pairplot(pairplot_frame, corner=True)

g.fig.suptitle('Sales per Game Pairplot ($millions)', fontsize=20)


## 5.5 Sales by Genre and Region

In [None]:
# Heatmap of total sales with regions as rows and genres as columns.
# The four regional columns are selected explicitly. Dropping a few columns and
# summing the rest also summed the string columns (Name, Platform, Publisher),
# which pandas >= 2.0 no longer drops silently and a heatmap cannot draw.
region_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
# New names are used so this cell does not overwrite the platform x genre table
# built in section 3.4.
df_GenreRegion = df.groupby(by="Genre")[region_columns].sum()
df_GenreRegion_T = df_GenreRegion.T
plt.figure(figsize=(16, 5))
ax9 = sns.heatmap(df_GenreRegion_T)
ax9.set_title('Sales by Genre and Region ($millions)', fontsize=20)

## 5.6 Top Game Sales per Region

In [None]:
# Scatter of regional sales against overall rank for the 100 best-selling rows;
# hovering over a point shows the game name.
df_TopGames = df.sort_values(by='Global_Sales', ascending=False).head(100)

# One entry per trace: (sales column, legend label).
region_traces = [
    ('NA_Sales', 'North America'),
    ('EU_Sales', 'Europe'),
    ('JP_Sales', 'Japan'),
    ('Other_Sales', 'other'),
]

# x, y and hover text all come from the same 100-row frame. Previously x and
# text were taken from the full table while y had only 100 values.
data = [
    go.Scatter(x=df_TopGames.Rank, y=df_TopGames[sales_col],
               mode='markers',
               name=legend_label,
               text=df_TopGames.Name)
    for sales_col, legend_label in region_traces
]
layout = dict(title='Top 100 Video Game Sales per Region',
              xaxis=dict(title='Rank', ticklen=5, zerolinewidth=1, gridcolor="white"),
              yaxis=dict(title='Sales ($millions)', ticklen=5, zeroline=False,
                         zerolinewidth=1, gridcolor="white"))
fig = dict(data=data, layout=layout)
iplot(fig)