In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#importing necessary libraries
import numpy as np 
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
plotly.offline.init_notebook_mode(connected=True)
import plotly.graph_objects as go

In [None]:
# Load the video game sales CSV and preview its first rows.
VGSALES_CSV='/kaggle/input/videogamesales/vgsales.csv'
games=pd.read_csv(VGSALES_CSV)
games.head()

In [None]:
games.shape

In [None]:
games.info()

*The* ***Year*** *column is loaded as float, so it is converted to int below once the rows with missing values have been dropped.*

In [None]:
#identifying missing values
games.isnull().sum()

In [None]:
# Drop every row that contains a missing value, then cast Year to int
# (the integer cast is only possible once the NaN years are gone).
games=games.dropna().astype({'Year':int})

In [None]:
games.info()

In [None]:
games.isnull().sum()

In [None]:
# Drop rows that exactly duplicate an earlier row (all columns are compared and
# the first occurrence is kept -- the pandas defaults for drop_duplicates).
games=games.drop_duplicates()

In [None]:
games.shape

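*Compared with the raw data the frame is now 307 rows smaller. Note that this figure also counts the rows removed by* `dropna()` *above, so it is not the number of duplicate rows alone.*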

In [None]:
# Share of titles per genre (each row of `games` counts once).
genre_pie=px.pie(data_frame=games,names='Genre',title='Distribution of Genres')
genre_pie

*Majority of the games in the dataset are* ***Action(20%)*** *and* ***Sports(14.1%)*** *oriented.*

In [None]:
# Percentage of titles released on each platform, most common first.
platform=games.Platform.value_counts(normalize=True)*100
# Build an explicit two-column frame for plotting: the name pandas assigns to a
# value_counts() result differs between pandas versions (the column name before
# 2.0, 'proportion' afterwards), so the chart must not reference it.
platform_share=platform.rename_axis('Platform').reset_index(name='Count in %')
px.bar(platform_share,x='Platform',y='Count in %',color='Count in %',
       title='Distribution of Gaming Platforms',color_continuous_scale=px.colors.sequential.Purpor)

*The platforms with the most games are* ***Nintendo DS*** *and* ***Sony PlayStation 2*** ***(around 13%).***

In [None]:
# Percentage of titles released in each year, most common year first.
year=games.Year.value_counts(normalize=True)*100
# Build an explicit two-column frame for plotting: the name pandas assigns to a
# value_counts() result differs between pandas versions (the column name before
# 2.0, 'proportion' afterwards), so the chart must not reference it.
year_share=year.rename_axis('Release year').reset_index(name='Count in %')
px.bar(year_share,x='Release year',y='Count in %',color='Count in %',
       title='Distribution of Release Year',color_continuous_scale=px.colors.sequential.PuRd)

*The most games were released in* ***2008*** *and* ***2009*** ***(around 9%).***

In [None]:
# Percentage of all titles released by each of the 20 most prolific publishers.
# `top_publishers` keeps the publisher names as its index for later cells.
top_publishers=games.Publisher.value_counts(normalize=True)[:20]*100
# Build an explicit two-column frame for plotting: the name pandas assigns to a
# value_counts() result differs between pandas versions (the column name before
# 2.0, 'proportion' afterwards), so the chart must not reference it.
publisher_share=top_publishers.rename_axis('Company').reset_index(name='Count in %')
px.bar(publisher_share,x='Company',y='Count in %',
       title='Top 20 Game Publishers in terms of most games produced',
       color='Count in %',color_continuous_scale=px.colors.sequential.Viridis)

***Electronic Arts*** *has published the most games of any publisher* ***(approx. 8.2% of all games in the dataset).***

In [None]:
# Plot the yearly total of one sales column as a colour-shaded bar chart.
def sales_per_year(column,color,title):
    """Show a bar chart of the per-year sum of a sales column of `games`.

    column: name of the column in `games` that is summed for each Year.
    color:  continuous colour scale used to shade the bars by their total.
    title:  title shown above the chart.
    """
    yearly_totals=games.groupby('Year')[column].sum()
    chart=px.bar(
        yearly_totals,
        y=column,
        color=column,
        labels={column:'Sales(in million)'},
        color_continuous_scale=color,
        title=title,
    )
    chart.show()

In [None]:
sales_per_year('NA_Sales','Cividis_r','Yearly Sales in North America')

* *The best-selling years were* ***2007-2010***, *each with sales of more than* ***300 million.***
* *Highest Sale is recorded in* ***2008*** *with an estimated figure of* ***351.44 million*** *in North America.*

In [None]:
sales_per_year('EU_Sales','Cividis_r','Yearly sales in Europe')

* *The best-selling years were* ***2007-2011***, *each with sales of more than* ***160 million.***
* *Highest Sale is recorded in* ***2009*** *with an estimated figure of* ***191.59 million*** *in Europe.*

In [None]:
sales_per_year('JP_Sales','Cividis_r','Yearly sales in Japan')

* *The best-selling years were* ***2006-2009***, *each with sales of more than* ***60 million.***
* *Highest Sale is recorded in* ***2006*** *with an estimated figure of* ***73.73 million*** *in Japan.*

In [None]:
sales_per_year('Other_Sales','Cividis_r','Yearly sales in rest of the world')

* *The best-selling years were* ***2007-2009***, *each with sales of more than* ***70 million.***
* *Highest Sale is recorded in* ***2008*** *with an estimated figure of* ***82.39 million*** *in rest of the world.*

In [None]:
sales_per_year('Global_Sales','Cividis_r','Yearly sales around the world')

* *The best-selling years were* ***2007-2010***, *each with sales of more than* ***600 million.***
* *Highest Sale is recorded in* ***2008*** *with an estimated figure of* ***678.90 million*** *around the world.*

In [None]:
#function to plot tabular analysis
def table_plot(feature1,feature2,title1,title2,color,metric,index=True,top_n=20):
    """Show a two-column table of `feature2` aggregated per `feature1`.

    The rows of `games` are grouped by `feature1`, `feature2` is aggregated
    with `metric`, and the groups are listed from the largest aggregate to the
    smallest.

    feature1: column of `games` to group by (first table column).
    feature2: column of `games` to aggregate (second table column).
    title1:   header text for the group column.
    title2:   header text for the aggregate column.
    color:    fill colour of the table cells.
    metric:   aggregation handed to `.agg` (e.g. "sum" or "max").
    index:    when true (default) only the first `top_n` rows are shown;
              pass False to list every group.
    top_n:    number of rows kept when `index` is true (default 20).
    """
    top=games.groupby([feature1])[feature2].agg(metric).sort_values(ascending=False)
    if index:
        # Keep only the leading rows of the descending ranking.
        top=top.head(top_n)
    fig=go.Figure(data=[go.Table(header=dict(values=[title1,title2],
                                        font=dict(color='black', size=11)),
                            cells=dict(values=[top.index,top],fill_color=color,
                                       font=dict(color='white', size=11)))])
    fig.show()

***Top 20 Games sold around the world***

In [None]:
table_plot('Name','Global_Sales','Game','Sales around the world','violet',"max")

***Top 20 Games sold in North America***

In [None]:
table_plot('Name','NA_Sales','Game','Sales in North America','violet',"max")

***Top 20 Games sold in Europe***

In [None]:
table_plot('Name','EU_Sales','Game','Sales in Europe','violet',"max")

***Top 20 games sold in Japan***

In [None]:
table_plot('Name','JP_Sales','Game','Sales in Japan','violet',"max")

***Top 20 games sold in rest of the world***

In [None]:
table_plot('Name','Other_Sales','Game','Sales in Rest of the World','violet',"max")

***Top 20 publishers with total sales around the world***

In [None]:
table_plot('Publisher','Global_Sales','Publisher','Sales around the world','black',"sum")

***Top 20 publishers with total sales in North America***

In [None]:
table_plot('Publisher','NA_Sales','Publisher','Sales in North America','black',"sum")

***Top 20 publishers with total sales in Europe***

In [None]:
table_plot('Publisher','EU_Sales','Publisher','Sales in Europe','black',"sum")

***Top 20 publishers with total sales in Japan***

In [None]:
table_plot('Publisher','JP_Sales','Publisher','Sales in Japan','black',"sum")

***Top 20 publishers with total sales in rest of the world***

In [None]:
table_plot('Publisher','Other_Sales','Publisher','Sales in Rest of the World','black',"sum")

***Top platforms with most sales around the world*** 

In [None]:
# Total global sales per platform; index=False lists every platform instead of a top-20 cut.
table_plot('Platform','Global_Sales','Platform','Sales in World','blue',"sum",index=False)

***Sony playstation 2 has most number of sales around the world.***

***Top platforms with most sales in North America*** 

In [None]:
table_plot('Platform','NA_Sales','Platform','Sales in North America','blue',"sum",index=False)

***Xbox 360 has most number of sales in North America.***

***Top platforms with most sales in Europe*** 

In [None]:
table_plot('Platform','EU_Sales','Platform','Sales in Europe','blue',"sum",index=False)

***Sony Playstation 3 has most number of sales in Europe.***

***Top platforms with most sales in Japan*** 

In [None]:
table_plot('Platform','JP_Sales','Platform','Sales in Japan','blue',"sum",index=False)

***Nintendo DS has most number of sales in Japan.***

***Top platforms with most sales in Rest of the World*** 

In [None]:
table_plot('Platform','Other_Sales','Platform','Sales in Rest of the World','blue',"sum",index=False)

***Sony playstation 2 has most number of sales in Rest of the World.***

***Top Genres in World with most sales***

In [None]:
table_plot('Genre','Global_Sales','Genre','Sales in World','green',"sum",index=False)

***Top Genres in North America with most sales***

In [None]:
table_plot('Genre','NA_Sales','Genre','Sales in North America','green',"sum",index=False)

***Top Genres in Europe with most sales***

In [None]:
table_plot('Genre','EU_Sales','Genre','Sales in Europe','green',"sum",index=False)

***Top Genres in Japan with most sales***

In [None]:
table_plot('Genre','JP_Sales','Genre','Sales in Japan','green',"sum",index=False)

***Top Genres in Rest of the World with most sales***

In [None]:
table_plot('Genre','Other_Sales','Genre','Sales in Rest of the World','green',"sum",index=False)

In [None]:
games.head()

In [None]:
#function to plot top 10 games of each genre
def genre_plot(feature):
    """Show a table of the first ten games of one genre, ordered by Rank.

    feature: genre name; rows of `games` whose Genre equals it are used, and
             it is also inserted into the table title.

    The games are ordered by ascending value of the dataset's Rank column
    (presumably the overall sales rank -- confirm against the data source).
    """
    genre_games=games[games.Genre==feature]
    # Grouping by Rank yields the names sorted by ascending Rank; keep the
    # first ten and renumber them 0..9 so the table can show positions 1..10.
    top=genre_games.groupby(['Rank'])['Name'].max().iloc[:10].reset_index(drop=True)
    fig=go.Figure(data=[go.Table(header=dict(values=['Rank','Game'],
                                            font=dict(color='black', size=11)),
                                cells=dict(values=[top.index + 1,top],fill_color='red',
                                           font=dict(color='white', size=11)))])
    # Title comes from the argument, not from a loop variable in the caller's scope.
    fig.update_layout(title_text="Top 10 {genre_name} Games".format(genre_name=feature))
    fig.show()

In [None]:
# Draw one top-10 table for every genre that occurs in the data.
for i in games.Genre.unique():
    genre_plot(i)

In [None]:
#function to display top 10 games of each of the top 20 publishers
def publisher_plot(feature):
    """Show a table of the first ten games of one publisher, ordered by Rank.

    feature: publisher name; rows of `games` whose Publisher equals it are
             used, and it is also inserted into the table title.

    The games are ordered by ascending value of the dataset's Rank column
    (presumably the overall sales rank -- confirm against the data source).
    """
    publisher_games=games[games.Publisher==feature]
    # Grouping by Rank yields the names sorted by ascending Rank; keep the
    # first ten and renumber them 0..9 so the table can show positions 1..10.
    top=publisher_games.groupby(['Rank'])['Name'].max().iloc[:10].reset_index(drop=True)
    fig=go.Figure(data=[go.Table(header=dict(values=['Rank','Name'],
                                            font=dict(color='black', size=11)),
                                cells=dict(values=[top.index + 1,top],fill_color='orange',
                                           font=dict(color='white', size=11)))])
    # Title comes from the argument, not from a loop variable in the caller's scope.
    fig.update_layout(title_text="Top 10 {publisher_name} Games".format(publisher_name=feature))
    fig.show()

In [None]:
# Draw one top-10 table for each publisher in `top_publishers`
# (the 20 publishers with the most titles, computed earlier).
for i in top_publishers.index:
    publisher_plot(i)

*If you liked it do share and upvote.*

*Give your valuable feedback.*

*Happy Learning!!!!!.*