# INTRODUCTION
The video game industry grows day by day. Especially since the pandemic, it can be said that the industry has surpassed other entertainment industries such as music and cinema.

In this kernel, I have analysed the Video Game Sales data and visualized how sales relate to the other features in the dataset. 
* For exploring and analysing data, pandas library is used.
* For visualization, wordcloud and plotly are used.

**Content**
1. [Sales of Top 50 Video Games](#0)
1. [Loading Data and Explanation of Features](#1)
1. [Comparison of NA, EU and JP Sales of Top 50 Video Games](#2)
1. [Global Sales vs Rank of top 50 Video Games According to Publisher](#3)
1. [Global Sales of Top 100 Video Games According to Genre vs Published Year](#4)
1. [Genre vs Rank of Top 20 Games in 2010 with Global Sales(Size) and Rank(Color)](#5)
1. [Most Used Platforms of Top 100 Video Games](#6)

<a id="1"></a><br>
# Loading Data and Explanation of Features

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

#data visualization
#seaborn
import seaborn as sns

#plotly
import plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go

#word cloud
from wordcloud import WordCloud

#matplotlib
import matplotlib.pyplot as plt

import os
# show which entries exist under ../input (the CSV is read from a sub-folder of it below)
print(os.listdir("../input"))

In [None]:
# load the video game sales CSV into a DataFrame;
# every later cell reads from vg_sales
vg_sales=pd.read_csv("../input/videogamesales/vgsales.csv")

In [None]:
# summary of the loaded frame (columns, dtypes, non-null counts)
vg_sales.info()

In [None]:
# preview the first 10 rows
vg_sales.head(10)

<a id="0"></a><br>
# Sales of Top 50 Video Games

Box plots are used to show the median and the outliers of each sales column.

In [None]:
# first 50 rows of the data set
# NOTE(review): treated as the "top 50" — assumes the CSV is ordered by Rank
df = vg_sales.iloc[:50, :]

# every box shares the same dark-blue marker colour
box_color = 'rgb(12,12,140)'

# (column in df, legend label) for each box, in display order
box_columns = [
    ('Global_Sales', 'Global Sales'),
    ('NA_Sales', 'NA Sales'),
    ('EU_Sales', 'EU Sales'),
    ('JP_Sales', 'JP Sales'),
]

data = [
    go.Box(y=df[column], name=label, marker=dict(color=box_color))
    for column, label in box_columns
]
iplot(data)

<a id="2"></a><br>
# Comparison of NA, EU and JP Sales of Top 50 Video Games

In [None]:


# (column in df, legend label, line/marker colour) for each region
region_styles = [
    ('NA_Sales', 'NA Sales', 'rgba(60, 179, 113, 0.8)'),
    ('EU_Sales', 'EU Sales', 'rgba(255, 0, 0, 0.8)'),
    ('JP_Sales', 'JP Sales', 'rgba(0, 0, 255, 0.8)'),
]

# one lines+markers trace per region, plotted against rank;
# the game name is attached as hover text
data = [
    go.Scatter(
        x=df['Rank'],
        y=df[column],
        mode='lines+markers',
        name=label,
        marker=dict(color=color),
        text=df['Name'],
    )
    for column, label, color in region_styles
]

# figure = traces + layout, passed to plotly as a plain dict
layout = dict(
    title='Comparison of NA, EU and JP Sales of Top 50 Video Games',
    xaxis=dict(title='Rank', ticklen=5, zeroline=True),
)
fig = dict(data=data, layout=layout)
iplot(fig)

**Comparison of NA, EU and JP Sales of Top 20 Video Games**

In [None]:
# close-up on the first 20 rows, drawn as grouped bars for readability
df = vg_sales.iloc[:20, :]

# all bars share the same black 1.5px outline
bar_outline = dict(color='rgb(0,0,0)', width=1.5)

# (column in df, legend label, fill colour) for each region
region_bars = [
    ('NA_Sales', 'NA Sales', 'rgba(255,174,255,0.5)'),
    ('EU_Sales', 'EU Sales', 'rgba(255,255,128,0.5)'),
    ('JP_Sales', 'JP Sales', 'rgba(0,255,8,0.5)'),
]

data = []
for column, label, fill in region_bars:
    data.append(
        go.Bar(
            x=df['Rank'],
            y=df[column],
            name=label,
            marker=dict(color=fill, line=bar_outline),
            text=df['Name'],
        )
    )

# bars of the three regions are placed side by side per rank
fig = go.Figure(data=data, layout=go.Layout(barmode='group'))
iplot(fig)

In [None]:
import plotly.figure_factory as ff  # provides the scatter-plot matrix helper

# Take an explicit copy of the three regional sales columns: df is itself a
# slice of vg_sales, and adding a column to a selection of a slice can raise
# pandas' SettingWithCopyWarning. The copy makes the new column unambiguous
# and leaves df / vg_sales untouched.
dataframe = df.loc[:, ['NA_Sales', 'EU_Sales', 'JP_Sales']].copy()

# 1-based running number per row, used by the figure factory as the
# categorical 'index' column that colours the points
dataframe['index'] = np.arange(1, len(dataframe) + 1)

# scatter matrix with box plots on the diagonal
fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='index', colormap='Portland',
                                  colormap_type='cat', height=700, width=700)
iplot(fig)

<a id="3"></a><br>
# Global Sales vs Rank of top 50 Video Games According to Publisher

In [None]:
# back to the first 50 rows, then list the distinct publishers among them
df = vg_sales.head(50)
df['Publisher'].unique()

In [None]:
# Publishers to plot and the bar colour for each one.
# NOTE(review): assumed to be the five publishers present in the first 50
# rows (see the unique() output of the previous cell) — update this table if
# the data changes.
# The key is used both to filter the rows and as the legend label, so the
# label always matches the spelling in the data (the legend previously read
# 'Activison').
publisher_colors = {
    'Nintendo': 'rgba(0, 255, 8, 0.8)',
    'Microsoft Game Studios': 'rgba(0, 243, 255, 0.8)',
    'Take-Two Interactive': 'rgba(240, 255, 0, 0.8)',
    'Sony Computer Entertainment': 'rgba(255, 4, 0, 0.8)',
    'Activision': 'rgba(255, 0, 220, 0.8)',
}

# all bars share the same black 1.5px outline
bar_outline = dict(color='rgb(0, 0, 0)', width=1.5)

# one bar trace per publisher: rank on x, global sales on y,
# game name as hover text
data = []
for publisher, color in publisher_colors.items():
    df_publisher = df[df.Publisher == publisher]
    data.append(
        go.Bar(
            x=df_publisher.Rank,
            y=df_publisher.Global_Sales,
            name=publisher,
            marker=dict(color=color, line=bar_outline),
            text=df_publisher.Name,
        )
    )

layout = dict(title='Global Sales vs Rank of top 50 Video Games According to Publisher',
              xaxis=dict(title='Rank', ticklen=5, zeroline=False),
              yaxis=dict(title='Global Sales', ticklen=5, zeroline=False))

# visualization
fig = dict(data=data, layout=layout)
iplot(fig)

<a id="4"></a><br>
# Global Sales of Top 100 Video Games According to Genre vs Published Year

In [None]:
# first 100 rows, then list the distinct genres among them
df = vg_sales.head(100)
df['Genre'].unique()

In [None]:
# (genre value in the data, legend label, marker colour), in legend order.
# The label differs from the data value only for 'Role-Playing'.
genre_styles = [
    ('Sports', 'Sports', 'rgba(0, 255, 8, 0.8)'),
    ('Platform', 'Platform', 'rgba(0, 243, 255, 0.8)'),
    ('Racing', 'Racing', 'rgba(240, 255, 0, 0.8)'),
    ('Role-Playing', 'Role Playing', 'rgba(255, 4, 0, 0.8)'),
    ('Puzzle', 'Puzzle', 'rgba(255, 0, 220, 0.8)'),
    ('Misc', 'Misc', 'rgba(65, 113, 60, 0.8)'),
    ('Shooter', 'Shooter', 'rgba(177, 148, 51, 0.8)'),
    ('Simulation', 'Simulation', 'rgba(28, 52, 167, 0.8)'),
    ('Action', 'Action', 'rgba(173, 133, 225, 0.8)'),
    ('Fighting', 'Fighting', 'rgba(3, 250, 202, 0.8)'),
    ('Adventure', 'Adventure', 'rgba(155, 0, 255, 0.8)'),
]

# one marker-only scatter trace per genre: year on x, global sales on y,
# game name as hover text
data = []
for genre, legend_label, color in genre_styles:
    genre_rows = df[df['Genre'] == genre]
    data.append(
        go.Scatter(
            x=genre_rows['Year'],
            y=genre_rows['Global_Sales'],
            mode='markers',
            name=legend_label,
            marker=dict(color=color),
            text=genre_rows['Name'],
        )
    )

# axis titles and figure title, then render
layout = dict(
    title='Global Sales of Top 100 Video Games According to Genre vs Published Year',
    xaxis=dict(title='Years', ticklen=5, zeroline=False),
    yaxis=dict(title='Global Sales', ticklen=5, zeroline=False),
)
fig = dict(data=data, layout=layout)
iplot(fig)

**Global Sales Rate of Top 100 Video Games According to Genre as Pie Plot**

In [None]:
# one value/label pair per game in df: global sales and the game's genre
pie1 = df['Global_Sales']
labels = df['Genre']

# donut-style pie (hole=.3) occupying the left half of the figure
pie_trace = dict(
    type='pie',
    values=pie1,
    labels=labels,
    name='Global Sales Rate',
    hoverinfo='label+percent+name',
    hole=.3,
    domain=dict(x=[0, .5]),
)

# annotation slot above the pie; its text is intentionally left empty
pie_annotation = dict(
    text='',
    showarrow=False,
    font=dict(size=20),
    x=0.20,
    y=1,
)

fig = dict(
    data=[pie_trace],
    layout=dict(
        title='Global Sales Rate of Top 100 Video Games According to Genre',
        annotations=[pie_annotation],
    ),
)
iplot(fig)

**Global Sales Rate of All Video Games According to Genre**

In [None]:
# same chart as for the top 100, but over every row of vg_sales
pie1 = vg_sales['Global_Sales']
labels = vg_sales['Genre']

# donut-style pie (hole=.3) occupying the left half of the figure
pie_trace = dict(
    type='pie',
    values=pie1,
    labels=labels,
    name='Global Sales Rate',
    hoverinfo='label+percent+name',
    hole=.3,
    domain=dict(x=[0, .5]),
)

# annotation slot above the pie; its text is intentionally left empty
pie_annotation = dict(
    text='',
    showarrow=False,
    font=dict(size=20),
    x=0.20,
    y=1,
)

fig = dict(
    data=[pie_trace],
    layout=dict(
        title='Global Sales Rate of All Video Games According to Genre',
        annotations=[pie_annotation],
    ),
)
iplot(fig)

In [None]:
# word cloud over the Genre column of every game, as an alternative view
# of the same genre data

# all genre values joined into one space-separated text
genre_text = ' '.join(vg_sales['Genre'])

wordcloud = WordCloud(
    width=512,
    height=384,
    background_color='white',
).generate(genre_text)

# draw the cloud without axes, save it to graph.png, then display it
plt.subplots(figsize=(10, 10))
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('graph.png')
plt.show()

<a id="5"></a><br>
# Genre vs Rank of Top 20 Games in 2010 with Global Sales(Size) and Rank(Color)

In [None]:
# Convert Year to int (it was float with nulls).
# Missing years are filled with 0 first, because NaN cannot be cast to int;
# 0 therefore marks "year unknown" from here on.
# astype() returns a new Series, so the result is assigned back to the
# column — calling it without assignment left Year as float.
vg_sales['Year'] = vg_sales['Year'].fillna(0).astype(int)

# display the converted column as the cell output
vg_sales['Year']

In [None]:
# first 20 rows whose Year is 2010
# NOTE(review): treated as the "top 20 of 2010" — assumes vg_sales is ordered by Rank
released_2010 = vg_sales['Year'] == 2010
df_2010 = vg_sales[released_2010].head(20)

# marker size follows global sales, marker colour follows rank
sales_size = df_2010['Global_Sales']
rank_color = df_2010['Rank']

# single marker trace: rank on x, genre on y, game name as hover text;
# showscale adds the colour bar for the rank colouring
bubble_trace = dict(
    x=df_2010['Rank'],
    y=df_2010['Genre'],
    mode='markers',
    marker=dict(color=rank_color, size=sales_size, showscale=True),
    text=df_2010['Name'],
)
data = [bubble_trace]
iplot(data)

In [None]:
# 3D view of the same 2010 selection: rank (x), global sales (y), genre (z)

# fixed-size markers coloured by rank on the 'magenta' colour scale
marker_style = dict(size=10, color=df_2010['Rank'], colorscale='magenta')

trace = go.Scatter3d(
    x=df_2010['Rank'],
    y=df_2010['Global_Sales'],
    z=df_2010['Genre'],
    mode='markers',
    marker=marker_style,
)
data = [trace]

# zero margins on all four sides so the scene fills the output area
layout = go.Layout(margin=dict(l=0, b=0, r=0, t=0))
fig = go.Figure(data=data, layout=layout)
iplot(fig)

<a id="6"></a><br>
# Most Used Platforms of Top 100 Video Games

In [None]:
# first 100 rows of the data set
df = vg_sales.head(100)

# histogram over the Platform column: one bar per platform, height = row count
bar_style = dict(color='rgba(17,50,96,0.6)')
trace = go.Histogram(
    x=df['Platform'],
    name='Platforms',
    opacity=1,
    marker=bar_style,
)

# titles for the figure and both axes
layout = go.Layout(
    title='Platforms of Top 100 Video Games',
    barmode='overlay',
    xaxis=dict(title='Platforms'),
    yaxis=dict(title='Count'),
)

# render
fig = go.Figure(data=trace, layout=layout)
iplot(fig)