In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from plotly.offline import iplot
from wordcloud import WordCloud



# Data Importing

In [None]:
# Read the sales CSV from the relative input directory and preview the first rows.
vgsales_csv = '../input/videogamesales/vgsales.csv'
df = pd.read_csv(vgsales_csv)
df.head()

In [None]:
# Column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Number of missing values per column. A bare df.isna() only displays a
# row-by-row boolean frame, which cannot be read at a glance.
df.isna().sum()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

In [None]:
# Give the first column the more descriptive name "Game_Rank".
first_column = df.columns[0]
df = df.rename(columns={first_column: "Game_Rank"})

In [None]:
# Preview the frame again to check the renamed first column.
df.head()

In [None]:
# Keep every row whose Year is not later than 2015. Rows with a missing Year
# do not satisfy the comparison, so they are kept as well.
released_after_2015 = df['Year'] > 2015
df = df[~released_after_2015]

*There is not enough data to evaluate the years after 2015, so I dropped the rows from those years.*

In [None]:
# Number of rows per genre, most frequent first.
df['Genre'].value_counts()

In [None]:
# Number of rows per publisher, most frequent first.
df['Publisher'].value_counts()

In [None]:
# Number of rows per platform, most frequent first.
df['Platform'].value_counts()

# How many unique games, publishers, platforms and genres do we have?

In [None]:
# Distinct values of each categorical column. Missing entries are dropped
# first because Series.unique() reports NaN as a value of its own, which
# would inflate any count taken from these arrays.
games = df['Name'].dropna().unique()
publisher = df['Publisher'].dropna().unique()
platforms = df['Platform'].dropna().unique()
genres = df['Genre'].dropna().unique()

In [None]:
# One numeric indicator per category, laid out side by side in a single row.
# Each entry is (title, array of distinct values, font colour).
indicator_specs = [
    ("Games", games, 'gold'),
    ("Publishers", publisher, 'green'),
    ("Platforms", platforms, 'darkviolet'),
    ("Genres", genres, 'deepskyblue'),
]

fig = go.Figure()
for column, (label, values, color) in enumerate(indicator_specs):
    fig.add_trace(go.Indicator(
        mode="number",
        value=len(values),
        title={'text': label, 'font': {'color': color, 'size': 20}},
        number={'font': {'color': color, 'size': 50}},
        domain={'row': 0, 'column': column},
    ))

fig.update_layout(
    grid={'rows': 1, 'columns': len(indicator_specs), 'pattern': "independent"})
fig.show()

In [None]:
# Number of games per platform, bars ordered from most to fewest rows.
platform_order = df['Platform'].value_counts().index
plt.figure(figsize=(20, 10))
sns.countplot(data=df, x="Platform", order=platform_order)


In [None]:
# Number of games per genre, bars ordered from most to fewest rows.
genre_order = df['Genre'].value_counts().index
plt.figure(figsize=(15, 10))
sns.countplot(data=df, x="Genre", order=genre_order)


In [None]:
# Number of games per release year, bars ordered from the year with the
# fewest titles to the year with the most.
titles_per_year = df.groupby(by=['Year'])['Name'].count()
year_order = titles_per_year.sort_values(ascending=True).index
plt.figure(figsize=(15, 10))
sns.countplot(data=df, x="Year", order=year_order)
plt.xticks(rotation=90)

# Game releases by genre in the three years with the most releases

In [None]:
# Genre breakdown for the three years that have the most rows in df.
busiest_years = df.Year.value_counts().head(3).index
plt.figure(figsize=(30, 10))
sns.countplot(data=df, x="Year", hue='Genre', order=busiest_years)
plt.xticks(size=25, rotation=0)

# Highest Sales Numbers Worldwide

In [None]:
# Total global sales per year. The sorted view below is only displayed;
# data_year itself stays in Year order.
data_year = df.groupby('Year')['Global_Sales'].sum().reset_index()
data_year.sort_values('Global_Sales', ascending=False)

In [None]:
# Bar chart of the yearly global sales totals held in data_year.
plt.figure(figsize=(15, 10))
sns.barplot(data=data_year, x="Year", y="Global_Sales")
plt.xticks(rotation=90)

In [None]:
# Correlation heatmap of the numeric columns. Text columns are excluded
# explicitly: since pandas 2.0 DataFrame.corr() no longer drops them silently
# and raises on the first non-numeric value instead.
numeric_columns = df.select_dtypes(include='number')
plt.figure(figsize=(8, 8))
sns.heatmap(numeric_columns.corr(), annot=True, cmap='Blues')
plt.show()

In [None]:
# Share of games per genre as a pie chart.
genre_counts = df.Genre.value_counts()  # computed once, reused for labels and sizes
labels = genre_counts.index
sizes = genre_counts.values
# No slice is pulled out of the pie. The list is sized from the data because
# plt.pie requires explode to have the same length as sizes, so a hard-coded
# 12-entry list fails as soon as the number of genres differs.
explode = [0] * len(labels)
# visual
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=sns.color_palette('Set2'), autopct='%1.1f%%')
plt.title('Games According to Genre', fontsize=17, color='green')

In [None]:
# World rank of the top 100 video games against their Japan and Europe sales.
#
# The slice gets its own name instead of being assigned back to `df`:
# overwriting `df` here would silently restrict every later cell to these
# rows. head(100) selects by position, so it yields 100 rows even when some
# of the original index labels 0..99 are no longer present in df.
# `go` and `iplot` come from the notebook's import cell.
top_100 = df.head(100)

trace1 = go.Scatter(
    x=top_100.Game_Rank,
    y=top_100.JP_Sales,
    mode="lines+markers",
    name="JP Sales",
    marker=dict(color='rgba(158, 90, 10, 0.7)'),
    text=top_100.Name)  # hover text: game title
trace2 = go.Scatter(
    x=top_100.Game_Rank,
    y=top_100.EU_Sales,
    mode="lines",
    name="Europe Sales",
    marker=dict(color='rgba(56, 140, 200, 0.7)'),
    text=top_100.Name)  # hover text: game title

edit_df = [trace1, trace2]
layout = dict(title="World rank of the top 100 video games, japan and europe sales .",
              xaxis=dict(title="World Rank", tickwidth=5, ticklen=8, zeroline=False))
fig = dict(data=edit_df, layout=layout)
iplot(fig)

In [None]:
# data preparation: mean sales per genre for every sales column.
sales_columns = ["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]
# sort=False keeps the genres in order of first appearance in df, which is
# the order they are drawn along the x-axis. Rows with a missing Genre are
# left out by groupby.
df1 = df.groupby("Genre", sort=False)[sales_columns].mean().reset_index()

# data visualization: one line per sales column.
# Each entry is (column in df1, legend name, plotly mode, colour).
trace_specs = [
    ("NA_Sales", "North America Sales", "lines+markers", 'grey'),
    ("EU_Sales", "Europe Sales", "lines", 'blue'),
    ("JP_Sales", "Japan Sales", "lines+markers", 'orange'),
    ("Other_Sales", "Other Sales", "lines", 'green'),
    ("Global_Sales", "Global Sales", "lines+markers", 'black'),
]
edit_df = [
    go.Scatter(
        x=df1.Genre,
        y=df1[column],
        mode=mode,
        name=name,
        marker=dict(color=color))
    for column, name, mode, color in trace_specs
]
layout = dict(title="The genres according to the average sales",
              xaxis=dict(title="  Genre of Video Games", tickwidth=5, ticklen=8, zeroline=False))
fig = dict(data=edit_df, layout=layout)
# No plt.savefig() here: this chart is a plotly figure, so matplotlib's
# savefig would only write whatever matplotlib figure happens to be current
# (or a blank one) to disk, never this chart.
iplot(fig)

In [None]:
# Which words appear most often in the titles of Action games?
# data preparation: titles of the first 50 Action rows in df
action_titles = df.loc[df.Genre == 'Action', 'Name'].iloc[:50]

fig, ax = plt.subplots(figsize=(8, 8))
cloud = WordCloud(background_color='#f2f2f2', width=532, height=374)
cloud.generate(" ".join(action_titles))
ax.imshow(cloud)
ax.axis('off')
# Save before show(), while the figure still holds the rendered image.
fig.savefig('graph.png')
plt.show()

# Thank you !!!


Feel free to make a comment about this notebook. I'm open to suggestions.

[Thank-you image source: "After the interview: write a good thank-you letter" (JVS Toronto)](https://www.jvstoronto.org/blog/after-the-interview-write-a-good-thank-you-letter/)
