# Game Sales EDA and Visualization

This dataset contains a list of video games with sales greater than 100,000 copies. It was generated by a scrape of vgchartz.com.

Fields include:

Rank - Ranking of overall sales

Name - The game's name

Platform - Platform of the game's release (e.g. PC, PS4, etc.)

Year - Year of the game's release

Genre - Genre of the game

Publisher - Publisher of the game

NA_Sales - Sales in North America (in millions)

EU_Sales - Sales in Europe (in millions)

JP_Sales - Sales in Japan (in millions)

Other_Sales - Sales in the rest of the world (in millions)

Global_Sales - Total worldwide sales (in millions).

In [None]:
# data manipulation and visualization
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
import os

# Print the full path of every file found under the Kaggle input directory.
# (`os` is already imported with the other modules above, so it is not re-imported here.)
for root, _dirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(root, file_name))

import warnings
# Suppress every warning from here on, presumably to keep the notebook output uncluttered.
# NOTE(review): this also hides deprecation warnings raised by pandas/seaborn — confirm that is intended.
warnings.filterwarnings("ignore")

In [None]:
# Load the sales table and use the `Rank` column as the row index.
csv_path = '../input/videogamesales/vgsales.csv'
data = pd.read_csv(csv_path, index_col='Rank')

# Preview the first ten rows.
data.head(10)

In [None]:
# Summarise the frame: column names, non-null counts and dtypes.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric columns.
data.describe()

In [None]:
# Global sales for every (year, genre) pair, aggregated with pivot_table's
# default aggregation (the mean), then drawn as an annotated heatmap.
sales = pd.pivot_table(
    data,
    index=["Year"],
    columns=["Genre"],
    values=["Global_Sales"],
    margins=False,
)

plt.figure(figsize=(15, 15))
sns.heatmap(sales["Global_Sales"], linewidths=.7, annot=True)
plt.show()

In [None]:
# Bar chart of the number of games per release year.
# The original call passed y="name"; a Series plot has no columns to select
# from and no "name" column exists, so that stray argument is removed.
games_per_year = data.groupby("Year")["Name"].count()
games_per_year.plot(kind="bar", figsize=(15, 7))
plt.title("Number of Games Released by Year")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Same per-year count as a bar chart, but ordered from the busiest year to the
# quietest instead of chronologically.
# The original call passed y="name"; a Series plot has no columns to select
# from and no "name" column exists, so that stray argument is removed.
games_per_year_desc = (
    data.groupby("Year")["Global_Sales"].count().sort_values(ascending=False)
)
games_per_year_desc.plot(kind="bar", figsize=(15, 7))
plt.title("Number of Games Released by Year")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Genre breakdown for the ten years that have the most rows in the dataset.
top_years = data.Year.value_counts().iloc[:10].index

plt.figure(figsize=(15, 15))
sns.countplot(data=data, x="Year", hue='Genre', order=top_years)
plt.xlabel(xlabel='Year')
plt.ylabel(ylabel='Count')
plt.show()

In [None]:
# Number of games for every (year, genre) pair, drawn as an annotated heatmap.
count = pd.pivot_table(
    data,
    index=["Year"],
    columns=["Genre"],
    values=["Global_Sales"],
    aggfunc="count",
    margins=False,
)

plt.figure(figsize=(15, 15))
sns.heatmap(count["Global_Sales"], linewidths=.7, fmt="2.0f", annot=True)
plt.title("Count of Games")
plt.show()

In [None]:
# Report how many values are missing in each column, then draw the
# missing-value mask itself as a heatmap.
null_mask = data.isnull()
print(null_mask.sum())

plt.figure(figsize=(10, 8))
sns.heatmap(null_mask, annot=False, cmap="terrain")
plt.title("Empty Data Visualization Table")
plt.show()

In [None]:
# Drop the rows that contain a missing value.
# axis=1 would instead delete every *column* that has at least one missing
# entry, discarding those whole fields because of a few incomplete records.
data.dropna(axis=0, inplace=True)

In [None]:
# Re-check for missing values after the clean-up step.
# The per-column totals must be printed explicitly: a bare expression that is
# not the last statement of a cell produces no output.
remaining_nulls = data.isnull()
print(remaining_nulls.sum())

plt.figure(figsize=(10,8))
sns.heatmap(remaining_nulls,cmap="terrain",annot=False)
plt.title("Empty Data Visualization Table")
plt.show()

In [None]:
# Print the descriptive statistics and show the same table as an annotated heatmap.
summary = data.describe()
print(summary)

plt.figure(figsize=(10, 8))
sns.heatmap(summary, cmap="cubehelix", fmt="0.001f", annot=True)
plt.title("Describe Statistic")
plt.show()

In [None]:
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap.
# The frame also holds text columns (Name, Platform, Genre, ...); restricting it
# to numeric dtypes first keeps DataFrame.corr() from raising on pandas
# versions that no longer drop non-numeric columns silently.
numeric_corr = data.select_dtypes(include="number").corr()

plt.figure(figsize=(10,8))
sns.heatmap(numeric_corr,annot=True,cmap="ocean")
plt.title("Correlation Table")
plt.show()

In [None]:
# Total regional sales per genre (one row per genre, one column per region).
region_cols = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
df_genre = data.groupby(by=['Genre'])[region_cols].sum()
df_genre

In [None]:
# Heatmap of the per-genre regional totals, annotated to one decimal place.
plt.figure(figsize=(10, 8))
sns.heatmap(df_genre, annot=True, fmt="0.1f")
plt.show()

In [None]:
# Bar plot of global sales grouped by genre (seaborn's default estimator is used).
plt.figure(figsize=(15, 8))
sns.barplot(data=data, x="Genre", y="Global_Sales")
plt.title("Sales rates by global genre")
plt.show()

In [None]:
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from PIL import Image

stopwords = set(STOPWORDS)

# Draw one word cloud per genre from the titles of the games in that genre.
# Missing genres are skipped: they would select no titles at all.
for genre in data.Genre.dropna().unique():
    # Join the raw titles rather than using Series.to_string(): the rendered
    # string also contains the index labels and the index header, which are
    # not part of any title.
    titles = " ".join(data.loc[data.Genre == genre, "Name"].astype(str))

    wc = WordCloud(background_color = "black",max_words=2000,
                  stopwords=stopwords, max_font_size=40,
                  random_state=42)
    wc.generate(titles)

    plt.figure(figsize=(10,8))
    plt.imshow(wc)
    plt.title(genre)
    plt.axis("off")
    plt.show()

In [None]:
# Bar plot of global sales grouped by platform (seaborn's default estimator is used).
plt.figure(figsize=(15, 8))
sns.barplot(data=data, y="Global_Sales", x="Platform")
plt.show()

In [None]:
# Number of games per platform.
# The column is passed by keyword: recent seaborn releases take `data` as the
# only positional parameter, so a bare Series would not be read as the x variable.
plt.figure(figsize=(15,8))
sns.countplot(x="Platform", data=data)
plt.title("Platform Counts")
plt.show()

In [None]:
# Number of games per genre.
# The column is passed by keyword: recent seaborn releases take `data` as the
# only positional parameter, so a bare Series would not be read as the x variable.
plt.figure(figsize=(12,6))
sns.countplot(x="Genre", data=data)
plt.title("Genre Counts")
plt.show()

In [None]:
# Keep the ten rows with the highest worldwide sales.
by_global_sales = data.sort_values(by=["Global_Sales"], ascending=False)
df = by_global_sales.iloc[:10]
df


In [None]:
# Horizontal bar plot: global sales of the rows held in `df`, grouped by platform.
plt.figure(figsize=(8, 6))
sns.barplot(data=df, y=df["Platform"], x=df["Global_Sales"])
plt.show()

In [None]:
# Number of games per genre as a two-column frame (Genre, Counts).
# The columns are named through rename_axis/reset_index instead of writing
# into df.columns.values: that mutates the index's backing array in place,
# which is fragile and fails outright when the array is read-only.
df = data["Genre"].value_counts().rename_axis("Genre").reset_index(name="Counts")
print(df)

plt.figure(figsize=(12,8))
sns.barplot(x="Genre",y="Counts",data=df)
# NOTE(review): the title says "Top 10", but every genre in the data is plotted.
plt.title("Top 10 Genre")
plt.show()

In [None]:
# Grid of pairwise plots over the frame's variables, with points colored by genre.
sns.pairplot(data, hue="Genre")
plt.show()

In [None]:
# Total global sales per platform, ordered from the best-selling platform down.
platform = (
    data.groupby("Platform", as_index=False)["Global_Sales"]
    .sum()
    .sort_values(by=["Global_Sales"], ascending=False)
)

plt.figure(figsize=(15, 10))
sns.barplot(data=platform, x="Platform", y="Global_Sales")
plt.xticks(rotation=45)
plt.show()