# ***Video Game Sales***
This dataset contains a list of video games with sales greater than 100,000 copies. It was generated by a scrape of [vgchartz.com](https://www.vgchartz.com/).

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns  # visualization tool
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Read Data**

In [None]:
# Load the video game sales CSV into a DataFrame used by every later cell.
data = pd.read_csv(filepath_or_buffer='../input/videogamesales/vgsales.csv')

Show the first 5 rows of the data:

In [None]:
# Preview the first five rows.
data.head(5)

The properties of the data are as follows: 
* **Rank** - Ranking of overall sales

* **Name** - The game's name

* **Platform** - Platform of the game's release (e.g. PC, PS4, etc.)

* **Year** - Year of the game's release

* **Genre** - Genre of the game

* **Publisher** - Publisher of the game

* **NA_Sales** - Sales in North America (in millions)

* **EU_Sales** - Sales in Europe (in millions)

* **JP_Sales** - Sales in Japan (in millions)

* **Other_Sales** - Sales in the rest of the world (in millions)

* **Global_Sales** - Total worldwide sales.

Content of the data is as follows:

In [None]:
# Print the column names, non-null counts and dtypes of the DataFrame.
data.info()

Run the `describe()` method to get summary statistics:

In [None]:
# Summary statistics using the default quartiles, spelled out explicitly.
data.describe(percentiles=[0.25, 0.5, 0.75])

# Correlation Map 

In [None]:
# Correlate only the numeric columns. The frame also holds text columns
# (e.g. Name, Platform, Genre, Publisher), and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0 instead of silently dropping it.
numeric_corr = data.select_dtypes(include='number').corr()

f, ax = plt.subplots()
# annot=True writes each coefficient in its cell, formatted to one decimal.
sns.heatmap(numeric_corr, annot=True, linewidths=5, fmt='.1f', ax=ax)
plt.show()

 # Sales by years 

In [None]:
# Sum the sales per release year before plotting. Passing the raw columns to
# plt.bar draws one bar per game row, so bars sharing a year overdraw each
# other and the chart does not show yearly sales.
# groupby drops rows whose Year is missing.
yearly_sales = data.groupby('Year')[['Global_Sales', 'Other_Sales']].sum()

plt.bar(yearly_sales.index, yearly_sales['Global_Sales'], align='center', label='Global Sales')
# Drawn second so the "other regions" share sits on top of the global bars.
plt.bar(yearly_sales.index, yearly_sales['Other_Sales'], align='center', label='Other Sales')
plt.legend(loc='upper right')
plt.xlabel('Year')
plt.ylabel('Sales')
plt.title('Sales by years')
plt.show()


In [None]:
# Scatter of European sales against global sales, one point per game row.
data.plot.scatter(x='EU_Sales', y='Global_Sales', alpha=0.5, color='green')
plt.title('EU Sales Global Sales Scatter Plot')
plt.xlabel('EU Sales')
plt.ylabel('Global Sales')
plt.show()

In [None]:
# Distribution of release years across 25 bins.
data['Year'].plot.hist(bins=25, figsize=(12, 12))
plt.show()

# **Sales comparison by platform**

In [None]:
# Wide figure so every platform label fits on the x axis.
plt.figure(figsize=(30, 15))
# One bar per platform; sns.barplot aggregates each platform's rows with its
# default estimator (the mean) rather than plotting totals.
sns.barplot(x='Platform', y='Global_Sales', data=data)
plt.xlabel('Platform')
plt.ylabel('Global Sales')
# Title spelling corrected ("compearison" -> "comparison").
plt.title('Global Sales comparison by platform')
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.show()

In [None]:
 melted=pd.melt(frame=data,id_vars='Name',value_vars=['Genre','Platform'])
melted

# Pivot Table 

Average global sales by platform and genre


In [None]:
# Genre x Platform table whose cells hold the mean Global_Sales of the games
# in that combination.
mean_sales = data.pivot_table(
    values='Global_Sales',
    index='Genre',
    columns='Platform',
    aggfunc='mean',
)

mean_sales



# Group by Genre

In [None]:
# Despite its name, this holds the number of rows (games) per genre, not a
# sales figure.
sales_by_genre = data.groupby(by='Genre').size()
sales_by_genre.head(12)

# Filter for genre

<p>Keep only the genres that have at least 1,000 games.</p>

In [None]:
# Genre labels whose game count is 1000 or more.
filter_genre = sales_by_genre[sales_by_genre >= 1000].index
filter_genre[:10]

# Average global sales of the filtered video game genres

In [None]:
# Keep only the pivot-table rows for the genres selected in filter_genre;
# all platform columns are retained.
mean_sales = mean_sales.loc[filter_genre, :]
mean_sales