## Import libraries

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file in it.
input_root = '/kaggle/input'
for folder, _, files in os.walk(input_root):
    for file_name in files:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the video game sales CSV into a DataFrame and preview the first rows.
csv_path = "../input/videogamesales/vgsales.csv"
all_data = pd.read_csv(csv_path)
all_data.head()

## Cleaning up data

In [None]:
# Count the missing values in each column.
all_data.isnull().sum()


In [None]:
# Remove the 'Rank' column, then preview the remaining columns.
all_data = all_data.drop(labels=['Rank'], axis=1)
all_data.head()

In [None]:
# Discard every row containing a missing value, then re-count the missing
# values per column to confirm none remain.
all_data = all_data.dropna()
all_data.isna().sum()

## Analysis

In [None]:
# Display the rows ordered by global sales, highest first. The result is only
# shown, not assigned back; the data is noted as already being in this order.
all_data.sort_values(by='Global_Sales', ascending=False)

### Top 20 Global Sales according to Name

In [None]:
# Sum global sales per game title and keep the 20 highest totals.
sales_per_title = all_data.groupby("Name")["Global_Sales"].sum()
y = sales_per_title.sort_values(ascending=False).head(20)

# Horizontal bars: one per title, length = total global sales.
plt.figure(figsize=(5, 10))
plt.barh(y.index, y.values, color='orange')
plt.yticks(y.index, size=8)
plt.ylabel('Name')
plt.xlabel('Sales [in millions]')
plt.show()

### Global Sales according to Platform

In [None]:
# Total global sales per platform, ordered from highest to lowest.
# Only the 'Global_Sales' column is aggregated, so text columns are never
# summed. The labels and the bar heights are both taken from the same sorted
# frame so that every bar is drawn against its own platform; pairing an
# alphabetical label list with separately sorted values mislabels the bars.
y = (
    all_data.groupby("Platform")[["Global_Sales"]]
    .sum()
    .sort_values("Global_Sales", ascending=False)
)
platform = list(y.index)  # platform names in the same order as the bars

plt.figure(figsize=(15,6))
plt.bar(platform, y['Global_Sales'].values)
plt.xticks(platform, rotation='vertical', size=8)
plt.ylabel('Sales [in millions]')
plt.xlabel('Platform name')
plt.show()

### Top 20 Global Sales according to Publisher

In [None]:
# Sum global sales per publisher and keep the 20 highest totals.
sales_per_publisher = all_data.groupby("Publisher")["Global_Sales"].sum()
y = sales_per_publisher.sort_values(ascending=False).head(20)

# Vertical bars: one per publisher, height = total global sales.
plt.figure(figsize=(15, 6))
plt.bar(y.index, y.values, color='red')
plt.xticks(y.index, rotation='vertical', size=8)
plt.xlabel('Publisher name')
plt.ylabel('Sales [in millions]')
plt.show()

### Global Sales according to genre

In [None]:
# Show the distinct values of the 'Genre' column.
all_data['Genre'].unique()

In [None]:
# Sum global sales per genre, ordered from highest to lowest.
sales_per_genre = all_data.groupby("Genre")["Global_Sales"].sum()
y = sales_per_genre.sort_values(ascending=False)

# Horizontal bars: one per genre, length = total global sales.
plt.figure(figsize=(15, 6))
plt.barh(y.index, y.values, color='green')
plt.yticks(y.index, size=8)
plt.ylabel('Genre')
plt.xlabel('Sales [in millions]')
plt.show()

### Global Sales according to Year 

In [None]:
# Show the distinct values of the 'Year' column.
all_data['Year'].unique()

In [None]:

# Sum global sales per year, ordered from highest to lowest total.
sales_per_year = all_data.groupby("Year")["Global_Sales"].sum()
y = sales_per_year.sort_values(ascending=False)

# One bar per year, height = total global sales for that year.
plt.figure(figsize=(15, 6))
plt.bar(y.index, y.values, color='blue')
plt.xticks(y.index, rotation='vertical', size=8)
plt.xlabel('Year')
plt.ylabel('Sales [in millions]')
plt.show()

In [None]:
# Sum global sales per year (kept in the groupby's year order) and draw the
# totals as a line over a grid.
y = all_data.groupby("Year")["Global_Sales"].sum()

plt.figure(figsize=(15, 6))
plt.plot(y.index, y.values, color='blue')
plt.grid()
plt.xticks(y.index, rotation='vertical', size=8)
plt.xlabel('Year')
plt.ylabel('Sales [in millions]')
plt.show()

### Genre of games sold in a particular Year

In [None]:
# Year to inspect; hard-coded here (it could instead be read interactively,
# e.g. year=float(input("Enter year of data required : "))).
year = 2010

# Count the non-missing 'Name' entries for every (Year, Genre) pair, then
# keep only the genre counts for the chosen year.
names_per_year_genre = all_data.groupby(['Year', 'Genre'])['Name'].count()
x = names_per_year_genre.loc[year]

plt.figure(figsize=(15, 6))
plt.bar(x.index, x.values, color='#f2d70d')
plt.title("Genres sold in the Year " + str(year))
plt.xticks(x.index, rotation='vertical', size=8)
plt.xlabel('Genre')
plt.ylabel('Number of Games')
plt.show()

# Also print the per-genre counts as text.
print(x)

### Platforms with maximum games

In [None]:
# Number of rows (games) recorded for each platform.
x = all_data['Platform'].value_counts()

plt.figure(figsize=(15, 5))
plt.bar(x.index, x.values, color='magenta')
plt.xticks(x.index, rotation='vertical', size=8)
plt.xlabel('Platform')
plt.ylabel('Number of Games')
plt.show()

### Year with maximum new games

In [None]:
# Number of rows (games) recorded for each year.
x = all_data['Year'].value_counts()

plt.figure(figsize=(15, 5))
plt.bar(x.index, x.values, color='turquoise')
plt.xticks(x.index, rotation='vertical', size=8)
plt.xlabel('Year')
plt.ylabel('Number of Games')
plt.show()

### Genres with maximum games

In [None]:
# Number of rows (games) recorded for each genre.
x = all_data['Genre'].value_counts()

plt.figure(figsize=(15, 5))
plt.bar(x.index, x.values, color='#13ec32')
plt.xticks(x.index, rotation='vertical', size=8)
plt.xlabel('Genre')
plt.ylabel('Number of Games')
plt.show()


In [None]:
# Number of Action, Sports and Misc games per year, drawn as three lines.
x = all_data.groupby(['Genre', 'Year'])['Global_Sales'].count()
a, s, m = (x.loc[genre] for genre in ('Action', 'Sports', 'Misc'))

plt.figure(figsize=(15, 10))
plt.grid()
# The x ticks are placed at the years present in the Action series.
plt.xticks(a.index, rotation='vertical', size=8)
for counts, genre in ((a, 'Action'), (s, 'Sports'), (m, 'Misc')):
    plt.plot(counts.index, counts.values, label=genre)
plt.xlabel('Year')
plt.ylabel('Number of Games')
plt.legend()
plt.show()

### Publisher with maximum games [Top 20]

In [None]:
# Number of rows (games) per publisher; keep the 20 largest counts.
games_per_publisher = all_data['Publisher'].value_counts()
x = games_per_publisher.head(20)

plt.figure(figsize=(15, 5))
plt.bar(x.index, x.values, color='#ff6000')
plt.xticks(x.index, rotation='vertical', size=8)
plt.xlabel('Publisher')
plt.ylabel('Number of Games')
plt.show()

### NA vs Japan vs EU vs Other Sales by the year

In [None]:
# Total sales per year for each region, drawn as one line per region.
# The four regional columns are summed in a single groupby pass rather than
# grouping the frame by 'Year' four separate times.
region_columns = ['NA_Sales', 'JP_Sales', 'EU_Sales', 'Other_Sales']
yearly_sales = all_data.groupby('Year')[region_columns].sum()
n = yearly_sales['NA_Sales']
j = yearly_sales['JP_Sales']
e = yearly_sales['EU_Sales']
o = yearly_sales['Other_Sales']

plt.figure(figsize=(15,10))
plt.grid()
plt.plot(n.index, n.values, label='North America')
plt.plot(j.index, j.values, label='Japan')
plt.plot(e.index, e.values, label='Europe')
plt.plot(o.index, o.values, label='Others')
plt.xticks(n.index, rotation='vertical', size=8)
plt.legend()
plt.ylabel('Sales [in millions]')
plt.xlabel('Year')
# Render the figure explicitly, as every other plotting cell does.
plt.show()

### Comparison of Global Sales in different regions by a particular year

In [None]:
# Pie chart of how one year's sales split across the regional columns.
# Year to inspect; hard-coded here (it could instead be read interactively,
# e.g. year=float(input("Enter the year: "))).
year = 2006

# Sum only the regional sales columns. Aggregating the whole frame and then
# dropping 'Global_Sales' would also aggregate the text columns on pandas
# versions that no longer drop non-numeric columns silently, and those
# values cannot be drawn as pie wedges.
region_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
x = all_data.groupby('Year')[region_columns].sum()

# Map each regional column name to its total for the chosen year.
td = dict(x.loc[year])

plt.figure(figsize=(7,5))
plt.title("Distribution of Game Sales in "+str(year))
plt.pie(list(td.values()), autopct='%.2f %%', labels=list(td.keys()))
plt.show()

### Most successful publishers in different regions

#### North America

In [None]:
# Sum 'NA_Sales' per publisher, order from highest to lowest, and show the
# first 20 entries.
x = all_data.groupby('Publisher')['NA_Sales'].sum()
x = x.sort_values(ascending=False)
x.head(20)

#### Europe

In [None]:
# Sum 'EU_Sales' per publisher, order from highest to lowest, and show the
# first 20 entries.
x = all_data.groupby('Publisher')['EU_Sales'].sum()
x = x.sort_values(ascending=False)
x.head(20)

#### Japan

In [None]:
# Sum 'JP_Sales' per publisher, order from highest to lowest, and show the
# first 20 entries.
x = all_data.groupby('Publisher')['JP_Sales'].sum()
x = x.sort_values(ascending=False)
x.head(20)