![](https://www.zastavki.com/pictures/originals/2015/Games_Characters_in_the_game_Bayonetta_2_103262_.jpg)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np 
import pandas as pd 

from pandas_profiling import ProfileReport
import warnings
# Suppress every warning category so library warnings do not clutter cell output.
warnings.filterwarnings('ignore')

# Display all columns of a DataFrame instead of truncating wide frames.
pd.set_option('display.max_columns', None)

In [None]:
# Load the video game sales CSV into a DataFrame; the path is relative to the notebook's working directory.
games = pd.read_csv('../input/videogamesales/vgsales.csv')

# Basic information

In [None]:
# Build a profiling report for the raw frame; as the cell's last expression it is displayed inline.
ProfileReport(games, title = 'Basic information about data')

# Dynamics of sales

To represent the data a little more accurately, I have restored the release years of 22 games whose global sales exceed $1 million.

In [None]:
# Manually restored release years, keyed by the positional row of the game in `games`.
# Column position 3 is the column these values are written to
# (presumably 'Year', per the text above — TODO confirm against the CSV layout).
restored_years = {
    179: 2004,
    377: 2004,
    431: 2008,
    470: 2006,
    607: 1978,
    624: 2007,
    649: 2001,
    652: 2008,
    711: 2006,
    782: 2007,
    1126: 2010,
    1133: 2007,
    1303: 1998,
    1433: 2011,
    1498: 2008,
    1513: 1979,
    1585: 1977,
    1649: 2003,
    1697: 2002,
    1837: 2007,
    1990: 1999,
    2019: 1997,
}

for row_pos, release_year in restored_years.items():
    games.iloc[row_pos, 3] = release_year

In [None]:
# Keep only the rows whose release year is known.
# Dropping on 'Year' directly replaces the previous fillna(0) + "Year == 0" filter,
# which wrote a numeric 0 placeholder into every other column with missing values.
games_sales = games.dropna(subset = ['Year']).copy()
# No missing values remain in 'Year', so it can be stored as a plain integer.
games_sales['Year'] = games_sales['Year'].astype(int)

In [None]:
# Sum the regional and global sales per release year; 'Year' becomes a regular column again.
sales_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
games_sales = games_sales.groupby('Year')[sales_columns].sum().reset_index()
games_sales.head(10)

In [None]:
# Yearly sales per region, one line per region on a shared axis.
# Each pair is (column in games_sales, legend label).
region_lines = [
    ('NA_Sales', 'NA'),
    ('EU_Sales', 'EU'),
    ('JP_Sales', 'JP'),
    ('Other_Sales', 'Other'),
]

sns.set_theme(style = 'darkgrid')
plt.figure(figsize = (14, 8))
plt.title('Game sales dynamics', size = 16)
plt.xticks(range(1977, 2021), rotation = 75)
plt.yticks(range(0, 401, 25))
plt.ylabel('Sales')
for sales_col, region_label in region_lines:
    sns.lineplot(x = 'Year', y = sales_col, data = games_sales, label = region_label)
plt.show()

From 1994 to 2008 there is a steady upward trend, followed by a decline. The decline is most likely only an artifact of the dataset, which its author last updated 4 years ago. Why did 1994 become the start of such strong sales growth? In many ways, 1994 was a landmark year for the gaming industry: new genres appeared and existing ones were strengthened, and many new franchises and companies emerged. Most importantly, the PlayStation 1 came to the market!

# Game platforms analysis

In [None]:
# Global sales summed per platform, ordered from the lowest to the highest total.
games_platforms = games.groupby('Platform')[['Global_Sales']].sum().sort_values(by = 'Global_Sales')
games_platforms.T

In [None]:
# games_platforms is sorted ascending, so its last ten rows are the best-selling platforms.
top_platforms = games_platforms.reset_index().tail(10)

plt.figure(figsize = (14, 8))
plt.title('TOP-10 game platforms', size = 16)
plt.xticks(range(0, 1301, 100))
sns.barplot(data = top_platforms, x = 'Global_Sales', y = 'Platform')
plt.xlabel('Global sales')
plt.show()

According to the data, the PS2 had the best sales, and this matches reality: this gaming platform is still considered the best-selling in the world. Officially, more than 150 million devices have been sold.

****Let's look at the dynamics of released games on TOP-10 gaming platforms****

In [None]:
# Per-game rows restricted to those with a known release year.
# Dropping on 'Year' directly replaces the previous fillna(0) + "Year == 0" filter,
# which wrote a numeric 0 placeholder into every other column with missing values.
games_2 = games.dropna(subset = ['Year']).copy()
# No missing values remain in 'Year', so it can be stored as a plain integer.
games_2['Year'] = games_2['Year'].astype(int)

In [None]:
# The ten platforms with the highest global sales (tail of the ascending ranking).
best_platforms = games_platforms.index[-10:].to_list()

# Number of games per (year, platform) for those platforms; the result keeps
# 'Year' and 'Platform' as index levels and has a single 'Count' column.
on_best_platform = games_2['Platform'].isin(best_platforms)
g_platform_d = (
    games_2[on_best_platform]
    .groupby(['Year', 'Platform'])
    .size()
    .to_frame('Count')
)
g_platform_d

In [None]:
# Releases per year for each of the ten best-selling platforms, one line per platform.
year_ticks = range(1985, 2021)
count_ticks = range(0, 501, 25)

plt.figure(figsize = (14, 8))
plt.title('Count of games released on TOP-10 game platforms', size = 16)
sns.lineplot(data = g_platform_d, x = 'Year', y = 'Count', hue = 'Platform')
plt.xticks(year_ticks, rotation = 75)
plt.yticks(count_ticks)
plt.show()

Overall there is nothing unusual here, but the number of games released for Nintendo's DS platform in 2008-2009 is amazing!

# Game publishers analysis

In [None]:
# Global sales summed per publisher, ordered from the lowest to the highest total.
games_publishers = games.groupby('Publisher')[['Global_Sales']].sum().sort_values(by = 'Global_Sales')
games_publishers.T

In [None]:
# games_publishers is sorted ascending, so its last ten rows are the best-selling publishers.
top_publishers = games_publishers.reset_index().tail(10)

plt.figure(figsize = (14, 8))
# Title size matches the other figures in this notebook (it was previously left at the default).
plt.title('TOP-10 game publishers', size = 16)
plt.xticks(range(0, 1801, 100))
sns.barplot(x = 'Global_Sales', y = 'Publisher', data = top_publishers)
plt.xlabel('Global sales')
plt.show()

Nintendo is a company that revolutionized video gaming in the late 20th century. This publisher owns the top 15 best-selling games in the world! Although Nintendo has now lost its leading place, it will forever remain in the history of games.

In [None]:
# First 15 rows of `games`, restricted to the columns that identify each title and its sales.
games[['Rank', 'Name', 'Year', 'Publisher', 'Global_Sales']].head(15)

****Let's look at the dynamics of released games of TOP-10 gaming publishers****

In [None]:
# The ten publishers with the highest global sales (tail of the ascending ranking).
best_publishers = games_publishers.index[-10:].to_list()

# Number of games per (year, publisher) for those publishers; the result keeps
# 'Year' and 'Publisher' as index levels and has a single 'Count' column.
by_best_publisher = games_2['Publisher'].isin(best_publishers)
g_publisher_d = (
    games_2[by_best_publisher]
    .groupby(['Year', 'Publisher'])
    .size()
    .to_frame('Count')
)
g_publisher_d

In [None]:
# Releases per year for each of the ten best-selling publishers, one line per publisher.
year_ticks = range(1980, 2021)
count_ticks = range(0, 131, 10)

plt.figure(figsize = (14, 8))
plt.title('Count of games released on TOP-10 game publishers', size = 16)
sns.lineplot(data = g_publisher_d, x = 'Year', y = 'Count', hue = 'Publisher')
plt.xticks(year_ticks, rotation = 75)
plt.yticks(count_ticks)
plt.show()

Based on the data, we can see that the first game publishers were Activision, SEGA and Nintendo. Interestingly, Activision still holds a leading position alongside such giants of the gaming industry as Electronic Arts and Ubisoft.

# Game genre analysis

In [None]:
# Global sales summed per genre, ordered from the lowest to the highest total.
games_genres = games.groupby('Genre')[['Global_Sales']].sum().sort_values(by = 'Global_Sales')
games_genres.T

In [None]:
# Horizontal bars of total global sales, one bar per genre.
genre_totals = games_genres.reset_index()

plt.figure(figsize = (14, 8))
plt.title('Sales by game genre', size = 16)
plt.xticks(range(0, 1801, 100))
sns.barplot(data = genre_totals, x = 'Global_Sales', y = 'Genre')
plt.xlabel('Global sales')
plt.show()

****Let's see if the regions have any preferences for the genre of the game****

In [None]:
# One genre bar chart per sales region on a 2x2 grid.
# Each entry: (subplot position, sales column, y-axis label, explicit y-ticks or None).
region_panels = [
    (221, 'NA_Sales', 'North America', range(0, 901, 100)),
    (222, 'EU_Sales', 'Europe', range(0, 601, 100)),
    (223, 'JP_Sales', 'Japan', range(0, 401, 25)),
    (224, 'Other_Sales', 'Other countries', None),  # no explicit y-ticks for this panel
]

fig = plt.figure(figsize = (20, 20))

for position, sales_col, region_label, y_ticks in region_panels:
    plt.subplot(position)
    plt.xticks(rotation = 45)
    if y_ticks is not None:
        plt.yticks(y_ticks)
    # Genres ordered from the lowest to the highest total in this region.
    genre_sales = games.groupby('Genre').agg({sales_col: 'sum'}).sort_values(sales_col).reset_index()
    sns.barplot(x = 'Genre', y = sales_col, data = genre_sales)
    plt.ylabel(region_label)

plt.show()

All regions prefer roughly the same genres, except for Japan, which prefers Role-Playing games over Action.

# The most popular games in each region in the 20th and 21st centuries

****Most popular games in North America in 20th century****

In [None]:
# Five titles released before 2000 with the highest North American sales (ascending order).
before_2000 = games['Year'] < 2000
best_games_NA_XX = (
    games.loc[before_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'NA_Sales']]
    .sort_values('NA_Sales')
    .tail(5)
)
best_games_NA_XX

****Most popular games in North America in 21st century****

In [None]:
# Five titles released in 2000 or later with the highest North American sales (ascending order).
from_2000 = games['Year'] >= 2000
best_games_NA_XXI = (
    games.loc[from_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'NA_Sales']]
    .sort_values('NA_Sales')
    .tail(5)
)
best_games_NA_XXI

In [None]:
# Two stacked pies: each top-5 title's share of its era's top-5 North American sales.
# Each entry: (subplot position, panel title, top-5 frame).
pie_panels = [
    (211, 'Most popular games in North America in 20th century', best_games_NA_XX),
    (212, 'Most popular games in North America in 21st century', best_games_NA_XXI),
]

fig = plt.figure(figsize = (12, 12))

for position, panel_title, top_games in pie_panels:
    plt.subplot(position)
    plt.title(panel_title, size = 16)
    plt.pie(top_games['NA_Sales'], labels = top_games['Name'], autopct = '%.0f%%', shadow = True, textprops = {'size': 'smaller'})

plt.show()

****Most popular games in Europe in 20th century****

In [None]:
# Five titles released before 2000 with the highest European sales (ascending order).
before_2000 = games['Year'] < 2000
best_games_EU_XX = (
    games.loc[before_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'EU_Sales']]
    .sort_values('EU_Sales')
    .tail(5)
)
best_games_EU_XX

****Most popular games in Europe in 21st century****

In [None]:
# Five titles released in 2000 or later with the highest European sales (ascending order).
from_2000 = games['Year'] >= 2000
best_games_EU_XXI = (
    games.loc[from_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'EU_Sales']]
    .sort_values('EU_Sales')
    .tail(5)
)
best_games_EU_XXI

In [None]:
# Two stacked pies: each top-5 title's share of its era's top-5 European sales.
# Each entry: (subplot position, panel title, top-5 frame).
pie_panels = [
    (211, 'Most popular games in Europe in 20th century', best_games_EU_XX),
    (212, 'Most popular games in Europe in 21st century', best_games_EU_XXI),
]

fig = plt.figure(figsize = (12, 12))

for position, panel_title, top_games in pie_panels:
    plt.subplot(position)
    plt.title(panel_title, size = 16)
    plt.pie(top_games['EU_Sales'], labels = top_games['Name'], autopct = '%.0f%%', shadow = True, textprops = {'size': 'smaller'})

plt.show()

****Most popular games in Japan in 20th century****

In [None]:
# Five titles released before 2000 with the highest Japanese sales (ascending order).
before_2000 = games['Year'] < 2000
best_games_JP_XX = (
    games.loc[before_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'JP_Sales']]
    .sort_values('JP_Sales')
    .tail(5)
)
best_games_JP_XX

****Most popular games in Japan in 21st century****

In [None]:
# Five titles released in 2000 or later with the highest Japanese sales (ascending order).
from_2000 = games['Year'] >= 2000
best_games_JP_XXI = (
    games.loc[from_2000, ['Name', 'Platform', 'Genre', 'Publisher', 'JP_Sales']]
    .sort_values('JP_Sales')
    .tail(5)
)
best_games_JP_XXI

In [None]:
# Two stacked pies: each top-5 title's share of its era's top-5 Japanese sales.
# Each entry: (subplot position, panel title, top-5 frame).
pie_panels = [
    (211, 'Most popular games in Japan in 20th century', best_games_JP_XX),
    (212, 'Most popular games in Japan in 21st century', best_games_JP_XXI),
]

fig = plt.figure(figsize = (12, 12))

for position, panel_title, top_games in pie_panels:
    plt.subplot(position)
    plt.title(panel_title, size = 16)
    plt.pie(top_games['JP_Sales'], labels = top_games['Name'], autopct = '%.0f%%', shadow = True, textprops = {'size': 'smaller'})

plt.show()

# End

****Peace, love and play games! :)****