# Video Game Visualization

In [None]:
# Reading in data
import os
# Print the full path of every file found under the Kaggle input directory,
# one path per line, in the order os.walk visits them.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# importing libraries for visualization 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Metacritic CSV export into a DataFrame and preview the first rows.
metacritic_csv = '/kaggle/input/videogame-metacritic/metacritic_18.07.2021_csv.csv'
game_df = pd.read_csv(metacritic_csv)
game_df.head()

# Overview of the Data

In [None]:
# Dimensions as a (rows, columns) tuple; the author recorded
# 30,777 rows (data input) and 7 columns for this dataset.
game_df.shape

In [None]:
# Print the per-column summary of the freshly loaded frame.
game_df.info()

In [None]:
# Count the NaN values in each column.
# Recently released games might be missing metascore and userscore.
missing_per_column = game_df.isna().sum()
missing_per_column

In [None]:
# Percentage of missing values per column, relative to the total row count
print('Missing Data Percentage\n')
total_rows = game_df.shape[0]
for column_name in game_df.columns:
    missing_pct = game_df[column_name].isna().sum() / total_rows * 100
    print(column_name, ":", round(missing_pct, 2), "%")

In [None]:
# Convert the 'date' column to datetime objects, replacing it in place.
# No explicit format is passed, so pandas decides how to parse the values.
game_df['date'] = pd.to_datetime(game_df['date'])

In [None]:
# Preview the first rows to look at the converted 'date' column
game_df.head()

In [None]:
# Check the data type of 'date' again after the datetime conversion
game_df.info()

# Platforms

In [None]:
# Count how many rows belong to each distinct platform
game_df['platforms'].value_counts()

In [None]:
# List the distinct platform values (used to spot badly formatted names)
game_df['platforms'].unique()

In [None]:
# Clean platform names such as 'iOS\n<padding>\xa0(Apple Arcade)' down to
# 'iOS (Apple Arcade)'.
# Rather than matching one exact literal (which silently does nothing if the
# amount of padding in the data differs), collapse every run of whitespace --
# newlines, spaces and non-breaking spaces -- into a single space and trim the
# ends. Names that are already clean are left unchanged; NaN stays NaN.
game_df['platforms'] = (
    game_df['platforms']
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

In [None]:
# Check the distinct platform values again after cleaning the names
game_df['platforms'].unique()

In [None]:
# Bar chart of the number of rows per platform, most frequent first
plt.figure(figsize=(15,8))
platform_order = game_df['platforms'].value_counts().index
ax = sns.countplot(data=game_df, x='platforms', order=platform_order, palette='Set2')
# Write each bar's count slightly above the bar, starting at its left edge.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(f'{bar_height:.0f}', (bar.get_x(), bar_height + 10), fontsize=12)
plt.xticks(rotation=90)
plt.show()

### Genre and Platforms

In [None]:
# Top 3 genres of games on the top 10 platforms (both ranked by row count)
platform_order = game_df['platforms'].value_counts().index[:10]
top3_genre = game_df['genre'].value_counts().index[:3].to_list()
top10_plats = platform_order.to_list()

# Keep only the rows that are in a top genre AND on a top platform.
in_top_genres = game_df['genre'].isin(top3_genre)
on_top_platforms = game_df['platforms'].isin(top10_plats)
top_subset = game_df[in_top_genres & on_top_platforms]

plt.figure(figsize=(15,8))
ax = sns.countplot(data=top_subset, x='platforms', order=platform_order,
                   hue='genre', palette='Set3')
# Label every bar with its count, nudged slightly right of the bar's left edge.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(f'{bar_height:.0f}', (bar.get_x() + 0.02, bar_height + 10), fontsize=10)
plt.xticks(rotation=90)
plt.show()

# Metascore and Userscore

In [None]:
# Metascore: list the distinct values present in the column
game_df['metascore'].unique()

In [None]:
# How many rows carry each metascore value
game_df['metascore'].value_counts()

In [None]:
# Show the whole frame ordered from highest to lowest metascore
# (the result is displayed only; game_df itself is not reordered).
game_df.sort_values(by='metascore',ascending=False)

In [None]:
# Games whose metascore equals the highest metascore in the dataset
best_metascore = game_df['metascore'].max()
is_best = game_df['metascore'] == best_metascore
game_df[is_best]

In [None]:
# Bar chart of how often each userscore value occurs, most frequent first
userscore_order = game_df['userscore'].value_counts().index

plt.figure(figsize=(15,8))
sns.countplot(data=game_df, x='userscore', order=userscore_order)
plt.xticks(rotation=90)
plt.show()

# Most of the userscore values are "tbd"

# Number of Game releases by year

In [None]:
# Number of games released per year

# Derive the release year from the 'date' column and store it on the frame.
game_df['year'] = pd.DatetimeIndex(game_df['date']).year

# Count the non-null titles in each year, then flatten to a two-column frame.
titles_per_year = game_df.groupby('year')['titles'].count()
num_game_release = titles_per_year.reset_index()
num_game_release.columns = ['Year','Count']

plt.figure(figsize=(15,8))
ax = sns.lineplot(x='Year', y='Count', data=num_game_release)
plt.show()

In [None]:
# Same per-year release counts as a bar chart, drawn on an explicit axes
_, year_axes = plt.subplots(figsize=(15,8))
sns.barplot(x='Year', y='Count', data=num_game_release, palette='inferno', ax=year_axes)
plt.show()

# Game Genre Distribution

In [None]:
# Genre distribution: number of rows per genre, most frequent first
genre_order = game_df['genre'].value_counts().index

plt.figure(figsize=(15,8))
sns.countplot(x='genre', data=game_df, order=genre_order)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Metascore and platform: mean metascore per platform, highest average first

mean_by_platform = game_df.groupby('platforms')['metascore'].mean()
meta_platform = mean_by_platform.sort_values(ascending=False)
meta_platform

In [None]:
# Names of the 5 platforms with the highest average metascore
top5_platform_rating = meta_platform.index[:5].to_list()
top5_platform_rating

In [None]:
# Number of games whose platform is not one of the top 5 platforms by
# average metascore (rows with a missing platform are not counted)
outside_top5 = ~game_df['platforms'].isin(top5_platform_rating)
game_df.loc[outside_top5, 'platforms'].count()

In [None]:
# Metascore and genre: mean metascore per genre, highest average first

mean_by_genre = game_df.groupby('genre')['metascore'].mean()
mean_by_genre.sort_values(ascending=False)

# Released Year and Platforms

In [None]:
# New DataFrame: non-null counts of every other column for each
# (year, platform) pair
grouping_keys = ['year', 'platforms']
year_platform = game_df.groupby(by=grouping_keys).count()
year_platform

In [None]:
# 2021: with the frame grouped by (year, platform), the 'titles' count is the
# number of games released on each platform that year, largest first.
titles_by_year_platform = year_platform['titles']
titles_by_year_platform.loc[2021].sort_values(ascending=False)

# PlayStation 

In [None]:
# Games under the PlayStation platforms: every row whose platform name
# contains 'PlayStation'.
# na=False makes rows with a missing platform count as "no match"; without it
# the mask would contain NaN and the boolean indexing below would raise.
is_playstation = game_df['platforms'].str.contains('PlayStation', na=False)
playstation_df = game_df[is_playstation]
playstation_df.head()

In [None]:
# Pie chart of the share of titles on each PlayStation platform.
# The explode tuple is hand-tuned and has six entries, one per wedge, so it
# has to match the number of distinct PlayStation platforms in the data.
playstation_counts = playstation_df.groupby('platforms')['titles'].count()
playstation_counts.plot.pie(autopct='%1.2f%%', figsize=(15,8),
                            explode=(0.4,0.05,0.05,0.05,0.2,0.3), startangle=0, cmap='Set3',
                            textprops={'fontweight':'bold','fontsize':12})
plt.ylabel("")
plt.show()

In [None]:
# Top 10 games by metascore among the PlayStation platforms
playstation_ranked = playstation_df.sort_values(by='metascore', ascending=False)
playstation_ranked.head(10)

In [None]:
# Row counts of the 10 most common genres on the PlayStation platforms
playstation_genre_order = playstation_df['genre'].value_counts().index[:10]

plt.figure(figsize=(15,8))
sns.countplot(x='genre', data=playstation_df, order=playstation_genre_order,
              palette='Set2')
plt.xticks(rotation=90)
plt.show()

# Xbox

In [None]:
# New DataFrame with only the Xbox platforms: every row whose platform name
# contains "Xbox".
# na=False makes rows with a missing platform count as "no match"; without it
# the mask would contain NaN and the boolean indexing below would raise.
is_xbox = game_df['platforms'].str.contains("Xbox", na=False)
xbox_df = game_df[is_xbox]
xbox_df.head()

In [None]:
# Pie chart of the share of titles on each Xbox platform.
plt.figure(figsize=(15,8))
xbox_counts = xbox_df.groupby('platforms')['titles'].count()
# Every wedge gets the same small offset. The explode list is sized from the
# data instead of a hard-coded 4, so the chart still renders when the number
# of Xbox platforms in the dataset changes.
xbox_counts.plot.pie(autopct='%1.2f%%', figsize=(15,8),
                     explode=[0.05] * len(xbox_counts), startangle=0, cmap='Set2_r',
                     textprops={'fontweight':'bold','fontsize':12})
plt.ylabel("")
plt.show()

In [None]:
# Top 10 games by metascore among the Xbox platforms
xbox_ranked = xbox_df.sort_values(by='metascore', ascending=False)
xbox_ranked.head(10)

In [None]:
# Row counts of the 10 most common genres on the Xbox platforms
xbox_genre_order = xbox_df['genre'].value_counts().index[:10]

plt.figure(figsize=(15,8))
sns.countplot(x='genre', data=xbox_df, order=xbox_genre_order,
              palette='Set1_r')
plt.xticks(rotation=90)
plt.show()

# Thank you for checking out my notebook!