# Overview

This notebook shows how to produce common visualizations with Pandas, Matplotlib, and Seaborn.
There are several plots which, in my opinion, are best produced with one particular library.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Build the full path of every file found under the Kaggle input directory,
# then print the paths one per line in walk order.
input_paths = (
    os.path.join(root, fname)
    for root, _, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for path in input_paths:
    print(path)

In [None]:
# Read the CSV once into df_orig and leave that frame untouched;
# every later cell works on the independent copy `df`.
df_orig = pd.read_csv('/kaggle/input/videogamesales/vgsales.csv')
df = df_orig.copy(deep=True)

# Summary of the working copy (columns, dtypes, non-null counts).
df.info()

In [None]:
# Display the working copy as this cell's output.
df

Plot

1. Scatter
2. Bar
3. Line
4. Pie
5. Boxplot
6. Correlation Matrix
7. Histogram
8. Stacked Bar
9. Pairplot

Some of them will be shown with Pandas, then Matplotlib, then Seaborn. Others will use only one particular library.

In [None]:
# Short aliases for the columns used by the Matplotlib examples.
# NOTE: each alias is the column of `df` as it exists when this cell runs;
# later cells that reassign `df` (dropna / head) do not update these Series.
gsales, nasales, jpsales, eusales, osales = (
    df[col]
    for col in ('Global_Sales', 'NA_Sales', 'JP_Sales', 'EU_Sales', 'Other_Sales')
)
platform, genre, publisher, year = (
    df[col]
    for col in ('Platform', 'Genre', 'Publisher', 'Year')
)

# Scatter Plot

In [None]:
# Pandas

# Scatter through the .plot.scatter accessor: Year on x, Global_Sales on y.
df.plot.scatter(x='Year', y='Global_Sales', figsize=(20, 10))
plt.show()

In [None]:
# Matplotlib

# Explicit figure/axes pair; title and axis labels are set in one call.
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(year, gsales)
ax.set(title='Year X Global Sales', xlabel='Year', ylabel='Global Sales')
plt.show()

In [None]:
# Seaborn

# Draw onto an explicitly created axes; columns are looked up by name in `df`.
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(data=df, x='Year', y='Global_Sales', ax=ax)
plt.show()

# Bar Plot

In [None]:
# Drop every row that has at least one missing value
# (the dropna defaults, written out explicitly).
df = df.dropna(axis=0, how='any')

In [None]:
# Keep only the first 200 rows for the bar and line examples.
df = df.iloc[:200]

In [None]:
# Display the frame after the two preceding cells (dropna, then head(200)).
df

In [None]:
# Pandas

# One bar per row of `df`, labelled with that row's Year.
df.plot.bar(x='Year', y='Global_Sales', figsize=(20, 10))
plt.show()

In [None]:
# Matplotlib

# Read the columns from the current `df` rather than the `year` / `gsales`
# Series captured earlier: those still hold the full, unfiltered data, so the
# chart would not show the same rows as the Pandas and Seaborn versions.
# Rows that share a year are drawn at the same x position, on top of each other.
plt.figure(figsize=(20,10))
plt.bar(df['Year'], df['Global_Sales'])
plt.xlabel('Year')
plt.ylabel('Global Sales')
plt.show()

In [None]:
# Seaborn

# Draw onto an explicitly created axes; columns are looked up by name in `df`.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(data=df, x='Year', y='Global_Sales', ax=ax)
plt.show()

# Line Plot

In [None]:
# Pandas

# groupby(...).sum() returns a Series indexed by Year. Series.plot always uses
# the index for x and the values for y, so the x= / y= keywords (which only
# apply to DataFrames) are dropped here.
yearly_sales = df.groupby('Year')['Global_Sales'].sum()
ax = yearly_sales.plot(kind = 'line', figsize = (20,10))
ax.set_ylabel('Global Sales')
plt.show()

In [None]:
# Matplotlib

# Total global sales per year; plotting the Series puts its index (Year) on x.
yearly_total = df.groupby(['Year'])['Global_Sales'].sum()

fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(yearly_total)
plt.show()

In [None]:
# Seaborn

# Aggregate to one total per year first. Given the raw rows, lineplot draws the
# per-year mean with a confidence band, which is not the yearly sum plotted by
# the Pandas and Matplotlib cells.
yearly_sales_df = df.groupby('Year', as_index=False)['Global_Sales'].sum()

plt.figure(figsize=(20,10))
sns.lineplot(x = 'Year', y = 'Global_Sales', data = yearly_sales_df)
plt.show()

# Pie Plot

In [None]:
# Keep only the first 100 rows for the remaining examples.
df = df.iloc[:100]

In [None]:
# Pandas

# A pie chart needs one value per slice, so total the sales per genre first.
# Calling df.plot(kind='pie', x='Genre', y='Global_Sales') on the raw rows does not work.
# Only Global_Sales is selected before summing, so the other columns
# (e.g. Platform, Publisher) are not aggregated as well.
genre_totals = df.groupby('Genre')[['Global_Sales']].sum()
genre_totals.plot(kind = 'pie', y = 'Global_Sales', figsize = (10,10))
plt.show()

In [None]:
# Matplotlib

# Total sales per genre; the index of this Series holds the genre names.
sales_by_genre = df.groupby(['Genre'])['Global_Sales'].sum()

plt.figure(figsize = (10,10))
# `labels` names each wedge after its genre.
# For a separate legend box instead, call plt.legend(sales_by_genre.index) after plt.pie.
plt.pie(sales_by_genre, labels = sales_by_genre.index)
plt.show()

In [None]:
# Seaborn: as far as I know, Seaborn has no dedicated pie-chart function.

# Boxplot

In [None]:
# Pandas
# (pd.DataFrame.boxplot(df) is the standard way of box-plotting the whole frame.)

# Here only the four regional sales columns are drawn, via the .plot.box accessor.
df.plot.box(y=['JP_Sales', 'EU_Sales', 'NA_Sales', 'Other_Sales'], figsize=(20, 10))
plt.show()

In [None]:
# Matplotlib

# Take the columns from the current `df` rather than the jpsales / eusales /
# nasales / osales Series captured earlier: those still hold the full,
# unfiltered data, so the boxes would not match the Pandas and Seaborn versions.
region_cols = ['JP_Sales', 'EU_Sales', 'NA_Sales', 'Other_Sales']

fig, ax = plt.subplots(figsize = (20,10))
ax.boxplot([df[col] for col in region_cols])
# Name each box after its column instead of the default 1..4 tick labels.
ax.set_xticklabels(region_cols)
plt.show()

In [None]:
# Seaborn

# One box per regional sales column, drawn onto an explicitly created axes.
region_sales = df[['JP_Sales', 'EU_Sales', 'NA_Sales', 'Other_Sales']]

fig, ax = plt.subplots(figsize=(20, 10))
sns.boxplot(data=region_sales, ax=ax)
plt.show()

# Correlation Matrix

In [None]:
# Degree of correlation from -1 to +1 (negative correlation to positive correlation).
# Correlation is only defined for numeric columns, so select them explicitly:
# on pandas >= 2.0 a bare df.corr() raises when the frame also holds
# non-numeric columns (e.g. Genre, Platform).
df.select_dtypes(include = 'number').corr()

In [None]:
# I'm just gonna go with Seaborn, it's simple and fast

# Restrict to numeric columns before correlating: on pandas >= 2.0 a bare
# df.corr() raises when the frame also holds non-numeric columns.
corr_matrix = df.select_dtypes(include = 'number').corr()

plt.figure(figsize = (10,10))
# annot=True writes each coefficient inside its cell.
sns.heatmap(corr_matrix, annot=True)
plt.show()

# Histogram

In [None]:
# Pandas

# One histogram per column that df.hist can plot; bins=10 is the default, written out.
df.hist(bins=10, figsize=(20, 20))
plt.show()

In [None]:
# Matplotlib / Seaborn: I don't think either Matplotlib or Seaborn does better than a simple df.hist()

# Stacked Bar Chart

In [None]:
# Pandas

# Select several columns from a groupby with a list (double brackets).
# The bare tuple form ['A', 'B', ...] was deprecated in pandas 1.0 and fails in pandas 2.0.
df.groupby(['Year'])[['JP_Sales', 'EU_Sales', 'NA_Sales', 'Other_Sales']].sum().plot(kind = 'bar', figsize = (20,10), stacked = True)
plt.show()

In [None]:
# Matplotlib

# Total each region per year from the current `df`, so there is exactly one
# stacked bar per year. Plotting the raw per-game Series draws many bars at the
# same x position on top of each other, and the year / *sales Series captured
# earlier no longer match `df` after the dropna / head cells.
region_labels = {
    'JP_Sales': 'Japan Sales',
    'EU_Sales': 'EU Sales',
    'NA_Sales': 'NA Sales',
    'Other_Sales': 'Other Sales',
}
yearly_region_sales = df.groupby('Year')[list(region_labels)].sum()

fig, ax = plt.subplots(figsize=(20,10))
# Running height of the stack: each region's bars start where the previous ones end.
stack_base = np.zeros(len(yearly_region_sales))
for col, label in region_labels.items():
    ax.bar(yearly_region_sales.index, yearly_region_sales[col], bottom = stack_base, label = label)
    stack_base = stack_base + yearly_region_sales[col].to_numpy()
ax.set_xlabel('Year')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

# Longer than the one-line Pandas version, but the loop keeps the stacking logic in one place.

In [None]:
# Seaborn: a stacked bar chart is too complex to build with Seaborn, so I'm just going to skip it.
# see for example https://stackoverflow.com/questions/59038979/stacked-bar-chart-in-seaborn

# Pair Plot

In [None]:
# Seaborn is enough for producing pairplots

# pairplot is a figure-level function: it creates its own figure, so no
# plt.figure() call is needed (one would only add an empty extra figure).
# Use the `height` argument of pairplot to change the size of each panel.
sns.pairplot(df)
plt.show()

# the output might not be meaningful, but the point is just to display the pairplot itself