<a href="https://colab.research.google.com/github/Zurezh/Python_LearnByDoing/blob/master/Module_6_Advanced_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Module 6 - Advanced Visualization

1. Category Data Type
2. JointPlots
3. Histograms
4. Stacked Histograms
5. KDE Plot
6. Subplots()
7. Violinplots vs Boxplots
8. Facet Grids
9. Coordinates and Diagonals
10. Building Dashboard in Python
11. Styling Tips
12. Finishing Touches
13. Exercise

## 1. Category Data Type

In [None]:
import pandas as pd

In [None]:
movies = pd.read_csv('https://raw.githubusercontent.com/Zurezh/Python_LearnByDoing/master/P4-Movie-Ratings.csv')

In [None]:
len(movies)

In [None]:
movies.head()

In [None]:
movies.columns

In [None]:
# Replace the original headers with short, space-free names so that every
# column can be reached with attribute access (e.g. movies.CriticRating).
movies.columns = [
    'Film',
    'Genre',
    'CriticRating',
    'AudienceRating',
    'BudgetMillions',
    'Year',
]

In [None]:
movies.head()

In [None]:
movies.info()

In [None]:
movies.describe()
# Year is more of category rather than a number
# In reality, Mean and Std of Year doesn't make sense

In [None]:
# Store the Film column with the pandas 'category' dtype
movies['Film'] = movies['Film'].astype('category')

In [None]:
movies.info()

In [None]:
# Genre and Year are labels rather than quantities, so convert both
# columns to the 'category' dtype as well.
for column in ('Genre', 'Year'):
  movies[column] = movies[column].astype('category')

In [None]:
movies.info()

In [None]:
# Quick way to find unique values for categories
movies.Genre.cat.categories

In [None]:
movies.describe()
# Year is not displayed as we changed to the Category Data

## 2. Joint Plots

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Joint plot of the two rating columns: CriticRating on x, AudienceRating on y
j = sns.jointplot(x='CriticRating', y='AudienceRating', data=movies)

In [None]:
# Same joint plot, switched to hexagonal bins via kind='hex'
j = sns.jointplot(x='CriticRating', y='AudienceRating', data=movies,
                  kind='hex')

In [None]:
# Chart 1

## 3. Histograms

In [None]:
# Using Seaborn
# `sns.distplot` is deprecated; `histplot` with a KDE overlay is its
# replacement. stat='density' keeps the bars on the density scale that
# distplot used, and cut=3 matches its KDE extension beyond the data range.
m1 = sns.histplot(data=movies, x='AudienceRating', bins=15,
                  kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# Histogram + KDE of the critic ratings. `histplot` replaces the deprecated
# `distplot`; stat='density' and cut=3 reproduce distplot's defaults.
m2 = sns.histplot(data=movies, x='CriticRating', bins=15,
                  kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# The same histogram drawn directly with matplotlib's pyplot interface.
# Swap 'darkgrid' for 'white' to try a different seaborn style.
sns.set_style('darkgrid')
n1 = plt.hist(movies['AudienceRating'], bins=15)

In [None]:
# Pyplot histogram of the critic ratings, 15 bins
n2 = plt.hist(movies['CriticRating'], bins=15)

## 4. Stacked Histograms

In [None]:
# Histogram of the budget column across all films
plt.hist(movies['BudgetMillions'])
plt.show()

In [None]:
# Budget histogram restricted to the rows whose Genre is 'Drama'
drama_budgets = movies.loc[movies.Genre == 'Drama', 'BudgetMillions']
plt.hist(drama_budgets)
plt.show()

In [None]:
# Draw the budget histograms of three genres onto the same axes.
# Each call paints over the previous one, so bars can hide each other.
for genre in ('Action', 'Drama', 'Thriller'):
  plt.hist(movies.loc[movies.Genre == genre, 'BudgetMillions'], bins=15)
plt.show()

In [None]:
# Stack the histograms: passing a list of series together with stacked=True
# piles the genres on top of each other inside shared bins.
budgets_by_genre = [movies.loc[movies.Genre == genre, 'BudgetMillions']
                    for genre in ('Action', 'Drama', 'Thriller')]
plt.hist(budgets_by_genre, bins=15, stacked=True)
plt.show()

In [None]:
movies.Genre.cat.categories

In [None]:
# Walk the categories of the Genre column and print each one on its own line
genre_names = movies.Genre.cat.categories
for gen in genre_names:
  print(gen)

In [None]:
# Stacked budget histogram covering every genre in the data.
# myLabels holds the genre names (used for the legend) and list1 holds the
# matching budget series, in the same order.
myLabels = list(movies.Genre.cat.categories)
list1 = [movies[movies.Genre == label].BudgetMillions for label in myLabels]

h = plt.hist(list1, bins=30, stacked=True, rwidth=1, label=myLabels)
plt.legend()
plt.show()

## 5. KDE Plot

In [None]:
# Kernel Density Estimate Plot

In [None]:
# Scatter of critic vs audience rating, coloured by genre;
# fit_reg=False turns off the regression line lmplot would otherwise draw.
vis1 = sns.lmplot(x='CriticRating', y='AudienceRating', hue='Genre',
                  data=movies, fit_reg=False, aspect=1)

In [None]:
# Bivariate KDE of critic vs audience rating, drawn as filled contours with
# a colour bar.
#
# The columns are passed by keyword (data/x/y) because current seaborn
# releases accept only `data` positionally. `fill=True` is the current
# spelling of the old `shade=True`, and the default `thresh=0.05` leaves the
# lowest density level unshaded, which is what `shade_lowest=False` did.
k1 = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                 fill=True, cbar=True, cmap='Reds')

# Tip: leave out fill=True to get plain contour lines instead:
# k1b = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating', cmap='Reds')

In [None]:
# Layer 1: filled density contours. Columns are passed by keyword because
# current seaborn accepts only `data` positionally; `fill=True` replaces the
# old `shade=True`, and the default threshold keeps the lowest level
# unshaded (formerly `shade_lowest=False`).
k1 = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                 fill=True, cmap='Reds')

# Tip: draw the unfilled contour lines on top of the filled version so the
# edges of each density level stand out.
k1b = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                  cmap='Reds')

## 6. Working with Subplots

In [None]:
sns.set_style('dark')
# Bivariate KDE of budget (x) against audience rating (y). The columns are
# named by keyword because current seaborn accepts only `data` positionally.
k1 = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating')

In [None]:
# Bivariate KDE of budget (x) against critic rating (y). The columns are
# named by keyword because current seaborn accepts only `data` positionally.
k2 = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating')

In [None]:
# Two KDE plots side by side. sharex/sharey give both panels identical
# axis ranges so the two densities can be compared directly.
f, axes = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)

# Columns are passed by keyword: current seaborn accepts only `data`
# positionally, so the old kdeplot(x_series, y_series) call form fails.
k1 = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating', ax=axes[0])
k2 = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating', ax=axes[1])

# Because the x axis is shared, setting the limits on one panel applies to both
k1.set(xlim=(-20, 160))

## 7. ViolinPlots vs BoxPlots

In [None]:
# Box plot: one box of critic ratings per genre
z = sns.boxplot(x='Genre', y='CriticRating', data=movies)

In [None]:
# Violin plot of the same data: one violin of critic ratings per genre
z = sns.violinplot(x='Genre', y='CriticRating', data=movies)

In [None]:
# Box plot of critic ratings per year, using only the 'Drama' rows
drama_rows = movies[movies.Genre == 'Drama']
z = sns.boxplot(x='Year', y='CriticRating', data=drama_rows)

In [None]:
# Violin plot of critic ratings per year, using only the 'Drama' rows
drama_only = movies[movies.Genre == 'Drama']
z = sns.violinplot(x='Year', y='CriticRating', data=drama_only)

## 8. Facet Grids

In [None]:
# Empty grid of axes: one row per genre, one column per year, with the hue
# also keyed on genre. Nothing is drawn until .map() is called.
# (Leaving out col='Year' would give a single column with one row per genre.)
g = sns.FacetGrid(data=movies, row='Genre', col='Year', hue='Genre')

In [None]:
# Build the genre-by-year grid and fill every facet with a scatter of
# critic rating (x) against audience rating (y).
grid = sns.FacetGrid(data=movies, row='Genre', col='Year', hue='Genre')
g = grid.map(plt.scatter, 'CriticRating', 'AudienceRating')

In [None]:
# The grid can be filled with any plotting function, not just scatter:
# here every facet gets a histogram of the budget column.
grid = sns.FacetGrid(data=movies, row='Genre', col='Year', hue='Genre')
g = grid.map(plt.hist, 'BudgetMillions')

In [None]:
# Back to scatter plots, this time with custom marker styling.
# The keyword arguments below are forwarded by .map() to plt.scatter.
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}
g = sns.FacetGrid(data=movies, row='Genre', col='Year', hue='Genre')
g = g.map(plt.scatter, 'CriticRating', 'AudienceRating', **kws)


## 9. Coordinates and Diagonals

In [None]:
# Styled scatter grid with fixed 0-100 axes and a diagonal guide in each facet
kws = {'s': 50, 'linewidth': 0.5, 'edgecolor': 'black'}
g = sns.FacetGrid(data=movies, row='Genre', col='Year', hue='Genre')
g = g.map(plt.scatter, 'CriticRating', 'AudienceRating', **kws)
g.set(xlim=(0, 100), ylim=(0, 100))

# Dashed y = x line: points above it have a higher audience rating than
# critic rating, points below it the reverse.
for facet_ax in g.axes.flat:
  facet_ax.plot([0, 100], [0, 100], color='gray', linestyle='--')
g.add_legend()

## 10. Building Dashboard

In [None]:
# Dashboard built from seaborn plots only: a 2x2 grid of axes, one chart each.
# All kdeplot calls name their columns by keyword (data/x/y) because current
# seaborn accepts only `data` positionally, and use `fill=` in place of the
# deprecated `shade=`.
sns.set_style('darkgrid')
f, axes = plt.subplots(2, 2, figsize=(15, 15))

# Top row: budget against each of the two ratings
k1 = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating', ax=axes[0, 0])
k2 = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating', ax=axes[0, 1])

# Bottom left: critic ratings per genre
z = sns.violinplot(data=movies, x='Genre', y='CriticRating', ax=axes[1, 0])

# Bottom right: filled critic-vs-audience density with contour lines on top.
# The default threshold leaves the lowest level unshaded, as the old
# shade_lowest=False did.
k4 = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                 fill=True, cmap='Reds', ax=axes[1, 1])
k4b = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                  cmap='Reds', ax=axes[1, 1])

# Give both budget plots the same x range so they are comparable
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
plt.show()

In [None]:
# Dashboard mixing seaborn plots with a plain matplotlib plot.
# kdeplot columns are named by keyword (data/x/y) because current seaborn
# accepts only `data` positionally.
sns.set_style('darkgrid')
f, axes = plt.subplots(2, 2, figsize=(15, 15))

# Top row: budget against each of the two ratings
k1 = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating', ax=axes[0, 0])
k2 = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating', ax=axes[0, 1])

# Bottom left: budget distribution per year
z = sns.violinplot(data=movies, x='Year', y='BudgetMillions', ax=axes[1, 0])

# Bottom right: a matplotlib chart is drawn by calling the method on the
# Axes object itself rather than passing ax= to a seaborn function.
axes[1, 1].hist(movies.CriticRating, bins=15)

# Give both budget plots the same x range so they are comparable
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
plt.show()

## 11. Styling Tips

In [None]:
# Styled dashboard (seaborn plots only) on a black axes background.
# Available style names: white, whitegrid, dark, darkgrid, ticks.
# The second argument overrides individual style parameters.
sns.set_style('dark', {'axes.facecolor': 'black'})
f, axes = plt.subplots(2, 2, figsize=(15, 15))

# kdeplot notes for this cell:
#   * columns are named by keyword (data/x/y) because current seaborn
#     accepts only `data` positionally;
#   * fill=True replaces the deprecated shade=True;
#   * thresh=0 shades the lowest density level too (old shade_lowest=True),
#     while the default threshold leaves it empty (old shade_lowest=False).

# Plot 0,0: budget vs audience rating, filled density plus contour lines
k1 = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating',
                 fill=True, thresh=0, cmap='inferno',
                 ax=axes[0, 0])
k1b = sns.kdeplot(data=movies, x='BudgetMillions', y='AudienceRating',
                  cmap='cool', ax=axes[0, 0])

# Plot 0,1: budget vs critic rating, same treatment
k2 = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating',
                 fill=True, thresh=0, cmap='inferno',
                 ax=axes[0, 1])
k2b = sns.kdeplot(data=movies, x='BudgetMillions', y='CriticRating',
                  cmap='cool', ax=axes[0, 1])

# Plot 1,0: budget distribution per year
z = sns.violinplot(data=movies, x='Year', y='BudgetMillions',
                   palette='YlOrRd', ax=axes[1, 0])

# Plot 1,1: critic vs audience rating; lowest density level left unshaded
k4 = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                 fill=True, cmap='Blues_r',
                 ax=axes[1, 1])
k4b = sns.kdeplot(data=movies, x='CriticRating', y='AudienceRating',
                  cmap='gist_gray_r', ax=axes[1, 1])

# Give both budget plots the same x range so they are comparable
k1.set(xlim=(-20, 160))
k2.set(xlim=(-20, 160))
plt.show()

## 12. Finishing Touches

In [None]:
# Thematic Edits

In [None]:
# Presentation-ready version of the stacked budget histogram.

# One budget series per genre, with the genre names kept in the same order
# so they can label the legend.
myLabels = list(movies.Genre.cat.categories)
list1 = [movies[movies.Genre == label].BudgetMillions for label in myLabels]

sns.set_style('whitegrid')

# Handy trick to resize the plot: create the figure explicitly, then set
# its size in inches (11.7 x 8.27 is an A4 sheet).
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)

h = plt.hist(list1, bins=30, stacked=True, rwidth=1, label=myLabels)

# Title and axis labels
plt.title('Movie Budget Distribution', fontsize=35,
          color='DarkBlue', fontname='DejaVu Sans')
plt.xlabel('Budget', fontsize=25, color='Green')
plt.ylabel('Number of Movies', fontsize=25, color='Red')

# Tick label sizes
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

# Opaque, framed legend with a drop shadow
plt.legend(frameon=True, fancybox=True,
           shadow=True, framealpha=1, prop={'size': 20})
plt.show()

## 13. Exercise

#### Movie % Domestic Gross

The previous consultant had created a chart for them, which is illustrated on the next slide. However, the Python code used to create the diagram has since been lost and cannot be recovered. Your task is to come up with the code that will re-create the same chart, making it look as close as possible to the original.

The new dataset has been supplied

Dataset
https://raw.githubusercontent.com/Zurezh/Python_LearnByDoing/master/P4-Section6-Homework-Dataset.csv

Chart Type: BoxPlot 
Chart Name: Domestic Gross % by Genre



#### Solution

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
mov = pd.read_csv('https://raw.githubusercontent.com/Zurezh/Python_LearnByDoing/master/P4-Section6-Homework-Dataset.csv',encoding='latin1')

In [None]:
mov.head()

In [None]:
mov.describe()

In [None]:
mov.info()

In [None]:
# Count of films per release weekday.
# `factorplot` was renamed `catplot` and its `size` argument became `height`;
# the old name is no longer available in current seaborn releases.
vis1 = sns.catplot(data=mov, x='Day of Week', kind='count', height=10)

In [None]:
mov.Studio.unique()

In [None]:
len(mov.Studio.unique())

In [None]:
mov.Genre.unique()

In [None]:
len(mov.Genre.unique())

In [None]:
# Restrict the data to the rows whose Genre is one of the listed values
genre_filters = ['action', 'adventure', 'animation', 'comedy', 'drama']
genre_mask = mov['Genre'].isin(genre_filters)
mov2 = mov[genre_mask]

In [None]:
mov2.Genre.unique()

In [None]:
# From the genre-filtered rows, keep only the listed studios
studio_filters = [
    'Buena Vista Studios',
    'Fox',
    'Paramount Pictures',
    'Sony',
    'Universal',
    'WB',
]
studio_mask = mov2['Studio'].isin(studio_filters)
mov3 = mov2[studio_mask]

In [None]:
print(mov3.Studio.unique())

In [None]:
len(mov3)

In [None]:
sns.set(style='darkgrid', palette='muted', color_codes=True)

# Light-grey boxes per genre, with outlier markers hidden because the
# individual points are drawn by the strip plot below.
# The transparency is passed through `boxprops` rather than applied with
# plt.setp(ax.artists, ...) afterwards: on newer matplotlib/seaborn versions
# the boxes are stored in ax.patches, so ax.artists is empty and the setp
# call silently does nothing.
ax = sns.boxplot(data=mov3, x='Genre', y='Gross % US', orient='v',
                 color='lightgray', showfliers=False,
                 boxprops=dict(alpha=0.5))

# Overlay every film as a jittered point, coloured by studio, on the same axes
sns.stripplot(x='Genre', y='Gross % US', data=mov3, jitter=True, size=6,
              linewidth=0, hue='Studio', ax=ax)

ax.set_title('Domestic Gross % by Genre', fontsize=30)
ax.set_xlabel('Genre', fontsize=20)
ax.set_ylabel('Gross % US', fontsize=20)

# Anchor the legend's upper-left corner (loc=2) just outside the right edge
# of the axes so it does not cover the data.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)

### End of Module 6 - Advanced Visualization