# Data Visualization Tutorial 2

## Load and Set Up the Data

In [None]:
# import required library functions
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

#mpl.style.use('ggplot') # optional: for ggplot-like style

In [None]:
# Load the 'Canada by Citizenship' sheet of the workbook. The first 20 rows
# and the last 2 rows are skipped because they do not contain relevant data.
df_canada = pd.read_excel(
    'data/canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=range(20),
    skipfooter=2,
)

In [None]:
# The column labels and the index can each be converted to a plain list.
# NOTE: the results of these two calls are not assigned to anything, so they
# have no effect on df_canada.
df_canada.columns.tolist()
df_canada.index.tolist()

# Remove the columns that are not needed.
# `columns=` addresses column labels (equivalent to passing the labels with
# axis=1); axis=0, the pandas default, addresses rows instead.
df_canada.drop(
    columns=['AREA', 'REG', 'DEV', 'Type', 'Coverage'],
    inplace=True,
)

# Rename some columns so that their names make better sense.
df_canada.rename(
    columns={
        'OdName': 'Country',
        'AreaName': 'Continent',
        'RegName': 'Region',
    },
    inplace=True,
)

In [None]:
# Convert all column labels to strings so that every column, including the
# year columns, can be selected with a string label.
df_canada.columns = [str(label) for label in df_canada.columns]

# Full range of the time series (1980 through 2013) as string column labels.
years = [str(year) for year in range(1980, 2014)]

# Add a Total column: the per-row sum over the year columns only.
# Summing the whole row (df_canada.sum(axis=1)) would also include the
# descriptive columns such as Country and Continent; with pandas >= 2.0,
# where numeric_only defaults to False, that raises a TypeError instead of
# silently skipping them.
df_canada['Total'] = df_canada[years].sum(axis=1)

In [None]:
# Make the Country column the row index (df_canada is modified in place).
df_canada.set_index(keys='Country', inplace=True)

## Visualize Parts of a Whole

### Visualize continent-wise immigration contributions into Canada

In [None]:
# Group the rows by Continent and add up the numeric columns.
# numeric_only=True keeps the non-numeric columns (e.g. Region) out of the
# aggregation; without it, pandas >= 2.0 tries to sum those columns as well,
# and the result can no longer be used for the arithmetic done later on
# df_continents. The deprecated `axis` keyword of groupby is not passed;
# grouping along rows is the default.
df_continents = df_canada.groupby('Continent').sum(numeric_only=True)

# show Continent-wise distribution
df_continents.head(6)

In [None]:
# Pie chart of each continent's share of the Total column.
df_continents['Total'].plot.pie(
    figsize=(15, 6),
    autopct='%1.1f%%',   # print each slice's percentage with one decimal place
    startangle=90,
    labels=None,         # no labels on the slices; the legend names them instead
    pctdistance=1.12,    # radial position of the autopct text relative to the radius (>1 is outside the pie)
)

# keep the pie circular
plt.axis('equal')

# lift the title by the same 12% used for pctdistance
plt.title('Immigration to Canada by Continent [1980 - 2013]', y=1.12)

# legend with one entry per continent
plt.legend(labels=df_continents.index, loc='upper left')

plt.show()

## Visualize Categories and Sub-categories

### Visualize total continent-wise immigration into Canada between 1980 and 2013

In [None]:
# Horizontal bar chart of total immigration per continent:
# continents on the y axis, number of immigrants on the x axis.
df_continents['Total'].plot.barh()

plt.xlabel('Number of immigrants')
plt.ylabel('Continents')
plt.title('Immigration to Canada by Continent')

plt.show()

### Visually compare continent-wise immigration into Canada from developed vs developing countries

In [None]:
# Split the data by continent and level of development.
# - numeric_only=True keeps non-numeric columns (e.g. Region) out of the sum;
#   without it, pandas >= 2.0 tries to aggregate them as well.
# - unstack('DevName') moves the development level from the row index into
#   the columns, giving one row per continent.
# - fillna(0) fills the (continent, development level) combinations that do
#   not occur in the data.
# The deprecated `axis` keyword of groupby is not passed; grouping along rows
# is the default.
df_development = (
    df_canada.groupby(['Continent', 'DevName'])
    .sum(numeric_only=True)
    .unstack('DevName')
    .fillna(0)
)

df_development.head(6)

In [None]:
# Stacked horizontal bar chart: one bar per continent, split by the
# development-level columns found under 'Total'.
df_development['Total'].plot.barh(stacked=True)

plt.xlabel('Number of immigrants')
plt.ylabel('Continents')
plt.title('Immigration to Canada by Continent and Development Level')

plt.show()

## Visualize Proportions varying over Time

### Visualize total immigration contributions from each continent over time

In [None]:
# Stacked area chart of immigration per continent over time.
# After the transpose the years form the index (x axis) and each continent
# is one column, i.e. one stacked area.
df_continents[years].transpose().plot(kind='area', stacked=True)

plt.title('Immigration to Canada by Continent over time')
# The x axis carries the years and the y axis the immigrant counts; the
# previous labels were swapped (they matched the horizontal bar charts).
plt.xlabel('Years')
plt.ylabel('Number of immigrants')
# place the legend outside the plot, to the right of the axes
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

plt.show()

### Visualize fractional immigration contributions from each continent over time

In [None]:
# Continent-wise proportion: every value is divided by the total of its own
# column, so each column of df_fraction sums to 1.
df_fraction = df_continents.div(df_continents.sum(axis=0), axis='columns')

df_fraction.head()

In [None]:
# Stacked area chart of each continent's fractional contribution over time.
# After the transpose the years form the index (x axis) and each continent
# is one column, i.e. one stacked area.
df_fraction[years].transpose().plot(kind='area', stacked=True)

plt.title('Immigration to Canada by Continent over time')
# The x axis carries the years and the y axis the per-year fractions; the
# previous labels were swapped and described absolute counts.
plt.xlabel('Years')
plt.ylabel('Fraction of immigrants')
# place the legend outside the plot, to the right of the axes
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))

plt.show()

## Visualize Parts of a Whole using Waffle Charts

### Visualize continent-wise immigration contributions into Canada

In [None]:
# special library for Waffle Charts
from pywaffle import Waffle
import json

In [None]:
# Group the rows by Continent and add up the numeric columns.
# numeric_only=True keeps non-numeric columns (e.g. Region) out of the
# aggregation; without it, pandas >= 2.0 tries to sum them as well and the
# division below fails. The deprecated `axis` keyword of groupby is not
# passed; grouping along rows is the default.
df_continents = df_canada.groupby('Continent').sum(numeric_only=True)

# Each continent's share of the overall Total, as a percentage.
df_fraction = df_continents.divide(df_continents.sum(axis=0), axis=1)['Total'] * 100

# Whole-number percentages for the waffle chart. Each value is rounded
# independently, so the values need not add up to exactly 100.
df_waffle = df_fraction.round(0).astype(int)

df_waffle.head(6)

In [None]:
# Waffle chart of the continent-wise percentages.
# The JSON round-trip turns the Series into a plain dict that maps each index
# label to its value.
data = json.loads(df_waffle.to_json(orient='index'))

fig = plt.figure(
    FigureClass=Waffle,
    rows=5,
    values=data,
    figsize=(10, 4),
    title=dict(
        label='Immigration to Canada by Continent [1980 - 2013]',
        loc='center',
    ),
    # one legend column per category, with a fully transparent frame
    legend=dict(
        loc='upper left',
        bbox_to_anchor=(0.0, 0.0),
        ncol=len(data),
        framealpha=0,
    ),
)

plt.show()