# Dataviz

In [None]:
# This line configures matplotlib to show figures embedded in the notebook, 
# instead of opening a new window for each figure.
%matplotlib inline

# import the matplotlib.pyplot module under the name plt:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
import numpy as np

## Simple line plot

In [None]:
# Sample data shared by most plots below: 10 evenly spaced points on [0, 5]
# and their squares.
x = np.linspace(start=0, stop=5, num=10)
y = np.square(x)

We store a reference to the newly created figure instance in the fig variable, and from it we create a new axis instance axes using the add_axes method in the Figure class instance fig:

In [None]:
# Start from an empty figure and place a single axes on it by hand.
fig = plt.figure()

# add_axes takes [left, bottom, width, height], each a fraction (0 to 1) of the figure
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

# draw y against x using the 'r' (red) format string
axes.plot(x, y, 'r')

# axis labels and title in a single call; the ';' hides the return value
axes.set(xlabel='x', ylabel='y', title='title');

In [None]:
# Same data through the pyplot state machine: a 1-row, 2-column grid.
# The three-digit form 121 / 122 is shorthand for (1, 2, 1) / (1, 2, 2).
plt.subplot(121)        # select the left panel
plt.plot(x, y, 'r--')   # red dashed line
plt.subplot(122)        # select the right panel
plt.plot(y, x, 'g*-');  # green solid line with star markers, axes swapped

Although a little bit more code is involved, the advantage is that we now have full control of where the plot axes are placed, and we can easily add more than one axis to the figure:

In [None]:
fig = plt.figure()

# Both rectangles are [left, bottom, width, height] in figure fractions;
# the second one sits inside the first, producing an inset.
axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # main axes
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3])  # inset axes

# main plot: y against x in red
axes1.plot(x, y, 'r')
axes1.set(xlabel='x', ylabel='y', title='title')

# inset plot: the same data with the axes swapped, in green
axes2.plot(y, x, 'g')
axes2.set(xlabel='y', ylabel='x', title='insert title');

In [None]:
# do simple plots without keeping fig and axes:
# pyplot draws on the current figure/axes, creating them on first use

plt.plot(x, y, 'r')
plt.xlabel('xlabel', fontsize=18)
plt.ylabel('ylabel', fontsize=16);  # trailing ';' keeps the Text(...) repr out of the cell output

In [None]:
# set resolution and aspect ratio:
# figsize is (width, height) in inches, dpi is dots per inch

plt.figure(figsize=(8,4), dpi=100)

plt.plot(x, y, 'r');  # trailing ';' hides the [<Line2D ...>] repr

## Custom Visual appearance

The simplest method to customize plot appearance is to use format strings, which take three characters and use the format _cml_ where

* c--: The color
* -m-: The marker
* --l: The line style

For example, `'b.:'` will produce a blue dotted line with point markers. Format strings can be used with either of these `plot` call forms:

* `plt.plot(x, format)`
* `plt.plot(x, y, format)`

In [None]:
# 'ro-' = red colour, circle markers, solid line; ';' hides the [<Line2D ...>] repr
plt.plot(x, y, 'ro-');

In [None]:
# by default we can plot several series on the same plot:
# consecutive plt.plot calls in one cell draw on the same axes

plt.plot(x, y, 'ro-')
plt.plot(y, x, 'g*--');  # trailing ';' hides the [<Line2D ...>] repr

In [None]:
# can also specify colors by name or hex code, and transparency through alpha

plt.plot(x, x+1, color="red", alpha=0.5) # half-transparent red
plt.plot(x, x+2, color="#1155dd")        # RGB hex code for a bluish color
plt.plot(x, x+3, color="#15cc55");       # RGB hex code for a greenish color (';' hides the repr)

## save file (export into images)

In [None]:
# we can save a plot inside a file (supports PNG, JPG, EPS, SVG, PGF and PDF)
import os

# savefig does not create missing directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError
os.makedirs("output", exist_ok=True)

fig = plt.figure()
plt.plot(x, y, 'ro-')
fig.savefig("output/my_plot.png")

## Title and Legend

In [None]:
# the label= given to each plot call is the text shown by plt.legend()
plt.plot(x, x**2, label="curve1")
plt.plot(x, x**3, label="curve2")
plt.legend()

plt.title("title")

plt.xlabel("x")
plt.ylabel("y");  # trailing ';' keeps the Text(...) repr out of the cell output

## Use raw strings to insert LaTeX

In [None]:
# Raw strings (r"...") stop Python from interpreting the backslashes,
# so the $...$ math markup reaches matplotlib unchanged.
plt.plot(x, x**2, label=r"$y = \alpha^2$")
plt.plot(x, x**3, label=r"$y = \alpha^3$")

plt.title(r'$Title$')
plt.xlabel(r'$\alpha$')
plt.ylabel(r'$y$')

# 'upper left' is the named form of location code 2
plt.legend(loc='upper left');

In [None]:
# IPython shell escape: run `ls` to list the current working directory
# NOTE(review): relies on an `ls` executable being available; this looks like a
# leftover manual check — confirm whether the cell is still needed
!ls

### Control over axis appearance

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
ax_default, ax_tight, ax_custom = axes

# the same two curves (x**2 and x**3) go on every panel
for panel in axes:
    panel.plot(x, x**2, x, x**3)

# left: leave the automatically chosen limits alone
ax_default.set_title("default axes ranges")

# middle: ask matplotlib for 'tight' limits
ax_tight.axis('tight')
ax_tight.set_title("tight axes")

# right: explicit limits on both axes
ax_custom.set_ylim([0, 60])
ax_custom.set_xlim([2, 5])
ax_custom.set_title("custom axes range");

## Logarithmic scale

It is also possible to set a logarithmic scale for one or both axes. This functionality is in fact only one application of a more general transformation system in Matplotlib. Each axis scale is set separately using the `set_xscale` and `set_yscale` methods, which accept one parameter (with the value "log" in this case):

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(10,4))
ax_linear, ax_log = axes

# identical curves (x**2 and exp(x)) on both panels; only the y scale differs
for panel in axes:
    panel.plot(x, x**2, x, np.exp(x))

ax_linear.set_title("Normal scale")

ax_log.set_yscale("log")
ax_log.set_title("Logarithmic scale (y)");

### Placement of ticks and custom tick labels

We can explicitly determine where we want the axis ticks with `set_xticks` and `set_yticks`, which both take a list of values for where on the axis the ticks are to be placed. We can also use the `set_xticklabels` and `set_yticklabels` methods to provide a list of custom text labels for each tick location:

In [None]:
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(x, x**2, x, x**3, lw=2)

# x axis: ticks at 1..5, each labelled with a Greek letter
ax.set_xticks([1, 2, 3, 4, 5])
ax.set_xticklabels(
    [r'$\alpha$', r'$\beta$', r'$\gamma$', r'$\delta$', r'$\epsilon$'],
    fontsize=18,
)

# y axis: ticks at fixed values, labels formatted with one decimal inside $...$
yticks = [0, 50, 100, 150]
ax.set_yticks(yticks)
ax.set_yticklabels(["$%.1f$" % tick for tick in yticks], fontsize=18);

### Axis grid

With the `grid` method in the axis object, we can turn on and off grid lines. We can also customize the appearance of the grid lines using the same keyword arguments as the `plot` function:

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(10,3))

# both panels show the same pair of curves
for panel in axes:
    panel.plot(x, x**2, x, x**3, lw=2)

# left panel: grid with default appearance
axes[0].grid(True)

# right panel: grid styled through keyword arguments
axes[1].grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)

### Twin axes

Sometimes it is useful to have dual x or y axes in a figure; for example, when plotting curves with different units together. Matplotlib supports this with the `twinx` and `twiny` functions:

In [None]:
fig, ax1 = plt.subplots()

# first y axis: x**2 in blue, with label and tick labels coloured to match
ax1.plot(x, x**2, lw=2, color="blue")
ax1.set_ylabel(r"area $(m^2)$", fontsize=18, color="blue")
for tick_label in ax1.get_yticklabels():
    tick_label.set_color("blue")

# second y axis created with twinx() from the first: x**3 in red
ax2 = ax1.twinx()
ax2.plot(x, x**3, lw=2, color="red")
ax2.set_ylabel(r"volume $(m^3)$", fontsize=18, color="red")
for tick_label in ax2.get_yticklabels():
    tick_label.set_color("red")

### Axes where x and y are zero

In [None]:
fig, ax = plt.subplots()

# hide the right and top borders
for side in ('right', 'top'):
    ax.spines[side].set_color('none')

# keep the ticks on the bottom and left spines ...
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# ... and move both of those spines to data coordinate 0
for side in ('bottom', 'left'):
    ax.spines[side].set_position(('data', 0))

# a cubic sampled on both sides of zero
xx = np.linspace(-0.75, 1., 100)
ax.plot(xx, xx**3);

### Other 2D plot styles

In addition to the regular `plot` method, there are a number of other functions for generating different kinds of plots. See the matplotlib plot gallery for a complete list of available plot types: http://matplotlib.org/gallery.html. Some of the more useful ones are shown below:

In [None]:
# integer positions 0..5 used by the step and bar plots below
n = np.arange(6)

In [None]:
fig, axes = plt.subplots(1, 4, figsize=(12,3))

# scatter: the line y = xx plus Gaussian noise; a locally seeded generator
# makes the figure identical on every run without touching the global RNG
noise = np.random.RandomState(0).randn(len(xx))
axes[0].scatter(xx, xx + 0.25*noise)
axes[0].set_title("scatter")

# step: piecewise-constant curve through (n, n**2)
axes[1].step(n, n**2, lw=2)
axes[1].set_title("step")

# bar: half-width, semi-transparent bars centred on each n
axes[2].bar(n, n**2, align="center", width=0.5, alpha=0.5)
axes[2].set_title("bar")

# fill_between: shade the region between x**2 and x**3
axes[3].fill_between(x, x**2, x**3, color="green", alpha=0.5)
axes[3].set_title("fill_between");

In [None]:
# 100,000 standard-normal samples for the histograms below.
# A locally seeded generator makes the sample (and the figure) reproducible.
# Note: this rebinds `n`, which held the integers 0..5 in the earlier cells.
n = np.random.RandomState(42).randn(100000)

In [None]:
# A histogram
fig, axes = plt.subplots(1, 2, figsize=(12,4))

# ndarray.min()/max() are vectorised; the builtin min()/max() would walk the
# 100,000 samples one Python object at a time
data_range = (n.min(), n.max())

# left: default binning
axes[0].hist(n)
axes[0].set_title("Default histogram")
axes[0].set_xlim(data_range)

# right: cumulative counts with 50 bins
axes[1].hist(n, cumulative=True, bins=50)
axes[1].set_title("Cumulative detailed histogram")
axes[1].set_xlim(data_range);

## 3D figures

To use 3D graphics in matplotlib, we first need to create an instance of the `Axes3D` class. 3D axes can be added to a matplotlib figure canvas in exactly the same way as 2D axes; or, more conveniently, by passing a `projection='3d'` keyword argument to the `add_axes` or `add_subplot` methods.

In [None]:
from mpl_toolkits.mplot3d.axes3d import Axes3D

In [None]:
# Regular grid over [-5, 5) in both directions with a step of 0.25 ...
grid_coords = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(grid_coords, grid_coords)

# ... and the distance of every grid point from the origin as the height
Z = np.sqrt(np.square(X) + np.square(Y))

#### Surface plots

In [None]:
fig = plt.figure(figsize=(8,6))

# a single 3D axes filling the figure
ax = fig.add_subplot(1, 1, 1, projection='3d')

# draw the surface using every 4th row and column of the grid, without edge lines
p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, linewidth=0)

#### Wire-frame plot

In [None]:
fig = plt.figure(figsize=(8,6))

# 111 is shorthand for a 1x1 grid, first (only) cell
ax = fig.add_subplot(111, projection='3d')

# wire mesh through every 4th row and column of the grid
p = ax.plot_wireframe(X, Y, Z, rstride=4, cstride=4)

#### Scatter plot

In [None]:
fig = plt.figure(figsize=(8,6))

# 111 is shorthand for a 1x1 grid, first (only) cell
ax = fig.add_subplot(111, projection='3d')

# one marker per grid point
p = ax.scatter(X, Y, Z)

In [None]:
from sklearn import datasets

# Load the iris dataset once and pull out the four entries used below,
# instead of calling load_iris() again for every key.
iris = datasets.load_iris()

data = iris['data']
feature_names = iris['feature_names']
target = iris['target']
target_names = iris['target_names']

In [None]:
import pandas as pd
# One row per sample, one column per feature.
df = pd.DataFrame(data, columns=feature_names)

# Translate the integer class codes into class names in one vectorised lookup:
# indexing the names array with the codes array yields one name per row.
# assumes target_names is a NumPy array and target holds integer indices into it
# (as returned by load_iris) — TODO confirm if the data source changes
df['target'] = target_names[target]

In [None]:
# first rows of the samples whose target is 'setosa'
df.loc[df['target'] == 'setosa'].head()

In [None]:
# one sub-frame per target name, selected with a boolean mask through .loc
df_setosa = df.loc[df['target'] == 'setosa']
df_versicolor = df.loc[df['target'] == 'versicolor']
df_virginica = df.loc[df['target'] == 'virginica']

## Exercise 1

In [None]:
# Build this graph and save it to data/graph1.png

# hint 1: which plot type is it?
# hint 2: don't forget to set figure size, title, xlabel, ylabel

# The cell displays the reference picture stored in data/plt1.png;
# your own figure goes to a different file (data/graph1.png).
from IPython.display import Image
Image(filename='data/plt1.png')

## Exercise 2

In [None]:
# Build this graph and save it to data/graph2.png

# The cell displays the reference picture stored in data/plt2.png;
# your own figure goes to a different file (data/graph2.png).
from IPython.display import Image
Image(filename='data/plt2.png')

# Seaborn

Seaborn is a Python visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics.

In [None]:
import seaborn as sns
# !conda install seaborn

In [None]:
# Check out the tips dataset, loaded by name through seaborn's load_dataset:

tips = sns.load_dataset('tips')

# preview the first rows
tips.head()

In [None]:
# add the tip as a fraction of the total bill (tip / total_bill, so 0.15 means 15 %)
# NOTE: despite the column name "tip_percent", the value is not multiplied by 100

tips["tip_percent"] = tips["tip"] / tips["total_bill"]

tips.head()

In [None]:
# keep only the two grouping columns and the value to average
tips_subset = tips[['day', 'sex', 'tip_percent']]

# mean tip_percent for every (day, sex) pair, with the group keys turned
# back into ordinary columns
tips_agg = (
    tips_subset
    .groupby(['day', 'sex'])[['tip_percent']]
    .mean()
    .reset_index()
)

tips_agg.head()

In [None]:
# Reshape to wide form for the heatmap: one row per day, one column per sex,
# mean tip_percent as the cell value.
# The arguments are passed by keyword because DataFrame.pivot no longer
# accepts them positionally in pandas >= 2.0.
tips_agg_pivoted = tips_agg.pivot(index="day", columns="sex", values="tip_percent")

tips_agg_pivoted.head()

In [None]:
# annot=True writes each cell's value inside the cell; ';' hides the Axes repr
sns.heatmap(tips_agg_pivoted, annot=True);

In [None]:
# Exercise

# The flights dataset represents flights in the late 40s and the 50s.
# Pivot the table and plot it as a heatmap
# Make a heatmap (Take a look at https://stanford.edu/~mwaskom/software/seaborn/generated/seaborn.heatmap.html)

# Make lines that divide the cells (width of 0.5)
# Select the yellow-green-blue colormap ("YlGnBu")

flights = sns.load_dataset("flights")
flights.head()
#...

In [None]:
# Reshape the long table to wide form: one row per month, one column per year,
# passenger counts as the cell values.
# The arguments are passed by keyword because DataFrame.pivot no longer
# accepts them positionally in pandas >= 2.0.
# NOTE: this rebinds `flights` to the pivoted table, so re-run the cell that
# loads the dataset before running this cell a second time.
flights = flights.pivot(index="month", columns="year", values="passengers")
flights.head()

In [None]:
# heatmap of the pivoted flights table; the trailing ';' hides the Axes repr
sns.heatmap(
    flights,
    cmap="YlGnBu",  # different color set (Yellow, Green, Blue)
    linewidths=.5,  # add lines between the cells
);

# More Dataviz in Python

### Bokeh

http://bokeh.pydata.org/en/latest/

Bokeh is a Python interactive visualization library that targets modern web browsers for presentation

### Lightning

http://lightning-viz.org/

Lightning provides API-based access to reproducible web visualizations.

### Plotly

https://plot.ly/python/

Plotly's Python graphing library makes interactive, publication-quality graphs online. 