# Lunch and Learn. 10/10/2017

# PYTHON VISUALIZATION TOOLS

## Prepared by Tamirlan Seidakhmetov

### Inspired by Coursera Applied Plotting, Charting & Data Representation in Python class by University of Michigan

# Part 1: Matplotlib Visualizations

In [None]:
# %matplotlib inline
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

matplotlib.pyplot provides a MATLAB-like plotting framework.

matplotlib.pyplot is a collection of command style functions that make matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc.

# Simple plots

### Example 1

In [None]:
# open a new figure for the plotting calls in this example
plt.figure()
# plot the single point (1, 2) using the '.' marker
plt.plot(1, 2, '.')

In [None]:
?plt.plot

In [None]:
# add a second point (4, 3), drawn as a red 'o' marker
plt.plot(4, 3, 'o', color='r')

In [None]:
# get the current axes
ax = plt.gca()

# set the axis limits, given as [xmin, xmax, ymin, ymax]
ax.axis([0,6,0,10])
# alternative: set each axis range separately
# ax.set_xlim(0,6)
# ax.set_ylim(0,10)

In [None]:
# label both axes and give the plot a title
plt.xlabel('X_coord')
plt.ylabel('Y_coord')
plt.title('Simple plot')

### Example 2

In [None]:
# create a new figure
plt.figure()

# three separate points, each with its own marker style and color:
# black '+', green '*', blue 'o'
plt.plot(-1, -2, '+', color='k')
plt.plot(0, 2, '*', color='g')
plt.plot(3, 5, 'o', color='b')

# Scatterplot

### Example 3

In [None]:
# x holds the integers 1..10 and y their squares
x = np.arange(1, 11)
y = np.square(x)

# draw the (x, y) pairs as a scatter plot on a fresh figure
plt.figure()
plt.scatter(x, y)

### Example 4

In [None]:
# Add Colors: one entry per point in x, all 'blue' except the last, which is 'red'
colors = ['blue'] * (len(x) - 1) + ['red']
colors

In [None]:
plt.figure()
# c takes one color per point, supplied here by the colors list
plt.scatter(x, y, c=colors)

### Example 5

In [None]:
plt.figure()

# plot all points but the last as one labelled series, and the last point as another;
# s is the marker size
plt.scatter(x[:-1], y[:-1], s=100, c='blue', label='Type 1')
plt.scatter(x[-1:], y[-1:], s=100, c='red', label='Type 2')

# Legends: entries come from the label= arguments given above
plt.legend(title='Legend')

# Line Plots

### Example 6

In [None]:
# the integers 1..8 and their squares
# (despite its name, exponential_data holds quadratic values)
linear_data = np.arange(1, 9)
exponential_data = linear_data ** 2

plt.figure()
# draw both series in one call, each as a line with circle markers ('-o')
plt.plot(linear_data, '-o', exponential_data, '-o')

In [None]:
# no plt.figure() here, so this line is added to the figure created above;
# only one sequence is passed, with no separate x values
plt.plot([50, 70, 120])

In [None]:
# fill the area between the linear data and exponential data
# x positions are the indices 0..len(linear_data)-1; alpha=0.1 makes the fill mostly transparent
plt.gca().fill_between(range(len(linear_data)), 
                       linear_data, exponential_data, 
                       facecolor='blue', 
                       alpha=0.1)

# Bar Chart

### Example 7

In [None]:
plt.figure()
# bar positions: one index per value in linear_data
xvals = range(len(linear_data))
# draw the linear data as bars of width 0.3
plt.bar(xvals, linear_data, width = 0.3)

In [None]:
# plot another set of bars, shifting each x position by the bar width (0.3)
# so the new bars sit next to the first set instead of on top of it
new_xvals = [position + 0.3 for position in xvals]
plt.bar(new_xvals, exponential_data, width=0.3, color='red')

In [None]:
from random import randint

# one random integer error value between 0 and 15 for each bar
linear_err = [randint(0, 15) for _ in range(len(linear_data))]
# This will plot a new set of bars with errorbars using the list of random error values
plt.bar(xvals, linear_data, width=0.3, yerr=linear_err)

### Example 8

In [None]:
# stacked bar charts are also possible
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3, color='b')
# bottom=linear_data starts each red bar where the blue bar ends, stacking the two series
plt.bar(xvals, exponential_data, width = 0.3, bottom=linear_data, color='r')

### Example 9

In [None]:
# or use barh for horizontal bar charts
plt.figure()
xvals = range(len(linear_data))
# barh takes height= for the bar thickness and left= for the stacking offset
# (the counterparts of width= and bottom= in plt.bar)
plt.barh(xvals, linear_data, height = 0.3, color='b')
plt.barh(xvals, exponential_data, height = 0.3, left=linear_data, color='r')

# Subplots

### Example 10

In [None]:
# data for the two panels: the integers 1..8 and their squares
linear_data = np.arange(1, 9)
exponential_data = linear_data ** 2

plt.figure()

# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
plt.subplot(122)
plt.plot(exponential_data, '-o')

In [None]:
# discussion prompt for the audience
# NOTE(review): presumably hints that the two panels above do not share a y scale,
# which Example 11 addresses with sharey — confirm with the author
'Any problem?'

### Example 11

In [None]:
plt.figure()
# keep a handle on the first subplot so the second one can refer to it
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
plt.plot(exponential_data, '-o')

In [None]:
# no new figure is created here (the call is left commented out)
# plt.figure()
# the right hand side is equivalent shorthand syntax;
# the cell's result is the comparison of the two returned axes objects
plt.subplot(1,2,1) == plt.subplot(121)

### Example 12

In [None]:
# create a 3x3 grid of subplots, unpacked row by row into ax1..ax9,
# with the x and y axes shared across all of them
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)
# plot the linear_data on the 5th subplot axes (the middle of the grid)
ax5.plot(linear_data, '-')

In [None]:
# set the x tick locations to the listed values (4 is left out)
plt.xticks([1,2,3,5,6])

# Histogram

### Example 13

In [None]:
# create 2x2 grid of axis subplots sharing one x axis
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# panel n shows a histogram of 10**(n+1) draws from the standard normal
# distribution, i.e. 10, 100, 1000 and 10000 samples
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample)
    panel.set_title('n={}'.format(sample_size))

### Example 14

In [None]:
# repeat with number of bins set to 100
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# same sample sizes as before (10, 100, 1000, 10000), one per panel
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample, bins=100)
    panel.set_title('n={}'.format(sample_size))

### Example 15

In [None]:
# repeat with constant size bins
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# every panel uses the same bin edges: -4 to 4 in steps of 0.5
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample, bins=np.arange(-4, 4.5, 0.5))
    panel.set_title('n={}'.format(sample_size))
# tick marks at the whole numbers from -4 to 4
plt.xticks(np.arange(-4, 4.1, 1))

# Boxplot

### Example 16

In [None]:
plt.figure()
# three samples of 10000 values each, drawn from different distributions:
# normal (mean 0, std 1), np.random.random, and gamma (shape 2)
normal_sample = np.random.normal(loc=0.0, scale=1.0, size=10000)
random_sample = np.random.random(size=10000)
gamma_sample = np.random.gamma(2, size=10000)

# create a boxplot of the three samples; assign the output to a variable to suppress cell output
# NOTE(review): the name `ax` holds whatever plt.boxplot returns, which may not be an axes object — confirm
ax = plt.boxplot([normal_sample, random_sample, gamma_sample], whis=[5,95]) # previously tried: whis='range'

In [None]:
# abandoned attempt, kept for reference:
# ax.xticks(['normal', 'random', 'gamma'])
# label tick positions 1, 2, 3 with the names of the three samples
plt.xticks([1, 2, 3], ['normal', 'random', 'gamma'])

# Heatmaps

### Example 17

In [None]:
plt.figure()

# 10000 (X, Y) pairs: Y from the normal distribution (mean 0, std 1), X from np.random.random
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# 2D histogram (heatmap) of the pairs with bins=25; the result is assigned so it is not echoed as cell output
ax = plt.hist2d(X, Y, bins=25)

In [None]:
# add a colorbar to the heatmap drawn above
plt.colorbar()

### Example 18

In [None]:
plt.figure()
# same data as Example 17, with a finer grid (bins=100 instead of 25)
ax = plt.hist2d(X, Y, bins=100)
plt.colorbar()

# Animations

### Example 19

In [None]:
import matplotlib.animation as animation

# number of samples to animate; update() stops the animation when the frame number reaches n
n = 100
# the samples themselves (this rebinds the notebook-level name x used in earlier examples)
x = np.random.randn(n)

In [None]:
# create the function that will do the plotting, where curr is the current frame
def update(curr):
    # check if animation is at the last frame, and if so, stop the animation a
    if curr == n: 
        a.event_source.stop()
    plt.cla()
    bins = np.arange(-4, 4, 0.5)
    plt.hist(x[:curr], bins=bins)
    plt.axis([-4,4,0,30])
    plt.gca().set_title('Sampling the Normal Distribution')
    plt.gca().set_ylabel('Frequency')
    plt.gca().set_xlabel('Value')
    plt.annotate('n = {}'.format(curr), [3,27])

In [None]:
fig = plt.figure()
# call update for each frame with interval=100;
# the animation is bound to the name `a` because update() uses a.event_source.stop() to end it
a = animation.FuncAnimation(fig, update, interval=100)

# Part 2: Pandas Visualizations

In [None]:
import pandas as pd

pd.options.display.max_columns = 200

In [None]:
# fix the seed so the generated data is the same on every run
np.random.seed(123)

# 365 rows, one per day starting 1/1/2017; each column is standard-normal noise
# shifted by a constant offset: A by 0, B by +20, C by -20
# (apply .cumsum(0) to each draw instead to get random-walk style series)
df = pd.DataFrame(
    {
        column: np.random.randn(365) + offset
        for column, offset in (('A', 0), ('B', 20), ('C', -20))
    },
    index=pd.date_range('1/1/2017', periods=365),
)
df.shape

In [None]:
# preview the first rows of the frame
df.head()

### Example 20

In [None]:
# scatter plot of column 'A' against column 'B', selected via the kind keyword
df.plot('A', 'B', kind = 'scatter');

You can also choose the plot kind by calling the corresponding `DataFrame.plot.<kind>` method (for example, `df.plot.scatter(...)`) instead of providing the `kind` keyword argument.

`kind` :
- `'line'` : line plot (default)
- `'bar'` : vertical bar plot
- `'barh'` : horizontal bar plot
- `'hist'` : histogram
- `'box'` : boxplot
- `'kde'` : Kernel Density Estimation plot
- `'density'` : same as 'kde'
- `'area'` : area plot
- `'pie'` : pie plot
- `'scatter'` : scatter plot
- `'hexbin'` : hexbin plot

### Example 21

In [None]:
# create a scatter plot of columns 'A' and 'C', with changing color (c) and size (s) based on column 'B'
ax = df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')

In [None]:
# set the aspect of the scatter plot's axes to 'equal'
ax.set_aspect('equal')

### Example 22

In [None]:
# boxplot of the columns of df
df.plot.box();

### Example 23

In [None]:
# histogram of the columns of df using 30 bins; alpha=0.7 makes the bars partly transparent
df.plot.hist(alpha=0.7, bins = 30);

### Example 24

In [None]:
# Kernel Density Estimation plot of the columns of df
df.plot.kde();

# The End