# PYTHON VISUALIZATION TOOLS

### Inspired by the Coursera course "Applied Plotting, Charting & Data Representation in Python" by the University of Michigan

# Part 1: Matplotlib Visualizations

In [30]:
# %matplotlib inline
%matplotlib notebook

import matplotlib.pyplot as plt
import numpy as np

matplotlib.pyplot provides a MATLAB-like plotting framework.

matplotlib.pyplot is a collection of command style functions that make matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc.

# Simple plots

### Example 1

In [32]:
# open a new figure so this example starts on an empty canvas
plt.figure()
# draw a single point at (1, 2); the '.' format string selects the point marker
plt.plot(1, 2, '.')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10c765110>]

In [37]:
?plt.plot

In [38]:
# draw a second point at (4, 3) as a red circle ('o' marker, color='r')
plt.plot(4, 3, 'o', color='r')

[<matplotlib.lines.Line2D at 0x11237f690>]

In [43]:
# get the current axes
ax = plt.gca()

# set the visible range of both axes in one call: [xmin, xmax, ymin, ymax]
ax.axis([0,6,0,10])
# the same limits could be set one axis at a time:
# ax.set_xlim(0,6)
# ax.set_ylim(0,10)

(0, 10)

In [35]:
# label both axes and give the current plot a title
plt.xlabel('X_coord')
plt.ylabel('Y_coord')
plt.title('Simple plot')

<matplotlib.text.Text at 0x10c72d1d0>

### Example 2

In [48]:
# create a new figure
plt.figure()

# three single-point series, each with its own marker and color:
# a black plus at (-1, -2), a green star at (0, 2) and a blue circle at (3, 5)
plt.plot(-1, -2, '+', color='k')
plt.plot(0, 2, '*', color='g')
plt.plot(3, 5, 'o', color='b')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1189b43d0>]

# Scatterplot

### Example 3

In [11]:
x = np.array([1,2,3,4,5,6,7,8])
# y is bound to the same array object as x (no copy), so every point has y == x
y = x

plt.figure()
# draw one marker per (x[i], y[i]) pair
plt.scatter(x, y) 

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x10e1bfc90>

### Example 4

In [12]:
# Add Colors
# one color name per point in x: every point is green except the last, which is red
colors = ['green'] * (len(x) - 1) + ['red']
colors

['green', 'green', 'green', 'green', 'green', 'green', 'green', 'red']

In [13]:
plt.figure()
# c takes one color per point, in the same order as x and y
plt.scatter(x, y, c=colors)

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x10e242990>

### Example 5

In [14]:
plt.figure()
# plot the first data series, 'Type 1', in green using every element of x and y except the last
plt.scatter(x[:-1], y[:-1], s=100, c='green', label='Type 1')
# plot the second data series, 'Type 2', in red using only the last element of x and y
plt.scatter(x[-1:], y[-1:], s=100, c='red', label='Type 2')

# Legends
# the legend entries come from the label= passed to each scatter call above
plt.legend()
# plt.legend(loc=4, frameon=False, title='Legend')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x10e2e6f10>

# Line Plots

### Example 6

In [16]:
linear_data = np.array([1,2,3,4,5,6,7,8])
# NOTE: despite its name this is the element-wise square of linear_data (quadratic growth),
# not a true exponential
exponential_data = linear_data**2

plt.figure()
# plot the linear data and the exponential data; no x values are given, so both are
# drawn against their index, each as a line with circle markers ('-o')
plt.plot(linear_data, '-o', exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10e40fb90>,
 <matplotlib.lines.Line2D at 0x10e40fcd0>]

In [17]:
# add one more three-point series, drawn as a dashed red line ('--r')
plt.plot([22,44,55], '--r')

[<matplotlib.lines.Line2D at 0x10e2585d0>]

In [18]:
# fill the area between the linear data and exponential data
# x positions are the indices 0..len(linear_data)-1, matching the index-based line plot;
# alpha=0.25 keeps the shading translucent
plt.gca().fill_between(range(len(linear_data)), 
                       linear_data, exponential_data, 
                       facecolor='blue', 
                       alpha=0.25)

<matplotlib.collections.PolyCollection at 0x10e38a990>

# Let's try working with dates!

### Example 7

In [19]:
plt.figure()

# eight consecutive days, 2017-01-01 through 2017-01-08 (np.arange excludes the stop date),
# one per element of linear_data
observation_dates = np.arange('2017-01-01', '2017-01-09', dtype='datetime64[D]')

# plot both series against the dates instead of their index
plt.plot(observation_dates, linear_data, '-o',  observation_dates, exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10e0a7c90>,
 <matplotlib.lines.Line2D at 0x10e37ed10>]

In [20]:
# grab the x-axis object of the current axes
# NOTE: this rebinds `x`, which held the scatter-plot data in the earlier examples
x = plt.gca().xaxis

# rotate the tick labels for the x axis
for item in x.get_ticklabels():
    item.set_rotation(45)

In [21]:
plt.xlabel('Date')
plt.ylabel('Units')
# plt.title('Exponential vs. Linear performance')
# the $...$ spans are rendered as math, so the title shows x squared and x
plt.title("Exponential ($x^2$) vs. Linear ($x$) performance")

<matplotlib.text.Text at 0x10e453ad0>

# Bar Chart

### Example 8

In [22]:
plt.figure()
# bar positions are the indices 0..len(linear_data)-1
xvals = range(len(linear_data))
# one bar per value of linear_data, each 0.3 wide
plt.bar(xvals, linear_data, width = 0.3)

<IPython.core.display.Javascript object>

<Container object of 8 artists>

In [23]:
# plot another set of bars beside the first: every position is shifted right by 0.3,
# the width of the bars already drawn, so the two series sit side by side
new_xvals = [item + 0.3 for item in xvals]
plt.bar(new_xvals, exponential_data, width=0.3, color='red')

<Container object of 8 artists>

In [24]:
from random import randint

# one random error magnitude between 0 and 15 (inclusive) per bar
# the comprehension variable is a throwaway name rather than `x`, so it cannot shadow the
# notebook-level `x` (or overwrite it on Python 2, where comprehension variables leak)
linear_err = [randint(0, 15) for _bar in range(len(linear_data))]
# This will plot a new set of bars with errorbars using the list of random error values
plt.bar(xvals, linear_data, width=0.3, yerr=linear_err)

<Container object of 8 artists>

### Example 9

In [25]:
# stacked bar charts are also possible
plt.figure()
xvals = range(len(linear_data))
plt.bar(xvals, linear_data, width = 0.3, color='b')
# bottom=linear_data starts each red bar where the blue bar at the same position ends
plt.bar(xvals, exponential_data, width = 0.3, bottom=linear_data, color='r')

<IPython.core.display.Javascript object>

<Container object of 8 artists>

### Example 10

In [26]:
# or use barh for horizontal bar charts
# compared with plt.bar, the bar thickness is passed as height= and the stacking offset as left=
plt.figure()
xvals = range(len(linear_data))
plt.barh(xvals, linear_data, height = 0.3, color='b')
plt.barh(xvals, exponential_data, height = 0.3, left=linear_data, color='r')

<IPython.core.display.Javascript object>

<Container object of 8 artists>

# Subplots

### Example 11

In [27]:
plt.figure()
# subplot with 1 row, 2 columns, and current axis is 1st subplot axes
plt.subplot(1, 2, 1)

# same values as the linear_data defined for the line plots
linear_data = np.array([1,2,3,4,5,6,7,8])

# drawn on the 1st (left) subplot, which is the current axes
plt.plot(linear_data, '-o')


# element-wise square of linear_data (quadratic, despite the name)
exponential_data = linear_data**2 

# subplot with 1 row, 2 columns, and current axis is 2nd subplot axes
# 122 is the compact form of (1, 2, 2)
plt.subplot(122)
plt.plot(exponential_data, '-o')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10e7b1a90>]

In [28]:
'Any problem?'

'Any problem?'

### Example 12

In [29]:
plt.figure()
# keep a handle on the left axes so the right one can share its y axis
ax1 = plt.subplot(1, 2, 1)
plt.plot(linear_data, '-o')
# pass sharey=ax1 to ensure the two subplots share the same y axis
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
plt.plot(exponential_data, '-x')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10e9cd2d0>]

In [30]:
# plt.figure()
# the right hand side is equivalent shorthand syntax
# compares the axes objects returned by the two calls
plt.subplot(1,2,1) == plt.subplot(121)

True

### Example 13

In [31]:
# create a 3x3 grid of subplots
# the nested tuples unpack the axes row by row; sharex/sharey=True make all nine share both axes
fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True)
# plot the linear_data on the 5th subplot axes 
ax5.plot(linear_data, '-')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x10e9cdb90>]

In [32]:
# put x ticks at exactly these positions on the current axes (note that 4 is not in the list)
plt.xticks([1,2,3,5,6])

([<matplotlib.axis.XTick at 0x10ed9ebd0>,
  <matplotlib.axis.XTick at 0x10ed9ead0>,
  <matplotlib.axis.XTick at 0x10e4c7350>,
  <matplotlib.axis.XTick at 0x10e9cda90>,
  <matplotlib.axis.XTick at 0x10ee3b2d0>],
 <a list of 5 Text xticklabel objects>)

# Histogram

### Example 14

In [33]:
# build a 2x2 grid of axes that all share one x axis
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

# panel n gets 10**(n+1) draws from the standard normal distribution
# (10, 100, 1000 and 10000 samples), shown as a histogram with the default binning
for n in range(len(axs)):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(0.0, 1.0, sample_size)
    axs[n].hist(sample)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

### Example 15

In [34]:
# four panels of 10, 100, 1000 and 10000 standard-normal draws again,
# this time with every histogram split into 100 bins
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

for n in range(len(axs)):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(0.0, 1.0, sample_size)
    axs[n].hist(sample, bins=100)
    axs[n].set_title('n={}'.format(sample_size))

<IPython.core.display.Javascript object>

### Example 16

In [35]:
# four panels of 10, 100, 1000 and 10000 standard-normal draws again, this time with
# explicit bin edges every 0.5 from -4 to 4 so all panels use identical bins
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True)
axs = [ax1, ax2, ax3, ax4]

for n in range(len(axs)):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(0.0, 1.0, sample_size)
    axs[n].hist(sample, bins=np.arange(-4, 4.5, 0.5))
    axs[n].set_title('n={}'.format(sample_size))
# ticks at every integer from -4 to 4 (the 4.1 stop keeps 4 in the range)
plt.xticks(np.arange(-4, 4.1, 1))

<IPython.core.display.Javascript object>

([<matplotlib.axis.XTick at 0x10fd397d0>,
  <matplotlib.axis.XTick at 0x10fd39890>,
  <matplotlib.axis.XTick at 0x10fb89a10>,
  <matplotlib.axis.XTick at 0x10ff02610>,
  <matplotlib.axis.XTick at 0x10ff02d10>,
  <matplotlib.axis.XTick at 0x10ff0e450>,
  <matplotlib.axis.XTick at 0x10ff0eb50>,
  <matplotlib.axis.XTick at 0x10ff1a290>,
  <matplotlib.axis.XTick at 0x10ff1a990>],
 <a list of 9 Text xticklabel objects>)

# Boxplot

### Example 17

In [47]:
plt.figure()
# draw 10000 values from each of three distributions: standard normal, uniform on [0, 1),
# and gamma with shape 2
normal_sample = np.random.normal(loc=0.0, scale=1.0, size=10000)
random_sample = np.random.random(size=10000)
gamma_sample = np.random.gamma(2, size=10000)

# one box per sample; the result is assigned to a variable only to suppress the cell output
# NOTE: `ax` here holds plt.boxplot's return value, not an Axes object
ax = plt.boxplot([normal_sample, random_sample, gamma_sample], whis=[5,95]) # leftover alternative: whis='range'

<IPython.core.display.Javascript object>

In [44]:
# ax.xticks(['normal', 'random', 'gamma'])
# label the tick positions 1, 2 and 3 with the names of the three samples
plt.xticks([1, 2, 3], ['normal', 'random', 'gamma'])

([<matplotlib.axis.XTick at 0x10e8f8910>,
  <matplotlib.axis.XTick at 0x11b28c090>,
  <matplotlib.axis.XTick at 0x11c138c50>],
 <a list of 3 Text xticklabel objects>)

# Heatmaps

### Example 18

In [53]:
plt.figure()

# 10000 points: Y drawn from a standard normal, X uniform on [0, 1)
Y = np.random.normal(loc=0.0, scale=1.0, size=10000)
X = np.random.random(size=10000)
# 2D histogram with 25 bins along each axis; assigned to a variable to keep the cell output quiet
ax = plt.hist2d(X, Y, bins=25)

<IPython.core.display.Javascript object>

In [54]:
# add a color scale for the 2D histogram drawn in the previous cell
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x1245abf90>

### Example 19

In [55]:
plt.figure()
# same X and Y as the previous heatmap, now with 100 bins along each axis
ax = plt.hist2d(X, Y, bins=100)
plt.colorbar()

<IPython.core.display.Javascript object>

<matplotlib.colorbar.Colorbar at 0x1260b1e90>

# Animations

### Example 20

In [56]:
import matplotlib.animation as animation

# number of samples; update() also stops the animation when the frame index reaches n
n = 100
# n standard-normal draws; update() histograms the first `curr` of them on each frame
x = np.random.randn(n)

In [57]:
# create the function that will do the plotting, where curr is the current frame
def update(curr):
    """Redraw the histogram for animation frame ``curr``.

    Clears the current axes and draws a histogram of the first ``curr``
    values of the notebook-level sample ``x``. When ``curr`` equals the
    notebook-level ``n``, the notebook-level animation ``a`` is stopped;
    that final frame is still drawn.

    Parameters
    ----------
    curr : int
        Index of the current frame, which is also the number of samples shown.
    """
    # check if animation is at the last frame, and if so, stop the animation a
    if curr == n:
        a.event_source.stop()
    plt.cla()
    # np.arange excludes its stop value, so stop at 4.5 to get a last bin edge at 4.0;
    # stopping at 4 would end the bins at 3.5 and silently drop samples in [3.5, 4)
    # even though the axis below runs to 4
    bins = np.arange(-4, 4.5, 0.5)
    plt.hist(x[:curr], bins=bins)
    # fixed limits keep the view steady from frame to frame
    plt.axis([-4, 4, 0, 30])
    axes = plt.gca()
    axes.set_title('Sampling the Normal Distribution')
    axes.set_ylabel('Frequency')
    axes.set_xlabel('Value')
    # running sample count, placed in the top-right corner of the fixed view
    plt.annotate('n = {}'.format(curr), [3, 27])

In [58]:
fig = plt.figure()
# redraw via update() at a 100 ms interval; the animation is kept in `a` because
# update() reads that name to stop it on the last frame
a = animation.FuncAnimation(fig, update, interval=100)



<IPython.core.display.Javascript object>

### Example 21

# Part 2: Pandas Visualizations

In [1]:
import pandas as pd

pd.options.display.max_columns = 200

In [2]:
# from sklearn import datasets
# iris = datasets.load_iris()
# iris = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
#                      columns= iris['feature_names'] + ['target'])
# needed_cols = ['VEHICLE_ODOMETER', 'COMP_COMPANY_CODE', 'FILE_AMT', 'COND_DRIVEABLE_CODE', 'LOSS_DATETIME', 'VEH_YEAR_CODE', 'VEH_MAKE_CODE', 'VEH_MODEL_DESC', 'VEH_STYLE_CODE']
# df = pd.read_csv('ces_EE1943.csv', usecols=needed_cols)
# df.shape

  interactivity=interactivity, compiler=compiler, result=result)


(75284, 9)

In [3]:
# df.dropna(axis=0, inplace=True)
# for i in needed_cols:
#     if df[i].dtype == 'object':
#         df = df[df[i] != 'U']
# df.shape

(71555, 9)

In [16]:
# seed NumPy's global random generator so the draws below are reproducible
np.random.seed(123)

# alternative version: cumulative sums of the draws instead of the raw draws
# df = pd.DataFrame({'A': np.random.randn(365).cumsum(0), 
#                    'B': np.random.randn(365).cumsum(0) + 20,
#                    'C': np.random.randn(365).cumsum(0) - 20}, 
#                   index=pd.date_range('1/1/2017', periods=365))
# 365 daily rows starting 2017-01-01; A, B and C are standard-normal draws
# shifted by 0, +20 and -20 respectively
df = pd.DataFrame({'A': np.random.randn(365), 
                   'B': np.random.randn(365) + 20,
                   'C': np.random.randn(365) - 20}, 
                  index=pd.date_range('1/1/2017', periods=365))
df.shape

(365, 3)

In [17]:
df.head()

Unnamed: 0,A,B,C
2017-01-01,-1.085631,20.059291,-20.230904
2017-01-02,0.997345,21.744041,-16.428421
2017-01-03,0.282978,19.032256,-20.396156
2017-01-04,-1.506295,20.419568,-20.038321
2017-01-05,-0.5786,20.206928,-22.424836


In [18]:
# scatter plot with column 'A' on the x axis and column 'B' on the y axis;
# the trailing semicolon suppresses the cell's text output
df.plot('A', 'B', kind = 'scatter');

<IPython.core.display.Javascript object>

You can also choose the plot kind by calling the matching `DataFrame.plot.<kind>` method (for example `df.plot.scatter(...)`) instead of providing the `kind` keyword argument.

`kind` :
- `'line'` : line plot (default)
- `'bar'` : vertical bar plot
- `'barh'` : horizontal bar plot
- `'hist'` : histogram
- `'box'` : boxplot
- `'kde'` : Kernel Density Estimation plot
- `'density'` : same as 'kde'
- `'area'` : area plot
- `'pie'` : pie plot
- `'scatter'` : scatter plot
- `'hexbin'` : hexbin plot

In [23]:
# create a scatter plot of columns 'A' and 'C', with changing color (c) and size (s) based on column 'B'
# the returned axes object is kept in `ax` so it can be adjusted in the next cell
ax = df.plot.scatter('A', 'C', c='B', s=df['B'], colormap='viridis')

<IPython.core.display.Javascript object>

In [24]:
# use the same scale on both axes of the scatter plot created above
ax.set_aspect('equal')

In [25]:
# one box per column of df
df.plot.box();

<IPython.core.display.Javascript object>

In [27]:
# histograms of all columns on one axes, 30 bins; alpha=0.7 keeps them translucent
df.plot.hist(alpha=0.7, bins = 30);

<IPython.core.display.Javascript object>

In [28]:
# kernel density estimate of each column
df.plot.kde();

<IPython.core.display.Javascript object>