# Subplots

In [1]:
%matplotlib widget

import matplotlib.pyplot as plt
import numpy as np

# called without arguments; the cell output below shows that a single AxesSubplot is returned
plt.subplot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x1be624bf108>

In [2]:
# open a new figure and make the 1st cell of a 1-row, 2-column grid the current axes
plt.figure()
plt.subplot(1, 2, 1)

# the integers 1 through 8
linear_data = np.array(range(1, 9))

# draw the series on the current axes using the '-o' format string
plt.plot(linear_data, '-o')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1be5fca1dc8>]

In [3]:
# element-wise square of the linear series (despite the name, these are squares)
exponential_data = linear_data * linear_data

# make the 2nd cell of the 1x2 grid current and draw the squared series on it
right_axes = plt.subplot(1, 2, 2)
right_axes.plot(exponential_data, '-o')

[<matplotlib.lines.Line2D at 0x1be5fcbf708>]

In [4]:
# go back to the 1st subplot axes and add the squared series to it with the '-x' format
left_axes = plt.subplot(1, 2, 1)
left_axes.plot(exponential_data, '-x')

  


[<matplotlib.lines.Line2D at 0x1be5fcff108>]

In [5]:
plt.figure()

# left panel: the linear series
ax1 = plt.subplot(1, 2, 1)
ax1.plot(linear_data, '-o')

# right panel: the squared series; sharey=ax1 ties its y axis to the left panel's
ax2 = plt.subplot(1, 2, 2, sharey=ax1)
ax2.plot(exponential_data, '-x')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1be635761c8>]

In [6]:
plt.figure()
# the three-argument call and the packed three-digit shorthand are compared for equality;
# the cell output shows the comparison result
three_arg_axes = plt.subplot(1, 2, 1)
shorthand_axes = plt.subplot(121)
three_arg_axes == shorthand_axes

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  This is separate from the ipykernel package so we can avoid doing imports until


True

In [7]:
# build a 3x3 grid of subplots whose x and y axes are all shared
fig, axes_grid = plt.subplots(3, 3, sharex=True, sharey=True)
# name the nine axes row by row
(ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9) = axes_grid
# draw the linear series on the centre (5th) axes
ax5.plot(linear_data, '-')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1be636c1708>]

In [8]:
# Make the tick labels of the inner subplots visible again.
# plt.subplots(..., sharex=True, sharey=True) switches the inner labels off through the
# axes' tick parameters (labelbottom / labelleft), so toggling the visibility of the
# individual label Text objects alone has no effect; the tick parameters must be re-enabled.
for ax in plt.gcf().get_axes():
    ax.tick_params(axis='both', which='both', labelbottom=True, labelleft=True)
    # kept for older matplotlib releases that hid the labels via set_visible(False)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

In [9]:
# necessary on some systems to update the plot: ask the current figure's canvas to redraw
plt.gcf().canvas.draw()

# Histograms

In [10]:
# create a 2x2 grid of subplots that share their x axis
fig, axes_grid = plt.subplots(2, 2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# panel number n (counting from 0) gets 10**(n+1) normal draws: 10, 100, 1000 and 10000
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample)
    panel.set_title('n={}'.format(sample_size))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
# same 2x2 layout as before, but every histogram now uses 100 bins
fig, axes_grid = plt.subplots(2, 2, sharex=True)
(ax1, ax2), (ax3, ax4) = axes_grid
axs = [ax1, ax2, ax3, ax4]

# panel number n (counting from 0) gets 10**(n+1) normal draws
for n, panel in enumerate(axs):
    sample_size = 10 ** (n + 1)
    sample = np.random.normal(loc=0.0, scale=1.0, size=sample_size)
    panel.hist(sample, bins=100)
    panel.set_title('n={}'.format(sample_size))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
plt.figure()
# 10000 normal draws (loc=0.0 and scale=1.0 are the defaults) for the vertical axis ...
Y = np.random.normal(size=10000)
# ... against 10000 draws from np.random.random for the horizontal axis
X = np.random.random(10000)
plt.scatter(X, Y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x1be6356cb48>

In [13]:
# use gridspec to partition the figure into subplots
import matplotlib.gridspec as gridspec

# new figure with figsize (7, 7), partitioned by a 3x3 grid specification
plt.figure(figsize=(7, 7))
gspec = gridspec.GridSpec(nrows=3, ncols=3)

# row 0, columns 1 onward
top_histogram = plt.subplot(gspec[0, 1:])
# rows 1 onward, column 0
side_histogram = plt.subplot(gspec[1:, 0])
# rows 1 onward, columns 1 onward
lower_right = plt.subplot(gspec[1:, 1:])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
# fresh samples: Y from the normal distribution (default loc/scale), X from np.random.random
Y = np.random.normal(size=10000)
X = np.random.random(10000)

# X's histogram goes in the top panel, the X/Y scatter in the large lower-right panel
top_histogram.hist(X, bins=100)
lower_right.scatter(X, Y)
# Y's histogram is drawn horizontally in the side panel; the result is stored in `s`
s = side_histogram.hist(Y, orientation='horizontal', bins=100)

In [15]:
# wipe both marginal panels, then redraw their histograms with density=True
top_histogram.clear()
side_histogram.clear()
top_histogram.hist(X, density=True, bins=100)
side_histogram.hist(Y, density=True, bins=100, orientation='horizontal')
# flip the side histogram's x axis
side_histogram.invert_xaxis()

In [16]:
# change axes limits
# the two panels that show X along their horizontal axis get x limits 0..1
for ax in (top_histogram, lower_right):
    ax.set_xlim(left=0, right=1)
# the two panels that show Y along their vertical axis get y limits -5..5
for ax in (side_histogram, lower_right):
    ax.set_ylim(bottom=-5, top=5)

![MOOC DATA](moocdata.png "Image")

# Box and Whisker Plots

In [17]:
import pandas as pd
# three samples of 100000 values each, drawn in this order: normal, random, gamma
normal_sample = np.random.normal(size=100000)  # loc=0.0, scale=1.0 are the defaults
random_sample = np.random.random(100000)
gamma_sample = np.random.gamma(2, size=100000)

# one DataFrame column per sample
columns = {'normal': normal_sample,
           'random': random_sample,
           'gamma': gamma_sample}
df = pd.DataFrame(columns)

In [18]:
# per-column summary statistics (count, mean, std, min, quartiles, max), as shown in the output
df.describe()

Unnamed: 0,normal,random,gamma
count,100000.0,100000.0,100000.0
mean,0.001136,0.500437,1.999853
std,0.998034,0.289091,1.422295
min,-4.577031,2e-06,0.004109
25%,-0.674853,0.250816,0.960365
50%,0.001626,0.501163,1.675384
75%,0.672998,0.750673,2.686692
max,4.321745,0.999998,14.020975


In [19]:
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to suppress output.
# whis=(0, 100) places the whiskers at the 0th and 100th percentiles, i.e. the data's
# min and max; it replaces whis='range', which newer Matplotlib releases deprecated and removed.
_ = plt.boxplot(df['normal'], whis=(0, 100))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns.
# whis=(0, 100) places the whiskers at the 0th and 100th percentiles (the data's min and max);
# it replaces whis='range', which newer Matplotlib releases deprecated and removed.
_ = plt.boxplot([df['normal'], df['random'], df['gamma']], whis=(0, 100))

In [21]:
import seaborn as sns
# draw seaborn's distribution plot of the gamma column on a new figure
# NOTE(review): distplot appears to be deprecated in newer seaborn releases in favour of
# histplot/displot - confirm against the installed seaborn version before upgrading
plt.figure()
sns.distplot(df['gamma'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x1be678539c8>

In [22]:
# no new figure is created here (the figure() call is commented out), so the density
# histogram is drawn onto whatever axes are current when this cell runs
# plt.figure()
_ = plt.hist(df['gamma'], bins=100, density = True)

In [23]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il

plt.figure()
# box plots of the three columns; whis=(0, 100) places the whiskers at the 0th and 100th
# percentiles (data min and max) and replaces whis='range', which newer Matplotlib
# releases deprecated and removed
plt.boxplot([df['normal'], df['random'], df['gamma']], whis=(0, 100))
# overlay an inset axes on top of the current axes: 60% of its width, 40% of its height,
# placed with location code 2
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
# histogram of the gamma column inside the inset, with extra horizontal margin
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [24]:
# switch the y axis ticks for ax2 to the right side
# (ax2 is expected to be the inset axes created in the preceding cell)
ax2.yaxis.tick_right()

In [25]:
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
plt.figure()
# gather the three columns in plotting order, then draw them in one call
columns_to_plot = [df[name] for name in ('normal', 'random', 'gamma')]
_ = plt.boxplot(columns_to_plot)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Heatmaps

In [26]:

import matplotlib.gridspec as gridspec

# fresh samples: Y from the normal distribution (default loc/scale), X from np.random.random
Y = np.random.normal(size=10000)
X = np.random.random(10000)

# figure with figsize (7, 7), partitioned by a 3x3 grid specification
plt.figure(figsize=(7, 7))
gspec = gridspec.GridSpec(nrows=3, ncols=3)

# row 0 / columns 1 onward, rows 1 onward / column 0, rows 1 onward / columns 1 onward
top_histogram = plt.subplot(gspec[0, 1:])
side_histogram = plt.subplot(gspec[1:, 0])
lower_right = plt.subplot(gspec[1:, 1:])

# marginal histogram of X on top, 2D histogram (25 bins) of X against Y in the large panel
top_histogram.hist(X, bins=100)
_ = lower_right.hist2d(X, Y, bins=25)
# horizontal histogram of Y in the side panel; the result is stored in `s`
s = side_histogram.hist(Y, orientation='horizontal', bins=100)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [28]:
# 2D histogram of X against Y on a new figure, using 100 bins and density=True;
# the return value is assigned to `_` so the cell prints no result
plt.figure()
_ = plt.hist2d(X, Y, bins=100, density=True)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [29]:
# add a colorbar legend to the current figure
# (presumably for the hist2d drawn in the preceding cell - depends on cell execution order)
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x29ffdd97dc8>

# Animations

In [43]:
import matplotlib.animation as animation

# n is the number of samples; update() also compares the frame number against it
n = 100
# n random draws from np.random.randn, revealed progressively by the animation
x = np.random.randn(n)

In [44]:
# create the function that will do the plotting, where curr is the current frame
def update(curr):
    """Redraw the histogram using the first ``curr`` samples of ``x``.

    ``curr`` is the current frame number. The function reads the notebook-level
    names ``n``, ``x`` and ``a`` and draws on the current axes.
    """
    # once the frame number reaches n, stop the event source of the animation `a`
    if curr == n:
        a.event_source.stop()

    axes = plt.gca()
    axes.cla()
    # bin edges from -4 up to (but not including) 4, in steps of 0.5
    axes.hist(x[:curr], bins=np.arange(-4, 4, 0.5))
    # fixed view limits: x from -4 to 4, y from 0 to 30
    axes.axis([-4, 4, 0, 30])
    axes.set_title('Sampling the Normal Distribution')
    axes.set_ylabel('Frequency')
    axes.set_xlabel('Value')
    # show the current sample count at data coordinates (3, 27)
    axes.annotate('n = {}'.format(curr), [3, 27])

In [45]:
# new figure for the animation
fig = plt.figure()
# animate `fig` by calling update with interval=100; the result is kept in `a` because
# update() calls a.event_source.stop() to halt the animation
a = animation.FuncAnimation(fig, update, interval=100)

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Interactivity

In [2]:
# new figure showing ten random values as a line; `data` is re-plotted by onclick below
plt.figure()
data = np.random.rand(10)
plt.plot(data)

def onclick(event):
    """Clear the current axes, redraw ``data`` and report the click in the title.

    The title shows the event's ``x``/``y`` (labelled as pixels) and its
    ``xdata``/``ydata`` (labelled as data).
    """
    axes = plt.gca()
    axes.cla()
    axes.plot(data)
    caption = 'Event at pixels {},{} \nand data {},{}'.format(
        event.x, event.y, event.xdata, event.ydata)
    axes.set_title(caption)

# tell mpl_connect we want to pass a 'button_press_event' into onclick when the event is detected;
# the handler is registered on the current figure's canvas and the call's return value is the cell output
plt.gcf().canvas.mpl_connect('button_press_event', onclick)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

7

In [6]:
from random import shuffle
import pandas as pd

# the ten country labels, put into a random order in place
origins = [
    'China', 'Brazil', 'India', 'USA', 'Canada',
    'UK', 'Germany', 'Iraq', 'Chile', 'Mexico',
]
shuffle(origins)

# ten random heights and ten random weights, one pair per origin
columns = {}
columns['height'] = np.random.rand(10)
columns['weight'] = np.random.rand(10)
columns['origin'] = origins

# note: this rebinds the notebook-level name `df`
df = pd.DataFrame(columns)
df

Unnamed: 0,height,weight,origin
0,0.475723,0.346403,Brazil
1,0.888245,0.642192,India
2,0.849461,0.217104,UK
3,0.10325,0.129965,Chile
4,0.378213,0.811891,Canada
5,0.023567,0.593924,Germany
6,0.758345,0.341058,Iraq
7,0.069846,0.337693,China
8,0.641857,0.329305,Mexico
9,0.120887,0.452462,USA


In [7]:
plt.figure()
# picker=5 means the mouse doesn't have to click directly on an event, but can be up to 5 pixels away
plt.scatter(df['height'], df['weight'], picker=5)

# label both axes of the scatter plot
axes = plt.gca()
axes.set_ylabel('Weight')
axes.set_xlabel('Height')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Height')

In [9]:
def onpick(event):
    """Show the origin of the picked point as a text label and in the axes title.

    The first index in ``event.ind`` selects the row of ``df`` to look up.
    Every call adds another text at (0.2, 0.2); earlier texts are not removed.
    """
    picked_row = df.iloc[event.ind[0]]
    origin = picked_row['origin']
    axes = plt.gca()
    axes.text(0.2, 0.2, origin)
    axes.set_title('Selected item came from {}'.format(origin))

# tell mpl_connect we want to pass a 'pick_event' into onpick when the event is detected;
# the handler is registered on the canvas of whichever figure is current when this cell runs
plt.gcf().canvas.mpl_connect('pick_event', onpick)

8

In [95]:
import pandas as pd

In [98]:
# read the JSON served at the GitHub API URL for jtleek's repositories into a DataFrame
# (requires network access; the rows shown depend on the live API response)
pd.read_json('https://api.github.com/users/jtleek/repos')

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,forks_count,mirror_url,archived,disabled,open_issues_count,license,forks,open_issues,watchers,default_branch
0,155565363,MDEwOlJlcG9zaXRvcnkxNTU1NjUzNjM=,2018,jtleek/2018,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/2018,Fall 2018 repository with course materials for...,True,https://api.github.com/repos/jtleek/2018,...,2,,False,False,0,,2,0,0,master
1,264786491,MDEwOlJlcG9zaXRvcnkyNjQ3ODY0OTE=,ads2020,jtleek/ads2020,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/ads2020,Advanced Data Science 2020 Edition,False,https://api.github.com/repos/jtleek/ads2020,...,3,,False,False,0,,3,0,30,master
2,101394164,MDEwOlJlcG9zaXRvcnkxMDEzOTQxNjQ=,advdatasci,jtleek/advdatasci,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci,,True,https://api.github.com/repos/jtleek/advdatasci,...,8,,False,False,0,,8,0,16,master
3,111447948,MDEwOlJlcG9zaXRvcnkxMTE0NDc5NDg=,advdatasci-project,jtleek/advdatasci-project,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci-project,Awesome project!,False,https://api.github.com/repos/jtleek/advdatasci...,...,0,,False,False,0,,0,0,0,master
4,47568815,MDEwOlJlcG9zaXRvcnk0NzU2ODgxNQ==,advdatasci-swirl,jtleek/advdatasci-swirl,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci-swirl,,False,https://api.github.com/repos/jtleek/advdatasci...,...,2,,False,False,1,,2,1,4,master
5,41645119,MDEwOlJlcG9zaXRvcnk0MTY0NTExOQ==,advdatasci15,jtleek/advdatasci15,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci15,Advanced Data Science @ JHU Biostats,False,https://api.github.com/repos/jtleek/advdatasci15,...,7,,False,False,0,,7,0,14,gh-pages
6,65922328,MDEwOlJlcG9zaXRvcnk2NTkyMjMyOA==,advdatasci16,jtleek/advdatasci16,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci16,,False,https://api.github.com/repos/jtleek/advdatasci16,...,9,,False,False,0,,9,0,6,gh-pages
7,66415014,MDEwOlJlcG9zaXRvcnk2NjQxNTAxNA==,advdatasci_swirl,jtleek/advdatasci_swirl,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/advdatasci_swirl,,False,https://api.github.com/repos/jtleek/advdatasci...,...,4,,False,False,1,,4,1,4,master
8,12441219,MDEwOlJlcG9zaXRvcnkxMjQ0MTIxOQ==,ballgown,jtleek/ballgown,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/ballgown,code for manipulating ballgown output in R,True,https://api.github.com/repos/jtleek/ballgown,...,1,,False,False,0,,1,0,0,master
9,258010045,MDEwOlJlcG9zaXRvcnkyNTgwMTAwNDU=,big_course,jtleek/big_course,False,"{'login': 'jtleek', 'id': 1571674, 'node_id': ...",https://github.com/jtleek/big_course,Repo for the BIG Experiment MOOC,False,https://api.github.com/repos/jtleek/big_course,...,0,,False,False,0,,0,0,1,master
