# Plotting and Visualization

In [1]:
import numpy as np
import pandas as pd
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.random.seed(12345)
import matplotlib.pyplot as plt
import matplotlib
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)

%matplotlib notebook

## A Brief matplotlib API Primer

In [4]:
%matplotlib notebook
import matplotlib.pyplot as plt

In [7]:
import numpy as np
data = np.arange(10)
print(data)
plt.plot(data)

[0 1 2 3 4 5 6 7 8 9]


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f49dc5aeb8>]

### Figures and Subplots

In [13]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [17]:
ax1 = fig.add_subplot(2, 2, 1)



In [15]:
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

In [16]:
plt.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x1f49e3806d8>]

In [18]:
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x1f49e38ab70>

In [19]:
plt.close('all')

In [20]:
fig, axes = plt.subplots(2, 3)
axes

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001F49E3FC668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001F49E432FD0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001F49E467DA0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001F49E4AC240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001F49E4E7160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001F49FAE2588>]], dtype=object)

#### Adjusting the spacing around subplots

subplots_adjust(left=None, bottom=None, right=None, top=None,
                wspace=None, hspace=None)

fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

In [23]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### Colors, Markers, and Line Styles

ax.plot(x, y, 'g--')

ax.plot(x, y, linestyle='--', color='g')

In [24]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
from numpy.random import randn
plt.plot(randn(30).cumsum(), 'ko--')

[<matplotlib.lines.Line2D at 0x1f49ff601d0>]

plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

In [26]:
plt.close('all')

In [27]:
data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1f4a003aeb8>

### Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [28]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(np.random.randn(1000).cumsum())

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f49ff4c358>]

In [29]:
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')

In [30]:
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

Text(0.5,10.7639,'Stages')

props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}
ax.set(**props)

#### Adding legends

In [31]:
from numpy.random import randn
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f49e745438>]

In [32]:
ax.legend(loc='best')

<matplotlib.legend.Legend at 0x1f49e769198>

### Annotations and Drawing on a Subplot

ax.text(x, y, 'Hello world!',
        family='monospace', fontsize=10)

In [43]:
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
#print(spx)

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')
plt.savefig('spx.svg')

<IPython.core.display.Javascript object>

In [None]:
ax.set_title('Important dates in the 2008–2009 financial crisis')

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

In [40]:
fig = plt.figure(figsize=(12, 6)); ax = fig.add_subplot(1, 1, 1)
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x1f4a1d61d30>

### Saving Plots to File

plt.savefig('figpath.svg')

plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()

### matplotlib Configuration

plt.rc('figure', figsize=(10, 10))

font_options = {'family' : 'monospace',
                'weight' : 'bold',
                'size'   : 'small'}
plt.rc('font', **font_options)

## Plotting with pandas and seaborn

### Line Plots

In [44]:
plt.close('all')

In [45]:
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a3cea748>

In [46]:
df = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                  columns=['A', 'B', 'C', 'D'],
                  index=np.arange(0, 100, 10))
print(df)
df.plot()

           A         B         C         D
0   0.796007 -1.372546 -0.212470 -0.964650
10  0.076844 -1.028433  0.255753 -1.368903
20  1.822479  0.205969  0.343432 -0.890841
30 -0.335017 -0.186973 -0.391902 -0.119292
40  1.994855 -2.900248 -1.918237  1.303942
50  0.693087 -3.217446 -1.150329  0.501662
60 -1.189151 -2.629030 -1.105967  0.954904
70 -1.962184 -4.166524 -2.089815  0.564406
80 -2.909266 -4.591008 -3.264575 -0.337069
90 -3.819802 -4.885711 -4.710928 -1.528682


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a3cb9a90>

### Bar Plots

In [47]:
fig, axes = plt.subplots(2, 1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a3e00908>

In [None]:
np.random.seed(12348)

In [48]:
df = pd.DataFrame(np.random.rand(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
print(df)
df.plot.bar()

Genus         A         B         C         D
one    0.246927  0.101336  0.155366  0.323460
two    0.467364  0.003187  0.052101  0.012485
three  0.075105  0.905738  0.015175  0.427970
four   0.907012  0.443965  0.136207  0.681712
five   0.995781  0.159971  0.450561  0.873033
six    0.879165  0.779880  0.174873  0.773729


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a1df34a8>

In [49]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [50]:
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f49dc64b00>

In [51]:
plt.close('all')

In [52]:
tips = pd.read_csv('examples/tips.csv')
party_counts = pd.crosstab(tips['day'], tips['size'])
print(party_counts)
# Not many 1- and 6-person parties
party_counts = party_counts.loc[:, 2:5]
party_counts

size  1   2   3   4  5  6
day                      
Fri   1  16   1   1  0  0
Sat   2  53  18  13  1  0
Sun   0  39  15  18  3  1
Thur  1  48   4   5  1  3


size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,16,1,1,0
Sat,53,18,13,1
Sun,39,15,18,3
Thur,48,4,5,1


In [53]:
# Normalize to sum to 1
party_pcts = party_counts.div(party_counts.sum(1), axis=0)
print(party_pcts)
party_pcts.plot.bar()

size         2         3         4         5
day                                         
Fri   0.888889  0.055556  0.055556  0.000000
Sat   0.623529  0.211765  0.152941  0.011765
Sun   0.520000  0.200000  0.240000  0.040000
Thur  0.827586  0.068966  0.086207  0.017241


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f49ff08ef0>

In [54]:
plt.close('all')

In [56]:
import seaborn as sns
tips['tip_pct'] = tips['tip'] / tips['total_bill']
print(tips.head())
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

   total_bill   tip smoker  day    time  size   tip_pct
0       16.99  1.01     No  Sun  Dinner     2  0.059447
1       10.34  1.66     No  Sun  Dinner     3  0.160542
2       21.01  3.50     No  Sun  Dinner     3  0.166587
3       23.68  3.31     No  Sun  Dinner     2  0.139780
4       24.59  3.61     No  Sun  Dinner     4  0.146808


<matplotlib.axes._subplots.AxesSubplot at 0x1f4a5b4b128>

In [57]:
plt.close('all')

In [60]:
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a57359b0>

In [62]:
plt.close('all')

In [63]:
sns.set(style="whitegrid")

### Histograms and Density Plots

In [64]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [65]:
tips['tip_pct'].plot.hist(bins=50)

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a5906eb8>

In [66]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [67]:
tips['tip_pct'].plot.density()

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a5a60c88>

In [68]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [69]:
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
print(comp1)
print(comp2)
values = pd.Series(np.concatenate([comp1, comp2]))
print(values)
sns.distplot(values, bins=100, color='k')

[-0.7114  0.4241 -0.3227 -0.6866 -0.3414  0.1225 -1.7056 -0.0662  0.2398
 -1.2243  0.5553  1.1975  0.1601  0.3771  0.9086  0.6433 -1.359   1.0102
  0.6043 -2.1663  0.1467  0.0987 -0.739   0.7918  0.3868 -0.9455  1.0575
 -0.3254  0.6275  0.0086 -0.0913 -0.0211  1.5912  0.4462  0.2113  1.2122
 -1.8345 -1.1362 -0.1177 -1.2153 -0.5147  0.2394  1.6259 -0.8986  0.4382
  0.374  -0.2213 -1.2534 -1.0548 -1.8443 -1.8514 -0.6669 -0.6797 -0.0111
  1.1626  0.9297  1.9944 -0.9779  0.7781  1.9751  1.8331  0.3908  0.1367
  0.646  -1.5658  0.9764 -1.2821  1.2227  0.4914 -2.5562  0.1255 -0.4626
 -0.7954 -1.3844 -0.7199 -0.4302  0.5555 -0.4134 -2.6572  1.7581 -0.9532
 -1.9345 -0.4674 -0.8493 -2.2066 -1.4146  1.4886  0.5803  0.7571  0.2262
 -0.8095  0.1495 -0.6913  0.7382  1.1666 -0.1363  1.3316 -0.0321  1.2908
 -0.3895  0.7956  0.5852  0.0509  1.0519  0.4402  1.4455 -0.9009  0.8413
  0.0389 -1.5133 -0.9464 -0.9839 -0.0407  0.336  -0.5298 -0.6929  1.3383
 -0.2526 -0.8081  1.4413  1.1239  0.2503  0.4485 -0

<matplotlib.axes._subplots.AxesSubplot at 0x1f4a5afbcc0>

### Scatter or Point Plots

In [70]:
macro = pd.read_csv('examples/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [71]:
plt.figure()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [72]:
sns.regplot('m1', 'unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5,1,'Changes in log m1 versus log unemp')

In [73]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1f4a72117f0>

### Facet Grids and Categorical Data

In [74]:
sns.factorplot(x='day', y='tip_pct', hue='time', col='smoker',
               kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f4a8fd4978>

In [75]:
sns.factorplot(x='day', y='tip_pct', row='time',
               col='smoker',
               kind='bar', data=tips[tips.tip_pct < 1])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f4a940b160>

In [76]:
sns.factorplot(x='tip_pct', y='day', kind='box',
               data=tips[tips.tip_pct < 0.5])

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f4a9bbb3c8>

## Other Python Visualization Tools

In [None]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS

## Conclusion