# matplotlib - Plotting and Visualization

In [2]:
from datetime import datetime

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from pandas import DataFrame, Series

sns.set()

## A Brief matplotlib API Primer

In [3]:
# Ten consecutive integers, 0 through 9, used as simple sample data
data = np.arange(0, 10)
data

In [4]:
# Only y-values are given, so they are plotted against their index positions
plt.plot(data)

### Figures and Subplots

A note when working with Jupyter Notebooks...

*One nuance of using Jupyter notebooks is that plots are reset after each cell is evaluated, so for more complex plots you must put all of the plotting commands in a single notebook cell.*

In [5]:
# Start a new 10 x 7 inch figure and fill three slots of a 2 x 2 grid
# (the fourth slot is left empty).
fig = plt.figure(figsize=(10, 7))
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# No axes object is named here, so plt.plot draws on the current axes,
# i.e. the most recently created subplot.
random_walk = np.random.randn(50).cumsum()
plt.plot(random_walk, 'k--')  # 'k--' = black dashed line

In [6]:
# New 10 x 7 inch figure with three of the four slots of a 2 x 2 grid populated
fig = plt.figure(figsize=(10, 7))
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# plt.plot draws on the current axes (the most recently created subplot)
random_walk = np.random.randn(50).cumsum()
plt.plot(random_walk, 'k--')  # 'k--' = black dashed line

# Calling a method on a specific axes object draws on that subplot instead;
# assigning the return value to `_` keeps it out of the cell output.
histogram_sample = np.random.randn(100)
_ = ax1.hist(histogram_sample, bins=20, color='k', alpha=0.3)

In [7]:
# New 10 x 7 inch figure with three of the four slots of a 2 x 2 grid populated
fig = plt.figure(figsize=(10, 7))
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# plt.plot draws on the current axes (the most recently created subplot)
random_walk = np.random.randn(50).cumsum()
plt.plot(random_walk, 'k--')  # '--' = dashed line

# Histogram on the first subplot; `_` keeps the return value out of the output
histogram_sample = np.random.randn(100)
_ = ax1.hist(histogram_sample, bins=20, color='k', alpha=0.3)

# Noisy straight line as a scatter plot on the second subplot
scatter_x = np.arange(30)
scatter_y = scatter_x + 3 * np.random.randn(30)
ax2.scatter(scatter_x, scatter_y)

In [8]:
# plt.subplots creates the figure and the whole grid of subplots in one call
fig, axes = plt.subplots(nrows=2, ncols=3)
axes

# `axes` is a 2 x 3 array indexed as axes[row, col]:
#   [0, 0]  [0, 1]  [0, 2]
#   [1, 0]  [1, 1]  [1, 2]

#### Adjusting the spacing around subplots

In [9]:
# Four histograms drawn on shared x and y scales
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
for panel in axes.flat:  # row-major order: [0, 0], [0, 1], [1, 0], [1, 1]
    panel.hist(np.random.randn(500), bins=50, color='red', alpha=0.5)

# wspace is the width (horizontal) spacing and hspace the height (vertical)
# spacing between subplots; zero removes the gaps between all plots.
plt.subplots_adjust(wspace=0, hspace=0)

### Colors, Markers, and Line Styles

```ax.plot(x, y, 'g--')```

The same call can be written more explicitly by passing each style option as a keyword argument:

```ax.plot(x, y, linestyle='--', color='g')```

In [10]:
# 'ko--' is a format string: k = black, o = circle markers, -- = dashed line
plt.plot(np.random.randn(30).cumsum(), 'ko--')

In [11]:
# Keyword-argument equivalent of the 'ko--' format string: every style option
# is spelled out explicitly. .cumsum() makes this a random walk, the same kind
# of data the 'ko--' example plots, so the two figures are comparable.
plt.plot(np.random.randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

In [12]:
# 30-step random walk: running total of standard-normal draws
data = np.cumsum(np.random.randn(30))

In [13]:
# Black dashed line using the default draw style (points joined by straight segments)
plt.plot(data, color='k', linestyle='--', label='Default')

In [14]:
# drawstyle is a line-plot option; 'steps-post' draws the series as a step function
plt.plot(data, color='k', linestyle='-', drawstyle='steps-post', label='steps-post')

In [15]:
# Overlay the same series with two draw styles. The step version uses a solid
# line ('k-') so it can be told apart from the dashed default line, both in the
# plot and in the legend.
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')  # legend entries are taken from each line's `label`

### Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [16]:
# Baseline plot with matplotlib's automatic ticks and no title or labels
fig, ax = plt.subplots()
random_walk = np.random.randn(1000).cumsum()
ax.plot(random_walk)

In [17]:
fig, ax = plt.subplots()

# Place the x ticks by hand, then give each tick a custom text label
tick_positions = [0, 250, 500, 750, 1000]
tick_names = ['one', 'two', 'three', 'four', 'five']
ticks = ax.set_xticks(tick_positions)
labels = ax.set_xticklabels(tick_names, rotation=30, fontsize='small')

ax.set_xlabel('Stages')  # caption for the x axis
ax.set_title('This is a title')  # title shown above the plot

random_walk = np.random.randn(1000).cumsum()
ax.plot(random_walk)

In [18]:
fig, ax = plt.subplots()

# Place the x ticks by hand, then give each tick a custom text label
tick_positions = [0, 250, 500, 750, 1000]
tick_names = ['one', 'two', 'three', 'four', 'five']
ticks = ax.set_xticks(tick_positions)
labels = ax.set_xticklabels(tick_names, rotation=30, fontsize='small')

# ax.set accepts several properties at once, here unpacked from a dict,
# instead of one set_* call per property.
props = dict(title='This is a title', xlabel='Stages')
ax.set(**props)

random_walk = np.random.randn(1000).cumsum()
ax.plot(random_walk)

#### Adding legends

In [19]:
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)

# Three independent random walks, each with its own line format and legend label
for line_format, series_label in [('k', 'one'), ('k--', 'two'), ('k.', 'three')]:
    ax.plot(np.random.randn(1000).cumsum(), line_format, label=series_label)

# loc='best' lets matplotlib choose the location that is most out of the way
ax.legend(loc='best')

### Annotations and Drawing on a Subplot

In [20]:
# Plot the S&P 500 index and annotate three dates from the financial crisis.
# `datetime` is imported in the notebook's import cell.
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1)

# Example from S&P 500 index price since 2007; the first CSV column is parsed
# as dates and used as the index.
data = pd.read_csv('../input/pandas-data-loading-storage-file-formats/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

for date, label in crisis_data:
    # asof returns the last available value at or before `date`, so the lookup
    # still works when `date` itself is missing from the index.
    level = spx.asof(date)
    # The arrow tip sits 75 points above the curve and the text 225 points above it.
    ax.annotate(label, xy=(date, level + 75),
                xytext=(date, level + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2, headlength=4),
                horizontalalignment='left',
                verticalalignment='top')

# Zoom in on 2007-2010. The limits are datetime objects, the same type as the
# annotation coordinates, so nothing depends on how a date string is parsed.
ax.set_xlim([datetime(2007, 1, 1), datetime(2011, 1, 1)])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

In [21]:
fig, ax = plt.subplots()

# Build three shapes (patches); each one must be added to the axes to be drawn
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15, color='k', alpha=0.3)
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
pgon = plt.Polygon(xy=[[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]], color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

### Saving Plots to File

In [22]:
# Save through the Figure object created in the previous plotting cell.
# In a notebook the current pyplot figure is reset after every cell, so a bare
# plt.savefig here would write a new, empty figure.
# The file format is inferred from the extension (.svg).
fig.savefig('figpath.svg')

In [23]:
# Recommended options: dpi sets the resolution in dots per inch, and
# bbox_inches='tight' trims the whitespace around the figure.
# Saving through `fig` rather than plt.savefig targets the figure built earlier;
# the current pyplot figure is reset after every notebook cell and would be empty.
fig.savefig('figpath.png', dpi=400, bbox_inches='tight')

### matplotlib Configuration

In [24]:
# Global default: figures created from here on are 10 x 10 inches
plt.rcParams['figure.figsize'] = (10, 10)

In [26]:
# Global font defaults, applied to every piece of text in later figures
font_options = dict(family='monospace', weight='bold', size=10)
plt.rc('font', **font_options)

# Other groups that plt.rc can configure the same way:
# 'figure', 'axes', 'xtick', 'ytick', 'grid', 'legend'

## Plotting with pandas and seaborn

### Line Plots

In [27]:
# Random walk indexed by 0, 10, ..., 90; the index supplies the x-axis values
walk_values = np.random.randn(10).cumsum()
s = pd.Series(walk_values, index=np.arange(0, 100, 10))
s.plot()

In [28]:
# Four random walks (cumulative sum down the rows); DataFrame.plot draws one
# line per column on a single subplot.
df = pd.DataFrame(
    np.random.randn(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=list('ABCD'),
)
df.plot()

### Bar Plots

In [30]:
# Two stacked subplots: vertical bars on top, horizontal bars underneath
fig, axes = plt.subplots(nrows=2, ncols=1)

# Sixteen uniform random values labelled 'a' through 'p'
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
print(data)

# plot(kind=...) is the keyword spelling of the .plot.bar / .plot.barh accessors
data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)

data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

In [33]:
# 6 x 4 frame of uniform random values; the name of the column index ('Genus')
# is what pandas uses as the legend title when the frame is plotted.
row_labels = ['one', 'two', 'three', 'four', 'five', 'six']
genus_columns = pd.Index(list('ABCD'), name='Genus')
df = DataFrame(np.random.rand(len(row_labels), len(genus_columns)),
               index=row_labels,
               columns=genus_columns)
df

In [34]:
# One group of bars per row, one bar per column within each group
df.plot(kind='bar')

In [38]:
# Horizontal bars with the columns of each row stacked end to end
df.plot(kind='barh', stacked=True, alpha=0.5)

In [41]:
# Load the tipping dataset and count rows for every (day, party size) pair
tips = pd.read_csv('../input/pandas-data-loading-storage-file-formats/tips.csv')
party_counts = pd.crosstab(index=tips['day'], columns=tips['size'])
party_counts

In [42]:
# Keep only the columns labelled 2 through 5 (label-based slice, both ends included)
party_counts = party_counts.loc[:, 2:5]

# Divide each row by its own total so that every row sums to 1
row_totals = party_counts.sum(axis=1)
party_pcts = party_counts.div(row_totals, axis=0)
party_pcts

In [43]:
# Grouped bars: one group per row of party_pcts, one bar per column
party_pcts.plot(kind='bar')

In [44]:
# Tip as a fraction of the bill with the tip amount subtracted out
bill_before_tip = tips['total_bill'] - tips['tip']
tips['tip_pct'] = tips['tip'] / bill_before_tip
tips.head()

In [45]:
# Horizontal bars of tip_pct aggregated per day
sns.barplot(data=tips, x='tip_pct', y='day', orient='h')

In [46]:
# Same plot, with each day split into separate bars by the `time` column
sns.barplot(data=tips, x='tip_pct', y='day', hue='time', orient='h')

In [47]:
# Switch the seaborn theme to the 'whitegrid' style.
# NOTE(review): this presumably only affects figures drawn after this cell -- confirm.
sns.set(style='whitegrid')

### Histograms and Density Plots

In [48]:
# Histogram of tip percentages using 50 bins
tips['tip_pct'].plot(kind='hist', bins=50)

In [49]:
# Density plot of the tip percentages
tips['tip_pct'].plot(kind='density')

In [52]:
# Bimodal sample: 200 draws from N(0, 1) followed by 200 draws from N(10, 2)
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)
values = pd.Series(np.concatenate((comp1, comp2)))

# distplot plots density and histogram simultaneously.
# NOTE(review): newer seaborn releases deprecate distplot -- check the installed version.
sns.distplot(values, bins=100, color='k')

### Scatter or Point Plots

In [53]:
# Load the macro dataset and keep the four series used in the scatter plots
macro = pd.read_csv('../input/pandas-data-loading-storage-file-formats/macrodata.csv')
selected_columns = ['cpi', 'm1', 'tbilrate', 'unemp']
data = macro[selected_columns]
data

In [56]:
# Log-differences: take logs, difference consecutive rows, then drop rows
# containing NaN (the first row has no predecessor to difference against).
log_levels = np.log(data)
trans_data = log_levels.diff().dropna()
trans_data

In [57]:
# Regression/scatter plot: points plus a fitted linear regression line.
# x and y are passed by keyword; seaborn deprecated, then stopped accepting,
# positional x/y for this function.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

In [58]:
# Scatter-plot matrix of every pair of columns, with a KDE on the diagonal;
# great for exploratory data analysis.
sns.pairplot(data=trans_data, plot_kws=dict(alpha=0.2), diag_kind='kde')

### Facet Grids and Categorical Data

Facet grid is often useful to visualize data with many categorical variables.

In [59]:
# One bar-chart panel per `smoker` value, with bars coloured by `time`;
# rows with tip_pct >= 1 are filtered out first.
# catplot is the current name of the deprecated factorplot; `kind` is given
# explicitly, so the plot is the same.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])

In [61]:
# Grid of bar charts: one row per `time` value and one column per `smoker` value;
# rows with tip_pct >= 1 are filtered out first.
# catplot is the current name of the deprecated factorplot; `kind` is given
# explicitly, so the plot is the same.
sns.catplot(x='day', y='tip_pct', row='time', col='smoker', kind='bar', data=tips[tips.tip_pct < 1])

In [62]:
# Box plot of tip_pct for each day, restricted to rows with tip_pct < 0.5.
# catplot is the current name of the deprecated factorplot; `kind` is given
# explicitly, so the plot is the same.
sns.catplot(x='tip_pct', y='day', kind='box', data=tips[tips.tip_pct < 0.5])