# Visualizing with `matplotlib`
***

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# `cp` is the colour palette that later cells index by position
# (cp[0], cp[1], ...).  It is read from matplotlib's active colour cycle
# so the notebook runs top-to-bottom on a fresh kernel without seaborn.
# (The seaborn setup this replaces was `sns.set()` followed by
# `cp = sns.color_palette()`.)
cp = plt.rcParams['axes.prop_cycle'].by_key()['color']

<br/>

### Thing 1: Line Chart (with many lines)
***

In [2]:
# Load the long-format time series.
ts = pd.read_csv('data/ts.csv')

# Convert `dt` to real datetimes right away; with a proper datetime
# column the date axis in the plots below "just works".
ts['dt'] = pd.to_datetime(ts['dt'])
ts.head()

Unnamed: 0,dt,kind,value
0,2000-01-01,A,1.442521
1,2000-01-02,A,1.98129
2,2000-01-03,A,1.586494
3,2000-01-04,A,1.378969
4,2000-01-05,A,-0.277937


In [None]:
# matplotlib does not care whether a dataframe is "tidy", so reshape to
# wide form: `dt` becomes the index and each `kind` gets its own column
# of `value`s.
dfp = ts.pivot(
    index='dt',
    columns='kind',
    values='value',
)
dfp.head()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 5))

# Draw one labelled line per kind, filtering the long-format frame each time.
for kind in ts['kind'].unique():
    subset = ts.loc[ts['kind'] == kind]
    ax.plot(subset['dt'], subset['value'], label=kind)

ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Random Timeseries')

ax.legend(loc=2)  # loc=2 is 'upper left'
fig.autofmt_xdate()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 5))

# Plot the wide frame in a single call; keep the returned line handles
# so each one can be paired with its column name in the legend.
lines = ax.plot(dfp)

ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Random Timeseries')

ax.legend(lines, dfp.columns, loc=2)  # loc=2 is 'upper left'
fig.autofmt_xdate()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7.5, 7.5))

# Third way to draw the same line chart: let groupby split the frame by
# kind.  Drawing is a side effect, so iterate over the groups explicitly
# (groupby.apply is meant for computing results) and draw on `ax`
# directly instead of going through pyplot's "current axes".
for kind, group in ts.groupby('kind'):
    ax.plot(group['dt'], group['value'], label=kind)

# This is the time-series chart, so it carries the time-series labels.
ax.set(xlabel='Date',
       ylabel='Value',
       title='Random Timeseries')

ax.legend(loc=2)  # loc=2 is 'upper left'
fig.autofmt_xdate()

<br/>

### Thing 2: Scatter
***

In [None]:
# Iris measurements.  The scatter, boxplot and histogram cells below read
# the `species`, `petalLength` and `petalWidth` columns of this frame.
df = pd.read_csv('data/iris.csv')
df.head()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 7.5))

# One scatter call per species, each labelled for the legend.
for species in df['species'].unique():
    subset = df.loc[df['species'] == species]
    ax.scatter(subset['petalLength'], subset['petalWidth'], label=species)

ax.set_xlabel('Petal Length')
ax.set_ylabel('Petal Width')
ax.set_title('Petal Width v. Length -- by Species')

ax.legend(loc=2)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 7.5))

# Same scatter, but the colour of each species is chosen explicitly:
# the n-th species gets the n-th entry of the `cp` palette.
for idx, species in enumerate(df['species'].unique()):
    subset = df.loc[df['species'] == species]
    ax.scatter(subset['petalLength'], subset['petalWidth'],
               label=species, color=cp[idx])

ax.set_xlabel('Petal Length')
ax.set_ylabel('Petal Width')
ax.set_title('Petal Width v. Length -- by Species')

ax.legend(loc=2)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7.5, 7.5))

# Let groupby split the frame by species.  Drawing is a side effect, so
# iterate over the groups explicitly (groupby.apply is meant for
# computing results) and draw on `ax` directly rather than through
# pyplot's "current axes".  The 'o' format string draws markers only.
for species, group in df.groupby('species'):
    ax.plot(group['petalLength'],
            group['petalWidth'],
            'o', label=species)

ax.set(xlabel='Petal Length',
       ylabel='Petal Width',
       title='Petal Width v. Length -- by Species')

ax.legend(loc=2)

<br/>

### Thing 3: Trellising the Above
***

In [None]:
kinds = ts.kind.unique()

# Size the grid from the data: two panels per row, as many rows as
# needed.  For three or four kinds this is a 2x2 grid on a 10x10-inch
# figure.
ncols = 2
nrows = max(1, -(-len(kinds) // ncols))  # ceiling division
fig, axes = plt.subplots(nrows, ncols,
                         figsize=(5 * ncols, 5 * nrows),
                         squeeze=False)  # always a 2-D array of Axes
panels = axes.ravel()

# Use the Axes that plt.subplots already created instead of asking
# pyplot for them again by numeric subplot code.
for i, (ax, k) in enumerate(zip(panels, kinds)):
    tmp = ts[ts.kind == k]
    # wrap around the palette if there are more kinds than colours
    ax.plot(tmp.dt, tmp.value, label=k, c=cp[i % len(cp)])

    ax.set(xlabel='Date',
           ylabel='Value',
           title=k)

# Hide any panel that did not receive a kind (odd number of kinds).
for unused in panels[len(kinds):]:
    unused.set_visible(False)

fig.autofmt_xdate()
fig.tight_layout()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# One panel per species, filled left to right; each panel picks its
# colour from the `cp` palette by position.
for col, species in enumerate(df['species'].unique()):
    panel = ax[col]
    subset = df.loc[df['species'] == species]

    panel.scatter(subset['petalLength'], subset['petalWidth'], c=cp[col])

    panel.set_xlabel('Petal Length')
    panel.set_ylabel('Petal Width')
    panel.set_title(species)

fig.tight_layout()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

# Every panel gets the same axis limits, computed from the full frame
# (0 up to 5% beyond the overall maximum), so the panels are directly
# comparable.
y_top = 1.05 * np.max(df['petalWidth'])
x_right = 1.05 * np.max(df['petalLength'])

for col, species in enumerate(df['species'].unique()):
    panel = ax[col]
    subset = df.loc[df['species'] == species]

    panel.scatter(subset['petalLength'], subset['petalWidth'], c=cp[col])

    panel.set_xlabel('Petal Length')
    panel.set_ylabel('Petal Width')
    panel.set_title(species)

    panel.set_ylim(bottom=0, top=y_top)
    panel.set_xlim(left=0, right=x_right)

fig.tight_layout()

In [None]:
# Attach a made-up two-level factor ('A' / 'B') to the iris rows so the
# next cell has a second variable to trellis on.
#
# Floor division keeps the counts integers: list repetition (['A'] * n)
# requires an int, and `/` returns a float in Python 3.
n_rows = df.shape[0]
tmp_n = n_rows - n_rows // 2   # number of 'A' rows (the larger half if odd)

# Seeded generator so the assignment, and the plots built on it, are
# reproducible from run to run.
rng = np.random.RandomState(0)
df['random_factor'] = rng.permutation(['A'] * tmp_n + ['B'] * (n_rows - tmp_n))
df.head()

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))

# Shared limits for every panel: 0 up to 5% beyond the overall maximum.
y_top = 1.05 * np.max(df['petalWidth'])
x_right = 1.05 * np.max(df['petalLength'])

# this is preposterous -- don't do this
# (hand-rolled trellis: species across the columns, random_factor down
# the rows)
for col, species in enumerate(df['species'].unique()):
    for row, factor in enumerate(df['random_factor'].sort_values().unique()):
        panel = ax[row, col]
        subset = df.loc[(df['species'] == species)
                        & (df['random_factor'] == factor)]

        panel.scatter(subset['petalLength'],
                      subset['petalWidth'],
                      c=cp[col + row])

        panel.set_xlabel('Petal Length')
        panel.set_ylabel('Petal Width')
        panel.set_title(species + '--' + factor)

        panel.set_ylim(bottom=0, top=y_top)
        panel.set_xlim(left=0, right=x_right)

fig.tight_layout()

<br/>

### Thing 4: Visualizing Distributions (Boxplot and Histogram)
***

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))

# boxplot takes a list of arrays, one per box: collect the petal widths
# of each species in order of first appearance.
species_names = df['species'].unique()
widths_by_species = [
    df.loc[df['species'] == name, 'petalWidth'].values
    for name in species_names
]
ax.boxplot(widths_by_species)

# Label the boxes with the species names, in the same order.
ax.set(
    xticklabels=df['species'].unique(),
    xlabel='Species',
    ylabel='Petal Width',
    title='Distribution of Petal Width by Species',
)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))

# Overlay one slightly transparent histogram per species on the same axes.
for species in df['species'].unique():
    subset = df.loc[df['species'] == species]
    ax.hist(subset['petalWidth'], label=species, alpha=0.8)

ax.set_xlabel('Petal Width')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Petal Width by Species')

ax.legend(loc=1)  # loc=1 is 'upper right'

<br/>

### Thing 5: Bar Chart
***

In [None]:
# Titanic passenger data.  NOTE: this rebinds `df`, which held the iris
# frame until now; the cells below use the `survived`, `pclass` and
# `fare` columns.
df = pd.read_csv('data/titanic.csv')
df.head()

In [None]:
# Mean fare for every (survived, pclass) combination; the result is a
# one-column frame indexed by those two keys.
dfg = df.groupby(['survived', 'pclass'])[['fare']].mean()
dfg

In [None]:
# Split the aggregate on the outer index level: survived == 0 and
# survived == 1.  Each result is indexed by `pclass` only.
died = dfg.xs(0, level='survived')
survived = dfg.xs(1, level='survived')

In [None]:
# Grouped bar chart of mean fare by class, with one bar for those who
# died and one for those who survived.  More or less copied from
# matplotlib's own API example.
fig, ax = plt.subplots(1, 1, figsize=(12.5, 7))

N = 3                # number of groups (passenger classes)

ind = np.arange(N)   # the x locations for the groups
width = 0.35         # the width of the bars

# Bars are centred on their x position (stated explicitly so the layout
# does not depend on the matplotlib version's default alignment).
rects1 = ax.bar(ind, died.fare, width, color='r', align='center')
rects2 = ax.bar(ind + width, survived.fare, width, color='y', align='center')

# add some text for labels, title and axes ticks
ax.set_ylabel('Fare')
ax.set_title('Fare by survival and class')
# The two bars of a group are centred at ind and ind + width, so the
# group's tick belongs at the midpoint between them.
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(('First', 'Second', 'Third'))

ax.legend((rects1[0], rects2[0]), ('Died', 'Survived'))


def autolabel(rects):
    """Write each bar's height above it as a whole number.

    Parameters
    ----------
    rects : iterable of bar patches
        The bars to label, as returned by ``ax.bar``.  The text is drawn
        on the module-level ``ax``.
    """
    for rect in rects:
        height = rect.get_height()
        # centred horizontally on the bar, 5% above its top;
        # int() truncates the height toward zero for display
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                '%d' % int(height),
                ha='center', va='bottom')


# Fixed y-range with headroom for the labels drawn above the bars.
ax.set_ylim(0, 110)

autolabel(rects1)
autolabel(rects2)

plt.show()