# Applied Visualizations

## Plotting with Pandas

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

In [2]:
# List the names of the style sheets registered with this matplotlib installation.
plt.style.available

['_classic_test',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark-palette',
 'seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'seaborn',
 'Solarize_Light2']

In [3]:
# Apply a colorblind-friendly style to every figure created after this cell.
# matplotlib 3.6 renamed its bundled seaborn style sheets with a "seaborn-v0_8-"
# prefix and later releases dropped the old names, so pick whichever is registered.
style_name = 'seaborn-colorblind'
if style_name not in plt.style.available:
    style_name = 'seaborn-v0_8-colorblind'
plt.style.use(style_name)

### DataFrame.plot

In [4]:
np.random.seed(123)

# Draw the three random walks in a fixed order (A, then B, then C) so the
# seeded values are reproducible; B and C are shifted by +20 and -20.
dates = pd.date_range('1/1/2017', periods=365)
walk_a = np.random.randn(365).cumsum(0)
walk_b = np.random.randn(365).cumsum(0) + 20
walk_c = np.random.randn(365).cumsum(0) - 20

df = pd.DataFrame({'A': walk_a, 'B': walk_b, 'C': walk_c}, index=dates)
df.head(50)

Unnamed: 0,A,B,C
2017-01-01,-1.085631,20.059291,-20.230904
2017-01-02,-0.088285,21.803332,-16.659325
2017-01-03,0.194693,20.835588,-17.055481
2017-01-04,-1.311601,21.255156,-17.093802
2017-01-05,-1.890202,21.462083,-19.518638
2017-01-06,-0.238765,19.210548,-18.412372
2017-01-07,-2.665444,18.621577,-18.964625
2017-01-08,-3.094357,19.752729,-19.025419
2017-01-09,-1.828421,19.887807,-19.553459
2017-01-10,-2.695161,18.675538,-19.692142


In [31]:
# Plot every column of df with the default DataFrame.plot settings;
# the trailing ';' keeps the returned object's repr out of the cell output.
df.plot();

<IPython.core.display.Javascript object>

In [29]:
# Scatter plot of column B against column A.
df.plot.scatter(x='A', y='B')

<IPython.core.display.Javascript object>

In [32]:
# Column B is encoded twice: as marker color (through the viridis colormap)
# and as marker size.
df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a2631ffd0>

In [8]:
# Keep the returned Axes so its aspect ratio can be adjusted afterwards.
ax = df.plot.scatter(x='A', y='C', c='B', s=df['B'], colormap='viridis')
# With equal scaling on both axes, the plot shows that
# the range of A is much smaller than the range of C.
ax.set_aspect('equal')

<IPython.core.display.Javascript object>

In [9]:
# Box plot of the columns of df.
df.plot.box();

<IPython.core.display.Javascript object>

In [33]:
# Histograms of the columns on one axes; alpha=0.7 makes the bars
# semi-transparent and the light gray edge outlines each bar.
df.plot.hist(alpha=0.7, edgecolor='lightgray')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a23c025c0>

In [11]:
# Kernel density estimate plot of the columns of df.
df.plot.kde();

<IPython.core.display.Javascript object>

### pandas.plotting

In [12]:
# Load the iris data set from a path relative to the notebook's working
# directory and preview the first rows.
iris = pd.read_csv('data/iris.csv')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


### Scatter Matrix
Compare each column in a DataFrame to every other column in a pairwise fashion

In [36]:
# Grid of pairwise plots for the iris columns; hist_kwds styles the
# histogram panels with a light gray bar edge.
pd.plotting.scatter_matrix(iris, hist_kwds={'edgecolor':'lightgray'});

<IPython.core.display.Javascript object>

### Parallel Coordinates
Each variable in the dataset corresponds to one of a set of equally spaced, parallel vertical lines. Each individual observation is drawn as a line that connects its values across those axes.

We can see that petal length and petal width split the species clearly. Observe that Iris Virginica has the longest petal length and width, and Iris Setosa has the shortest petal length and width.

In [14]:
# Open a new figure first, then draw the parallel-coordinates plot
# with 'species' as the class column.
plt.figure()
pd.plotting.parallel_coordinates(iris, 'species');

<IPython.core.display.Javascript object>

## Seaborn

In [37]:
import seaborn as sns

# Figure aesthetics
# sns.set_style('darkgrid', {"xtick.major.size": 8, "ytick.major.size": 8})

In [38]:
np.random.seed(1234)

# v1: 1000 draws from a normal distribution (mean 0, std 10).
v1 = pd.Series(np.random.normal(0, 10, 1000), name='v1')

# v2: twice v1 plus independent normal noise (mean 60, std 15).
noise = np.random.normal(60, 15, 1000)
v2 = pd.Series(2 * v1 + noise, name='v2')

In [39]:
plt.figure()
# Both series share the same bin edges so their histograms are directly comparable.
bins = np.arange(-50, 150, 5)
for series in (v1, v2):
    plt.hist(series, alpha=0.7, bins=bins, label=series.name, edgecolor='lightgray')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1a24ff2be0>

In [18]:
plt.figure()
# Stacked histogram of v1 and v2, normalized to a density so it shares a
# scale with the KDE curve drawn below. `density=True` replaces the `normed`
# keyword, which current matplotlib releases no longer accept.
plt.hist([v1, v2], histtype='barstacked', edgecolor='lightgray', density=True)

# Pool both samples and overlay a kernel density estimate of the combined data.
v3 = np.concatenate((v1, v2))
sns.kdeplot(v3, color='black');

<IPython.core.display.Javascript object>

In [19]:
plt.figure()
# Histogram and KDE curve of the pooled sample v3, styled separately through
# hist_kws (teal bars, light gray edges) and kde_kws (navy curve).
# NOTE(review): distplot is deprecated in recent seaborn releases in favor of
# histplot/displot — confirm the target seaborn version before migrating.
sns.distplot(v3, hist_kws={'color': 'Teal', 'edgecolor':'lightgray', 'alpha':0.7}, kde_kws={'color': 'Navy'})

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a1fd82b00>

### Joint Plot

Creates histograms alongside a scatter plot.

We can see that v1 and v2 are normally distributed, and have a positive correlation.

In [43]:
# Scatter plot of v2 against v1 with a marginal histogram for each variable.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
sns.jointplot(x=v1, y=v2, alpha=0.8)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x1a1f569470>

In [21]:
# Keep the returned JointGrid so the central scatter axes can be adjusted.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
grid = sns.jointplot(x=v1, y=v2, alpha=0.7)
# Use the same scale on both axes of the joint (scatter) panel.
grid.ax_joint.set_aspect('equal')

<IPython.core.display.Javascript object>

In [22]:
# Hexagonal-bin version of the joint plot.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
sns.jointplot(x=v1, y=v2, kind='hex')

<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x1a203a5dd8>

In [23]:
# Optional: uncomment to switch to seaborn's plain white style first.
# sns.set_style('white')

# KDE version of the joint plot; space=0 removes the gap between the joint
# and marginal axes. x and y are passed by keyword: recent seaborn releases
# reject them as positional arguments, while the keyword form works on old
# and new versions.
sns.jointplot(x=v1, y=v2, kind='kde', space=0)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x1a20ba3710>

In [24]:
# The remaining plots use the iris DataFrame loaded earlier; preview its first rows again.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


### Pairplot

In [46]:
# Pairwise plots of the iris columns, colored by species with the viridis
# palette; the diagonal panels show KDE curves.
sns.pairplot(
    data=iris,
    hue='species',
    diag_kind='kde',
    palette='viridis',
)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1a1ff93ef0>

### Violin plot

In [26]:
# Side-by-side comparison of petal length per species.
# x and y are passed by keyword: recent seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
plt.figure(figsize=(8, 6))

# Left panel: swarm plot, one marker per observation.
plt.subplot(121)
sns.swarmplot(x='species', y='petal_length', data=iris)

# Right panel: violin plot of the same data.
plt.subplot(122)
sns.violinplot(x='species', y='petal_length', data=iris)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1a21cca748>

## Spurious Correlations

### link: http://www.tylervigen.com/spurious-correlations