# Summary of VIS

This notebook contains a list of Python methods that were introduced in the VIS programming exercises.

## Import the libraries

In [None]:
import numpy as np
import pandas as pd

# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns  # also improves the look of plots
sns.set()  # apply Seaborn's default styling to all subsequent plots
# Notebook-wide Matplotlib defaults, set in one place
plt.rcParams.update({
    'figure.figsize': (10, 5),  # default hor./vert. size of plots, in inches
    'lines.markeredgewidth': 1,  # to fix issue with seaborn box plots; needed after import seaborn
})

In [None]:
# Load the monthly stock data: the first column is parsed as dates,
# then the 'Date' column becomes the row index
df_stocks = (
    pd.read_csv('../datasets/stocks-monthly.csv', parse_dates=[0])
    .set_index('Date')
)

## A simple plot

In [None]:
# Plot one column against the DataFrame's index;
# the trailing ';' suppresses the textual repr of the returned object
df_stocks['NASDAQ'].plot();

## Title, axis labels, legend


In [None]:
# plot() returns the Axes object; keep it so the plot can be decorated afterwards
ax_nasdaq = df_stocks['NASDAQ'].plot()
ax_nasdaq.set_title("Development of NASDAQ Composite index", size=16, weight='bold')
ax_nasdaq.set(xlabel='Date', ylabel="Index")  # both axis labels in one call
ax_nasdaq.legend(['NASDAQ']);

## Saving a plot


In [None]:
# Save the figure that contains ax_nasdaq; the path is relative to the
# notebook's working directory
ax_nasdaq.get_figure().savefig('NASDAQ-composite.png')

## Line styles, colors, marker styles
### Line styles


In [None]:
# Display Matplotlib's table of line styles (usable as linestyle=...)
mpl.lines.lineStyles

In [None]:
# Example: pass a line style via linestyle= (':' is one of the keys listed above)
ax_nasdaq = df_stocks['NASDAQ'].plot(linestyle=':')

### Marker styles


In [None]:
# Display Matplotlib's table of marker styles (usable as marker=...)
mpl.markers.MarkerStyle.markers

In [None]:
# Example: draw a marker at every data point via marker=
ax_nasdaq = df_stocks['NASDAQ'].plot( marker='8' )

### Colors


In [None]:
# Display Matplotlib's base colors (usable as color=...)
mpl.colors.BASE_COLORS

In [None]:
# Example: pass a color via color=
ax_nasdaq = df_stocks['NASDAQ'].plot(color="r")

In [None]:
# Display Matplotlib's table of named colors
mpl.colors.cnames

## Plot range limits and plot size


In [None]:
# figsize= enlarges this one plot (in inches), overriding the notebook default
ax_nasdaq = df_stocks['NASDAQ'].plot(figsize=(16, 9))
ax_nasdaq.tick_params(labelsize=14)  # larger tick labels to match the larger figure
# restrict the visible y- and x-ranges
ax_nasdaq.set(ylim=(4000, 6000), xlim=('2015-01-01', '2016-12-31'));

## Text and annotation


In [None]:
ax = df_stocks['NASDAQ'].plot(figsize=(9, 6))
ax.set(ylim=(0, 8000), xlim=('2007-01-01', '2018-01-01'))  # limit the y- and x-ranges

# add text at data coordinates (x, y); can use LaTeX; needs doubled backslashes
ax.text('2015-01-01', 6500, 'Where is this going?')

# add annotation: an arrow from the label position (xytext) to the annotated point (xy)
arrow = dict(facecolor='black', width=2, shrink=0.05)  # arrow properties
ax.annotate(
    'Bankruptcy of Lehman Brothers',
    xy=('2008-09-15', 2400),
    xytext=('2009-06-06', 3500),
    arrowprops=arrow,
);

## Combining multiple plots


### Superimposing


In [None]:
# The first plot() call (ax=None) creates the Axes; every later call is
# handed that same Axes via ax= so the lines are drawn on top of each other
ax_combined = None
for ticker in ('GOOG', 'AMZN'):
    ax_combined = df_stocks[ticker].plot(ax=ax_combined)
ax_combined.legend(['GOOG', 'AMZN']);

### Juxtaposition (subplots)


In [None]:
# 2x2 grid of subplots sharing the x-axis; squeeze=False keeps `ax` a 2-D array
fig, ax = plt.subplots(nrows=2, ncols=2, squeeze=False, sharex=True, figsize=(10,10))

# ax.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1]
tickers = ['NASDAQ', 'AMZN', 'GOOG', 'MSFT']
for panel, ticker in zip(ax.flat, tickers):
    df_stocks[ticker].plot(ax=panel)
    panel.set_title(ticker)

# only the left column gets a y-label
for panel in ax[:, 0]:
    panel.set_ylabel('Price (USD)')

fig.suptitle("Development of stocks over time", size=18, weight='bold');

## Scatter plots
### Scatter matrix


In [None]:
def _sample_group(rng, n_points, curved):
    """Draw one group of the synthetic scatter data.

    Parameters
    ----------
    rng : np.random.RandomState
        Source of the uniform [0, 1) samples.
    n_points : int
        Number of points drawn before filtering.
    curved : bool
        True: b = a**3 plus narrow noise; False: b = a plus wider, shifted noise.

    Returns
    -------
    pd.DataFrame
        Columns 'a' (x), 'b' (noisy y), 'c' (curved), 'e' (noise draw) and
        'f' (not curved); only rows with 0 <= b < 1 are kept.
    """
    x = rng.random_sample(n_points)
    e = rng.random_sample(n_points)  # one noise draw per point
    v = x**3 + (e - 0.5)/10 if curved else x + (e - 0.8)*0.7
    keep = (v >= 0) & (v < 1)
    return pd.DataFrame({'a': x[keep], 'b': v[keep], 'c': curved, 'e': e[keep], 'f': not curved},
                        columns=list('abcef'))


# A dedicated, seeded generator makes the data (and every plot derived from it)
# identical on each Restart & Run All, without touching NumPy's global random state.
rng = np.random.RandomState(42)
# Same layout as before: the 20000-point 'c is False' group first, then the 5000-point 'c is True' group
df_sd = pd.concat([_sample_group(rng, 20000, False), _sample_group(rng, 5000, True)],
                  ignore_index=True)

In [None]:
# scatter matrix for pairs of columns 'a', 'b', and 'e';
# diagonal='density' puts a density plot of each column on the diagonal
pd.plotting.scatter_matrix(df_sd[['a', 'b', 'e']], diagonal='density', figsize=(10,10));

### Transparency (alpha setting), point size


In [None]:
# effect of alpha (transparency: 0 = transparent; 1 = opaque)
fig, ax = plt.subplots(ncols=2, sharey=True)
# left panel: alpha=1 is too high; right panel: alpha=0.005 is a bit low
for panel, alpha in zip(ax, (1, 0.005)):
    df_sd.plot.scatter(x='a', y='b', c='blue', alpha=alpha, ax=panel)

In [None]:
# effect of size (s=)
fig, ax = plt.subplots(ncols=2, sharey=True, figsize=(11, 5))
# left panel: s=10 is too high; right panel: s=.01 is a bit low
for panel, size in zip(ax, (10, .01)):
    df_sd.plot.scatter(x='a', y='b', c='blue', s=size, ax=panel)

### Modulating dot color or size


In [None]:
# modulate color (c=) by value in column 'e':
# passing a column name instead of a color makes each dot's color follow that column
df_sd.plot.scatter(x='a', y='b', c='e', figsize=(6, 5));

## Plotting grouped data


In [None]:
# groupby(...).plot() draws one figure per group; here `ax` is a Series of Axes
# labelled by the group key (the values of column 'c'), not a single Axes.
ax = df_sd.groupby('c').plot(kind='scatter', x='a', y='b', c="blue", figsize=(6, 5))
# Title each plot from its own group key. Integer lookups such as ax[0] depend on
# pandas' deprecated positional fallback for non-integer indexes, so iterate instead.
for c_value, group_ax in ax.items():
    group_ax.set_title('c is {}'.format(c_value), size=16)

In [None]:
# Dot color per value of column 'c' ("k" = black, "r" = red)
colors = {True: "k", False: "r"}

In [None]:
# Draw every group on the same Axes, each in the color registered for its key
fig, ax = plt.subplots(figsize=(6, 5))
for flag, rows in df_sd.groupby('c'):
    rows.plot.scatter(x='a', y='b', c=colors[flag], ax=ax)

# Seaborn and Pandas

In [None]:
# Load the iris data and split it into one DataFrame per species
iris = pd.read_csv('../datasets/iris.csv')
species_col = iris['species']
iris1 = iris[species_col == 'Iris-setosa']
# the last two subsets are renumbered from 0 (the old index is dropped)
iris2 = iris[species_col == 'Iris-versicolor'].reset_index(drop=True)
iris3 = iris[species_col == 'Iris-virginica'].reset_index(drop=True)

## Histogram and density plot


In [None]:
# Distribution of one column, passed as a Series, with distplot's default settings
# NOTE(review): distplot is deprecated in recent Seaborn releases (histplot / kdeplot /
# displot replace it) — confirm the Seaborn version in use before migrating
sns.distplot(iris['petal length']);

In [None]:
# Same call, but passing a one-column DataFrame (double brackets) instead of a Series
sns.distplot(iris[['petal length']]);

In [None]:
# kde=False switches off the density curve
sns.distplot(iris[['petal length']], kde=False);

In [None]:
# hist=False switches off the histogram bars
sns.distplot(iris[['petal length']], hist=False);

## Strip plot


In [None]:
# One strip of dots per species; jitter=True spreads the dots sideways to reduce overlap
sns.stripplot(data=iris, x='species', y='petal length', jitter=True);

## Violin plot


In [None]:
# One violin per species, showing the distribution of petal length
sns.violinplot(data=iris, x='species', y='petal length');

## Heat map


In [None]:
# Bin petal width and petal length into five equal-width, labelled levels
levels = ["tiny", "small", "medium", "big", "large"]
iris["petal width level"] = pd.cut(iris["petal width"], len(levels), labels=levels)
iris["petal length level"] = pd.cut(iris["petal length"], len(levels), labels=levels)

# Count the rows in every (width level, length level) pair.
# observed=False is passed explicitly so that level pairs with no rows are kept:
# the default for categorical groupers differs between pandas versions, and
# dropping the empty pairs would leave holes in the matrix.
iris_grouped = iris.groupby(["petal width level", "petal length level"], observed=False).count()
# the non-null count of 'species' serves as the row count; empty pairs count as 0
iris_grouped["count"] = iris_grouped["species"].fillna(0)
# rows = petal width level, columns = petal length level
iris_matrix = iris_grouped["count"].unstack()

In [None]:
# Draw the count matrix as a heat map; square=True makes every cell square
sns.heatmap(iris_matrix, square=True);