# seaborn:  statistical visualizations

<U>Notes if you are using Jupyter Notebook</U>:  to call <B>exit()</B> from a notebook, please use <B>sys.exit()</B> (requires <B>import sys</B>); if a strange error occurs, it may be because Jupyter retains variables from all executed cells.  To reset the notebook's variables, click 'Restart Kernel' (the circular arrow) -- this will not undo any text changes.  

### import seaborn and matplotlib.pyplot

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### scatter with multiple variables

In [None]:
# Load the restaurant tips data from a local CSV (alternative: sns.load_dataset("tips")).
tips = pd.read_csv('tips.csv')

# Tip expressed as a percentage of the total bill, rounded to two decimals.
tips['pct'] = (tips['tip'] / tips['total_bill'] * 100).round(2)

# Select and order the columns so that 'pct' sits right after 'tip'.
tips = tips.loc[:, [
    'total_bill', 'tip', 'pct',
    'sex', 'smoker', 'day', 'time', 'size',
]]

# Last expression in the cell: displays the first rows.
tips.head()

### 'relational plot':  plotting tip percentage (pct) against total bill

In [None]:
# Relational plot of the tip percentage ('pct') against the total bill.
# The trailing ';' keeps Jupyter from echoing the returned object.
sns.relplot(
    data=tips,
    x="total_bill",
    y="pct",
);

### 'category plot':  plotting mean total bill by party size

In [None]:
# Bar chart of total_bill for each party size ('size' is the categorical axis).
sns.catplot(
    data=tips,
    kind='bar',
    x="size",
    y="total_bill",
);

In [None]:
# Tip against total bill, one panel per 'time' value.
# 'smoker' drives both color and marker style; 'size' (party size) drives marker size.
sns.relplot(
    data=tips,
    x="total_bill", y="tip",
    col="time",
    hue="smoker", style="smoker",
    size="size",
);

### line with multiple variables:  (firing rate x time), align, choice, coherence

In [None]:
# Load the 'dots' dataset from a local CSV file into a DataFrame.
dots = pd.read_csv('dots.csv')

In [None]:
# Line plot of firing rate over time, one panel per 'align' value.
# 'choice' drives color and line style; 'coherence' drives line width.
# sharex=False lets each panel keep its own x-axis range.
sns.relplot(
    data=dots,
    kind="line",
    x="time", y="firing_rate",
    col="align",
    hue="choice", style="choice",
    size="coherence",
    legend="full",
    facet_kws=dict(sharex=False),
);

### line chart with statistical estimation error bands (signal x timepoint), by region and event:  the shaded bands are 95% confidence intervals

More complex datasets will have multiple measurements for the same value of the x variable. The default behavior in seaborn is to aggregate the multiple measurements at each x value by plotting the mean and the 95% confidence interval around the mean:

In [None]:
# Load the fMRI dataset from a local CSV file.
fmri = pd.read_csv('fmri.csv')

# Signal over timepoint, one panel per region; 'event' drives color and line style.
# Repeated measurements at the same timepoint are aggregated by seaborn.
sns.relplot(
    data=fmri,
    kind="line",
    x="timepoint", y="signal",
    col="region",
    hue="event", style="event",
);

### scatter plot with fitted linear regression lines (tip vs. total bill), by time and smoker

In [None]:
# Regression plot of tip against total bill: one panel per 'time' value,
# with a separate color for each 'smoker' group.
sns.lmplot(
    data=tips,
    x="total_bill", y="tip",
    col="time",
    hue="smoker",
);

## sns.catplot:  categorical plot

### a categorical plot generalizes over different representations of the relationship between one numeric value and one or more categorical variables

#### 'swarm' scatter plot showing relationship between total bill, day of week and smoker/nonsmoker

In [None]:
# Swarm plot of total bill per day, colored by smoker status.
sns.catplot(
    data=tips,
    kind="swarm",
    x="day", y="total_bill",
    hue="smoker",
);

#### 'violin' plot:  same relationship, but drawing each distribution as a filled density shape instead of individual points

In [None]:
# Violin plot of total bill per day; split=True draws the two 'smoker'
# groups as the two halves of a single violin.
sns.catplot(
    data=tips,
    kind="violin",
    split=True,
    x="day", y="total_bill",
    hue="smoker",
);

#### 'bar' plot, showing mean total bill by day and smoker, with confidence-interval error bars

In [None]:
# Bar chart of total bill per day, with one bar per 'smoker' group.
sns.catplot(
    data=tips,
    kind="bar",
    x="day", y="total_bill",
    hue="smoker",
);

### boxplot and scatterplot in a figure

#### boxplot: tip by day, showing quartiles; scatter: tip against total bill, colored by day

In [None]:
import matplotlib.pyplot as plt

# One figure with two side-by-side axes that share the y axis ('tip' in both plots).
f, splts = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(6, 4))

# Left axes: box plot of tip per day.
spl1 = sns.boxplot(data=tips, x="day", y="tip", ax=splts[0])

# Right axes: tip against total bill, colored by day.
spl2 = sns.scatterplot(data=tips, x="total_bill", y="tip", hue="day", ax=splts[1]);

### figure-level plot size is set by the 'height' of each facet; 'aspect' controls each facet's width-to-height ratio

In [None]:
# Same faceted line plot of the dots data, with an explicit per-facet size:
# 'height' is the height of each facet and 'aspect' its width/height ratio.
# NOTE(review): relplot is a figure-level function and returns a FacetGrid,
# so the name `ax` is misleading; kept unchanged in case other cells use it.
ax = sns.relplot(
    data=dots,
    kind="line",
    x="time", y="firing_rate",
    col="align",
    hue="choice", style="choice",
    size="coherence",
    legend="full",
    height=4.5,
    aspect=2 / 3,
    facet_kws={'sharex': False},
);

### jointplot - focuses on single relationship

In [None]:
# Load the iris dataset from a local CSV file into a DataFrame.
iris = pd.read_csv('iris.csv')

In [None]:
# Joint plot of petal length against sepal length for the iris data.
sns.jointplot(
    data=iris,
    x="sepal_length",
    y="petal_length",
);

### pairplot - 'demo' plot - show all pairwise relationships and marginal distributions

In [None]:
# Grid of pairwise plots over the iris columns, colored by species.
sns.pairplot(
    data=iris,
    hue="species",
);

### setting style

In [None]:
# Apply the "ticks" style and "muted" color palette before plotting.
sns.set(palette="muted", style="ticks")

# Re-draw the faceted tips scatter so the new style is visible.
sns.relplot(
    data=tips,
    x="total_bill", y="tip",
    col="time",
    hue="smoker", style="smoker",
    size="size",
);

### can also pass styling arguments to matplotlib

In [None]:
# Faceted tips scatter with explicit styling: party size drives both color
# (YlGnBu palette) and marker size (scaled into the 10-125 range), and
# 'smoker' picks the marker shape. edgecolor / linewidth / alpha are extra
# keyword arguments controlling the marker outline and transparency.
sns.relplot(
    data=tips,
    x="total_bill", y="tip",
    col="time",
    hue="size", palette="YlGnBu",
    size="size", sizes=(10, 125),
    style="smoker", markers=["D", "o"],
    edgecolor=".2",
    linewidth=.5,
    alpha=.75,
);