# Plotting

In [None]:
# IPython line magic: select matplotlib's inline backend so figures render in the notebook.
%matplotlib inline

## ... with Pandas

In [None]:
import pandas as pd

# URL of the twins data file that gets loaded into `df`.
FNAME = "http://www.stat.ucla.edu/~rgould/datasets/twins.dat"

# The file is read as tab-separated text; "?" entries are treated as missing values.
df = pd.read_csv(
    FNAME,
    sep="\t",
    na_values="?",
)

In [None]:
# Scatter plot of DEDUC1 against AGE via the pandas plotting interface.
df.plot(kind="scatter", x="AGE", y="DEDUC1")

In [None]:
# Preview only the first rows instead of dumping the whole frame into the cell output.
df.head()

In [None]:
# Scatter plot of AGESQ against AGE, drawn with green "x" markers.
df.plot(kind="scatter", x="AGE", y="AGESQ", marker="x", color="green")

## ... adding matplotlib

In [None]:
import matplotlib.pyplot as plt

# Create the figure and axes explicitly so the pandas plot can be customized afterwards.
fig, ax = plt.subplots()
df.plot(kind="scatter", x="AGE", y="AGESQ", marker="x", color="green", ax=ax)

# Replace the y-axis label and add a title.
ax.set_ylabel("Age squared")
ax.set_title("Age vs. Age squared")

# Saving
# NOTE(review): the file name says "barplot" although this figure is a scatter plot —
# confirm the intended name before renaming, since other material may reference the file.
fig.savefig("example_barplot.pdf")

In [None]:
# Multiple plots in one figure: a 2x2 grid of axes that share the y-axis.
# This cell only draws into the two axes of the top row.
fig, axes = plt.subplots(nrows=2, ncols=2, sharey=True)
df.plot(kind="scatter", x="AGE", y="AGESQ", ax=axes[0, 0])
df.plot(kind="scatter", x="AGE", y="AGESQ", ax=axes[0, 1], color="green")

## ... after grouping

In [None]:
# `grouped` itself is a GroupBy object, not a DataFrame.
# Calling .head() on it returns a regular DataFrame with the first rows of every group.
grouped = df.groupby(by=["WHITEL", "MALEL"])
grouped.head()

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#aggregation
# Number of rows for each (WHITEH, MALEH) combination.
sizes = df.groupby(by=["WHITEH", "MALEH"]).size()
print(type(sizes), sizes.head(), sep="\n")

In [None]:
# Pivot the inner index level (MALEH) into columns, turning the grouped counts into a table.
# Guarded so that re-running this cell is a no-op: calling .unstack() again on the
# already-unstacked frame would fold it back into a Series and change what the
# plot cells that use `sizes` draw.
if sizes.index.nlevels > 1:
    sizes = sizes.unstack()
sizes

In [None]:
# Stacked vertical bar chart of the transposed `sizes` table.
sizes.T.plot.bar(stacked=True)

In [None]:
# Horizontal bar chart of `sizes`.
sizes.plot(kind="barh")

## ... with seaborn

In [None]:
import seaborn as sns

# Global seaborn settings: "white" style and the "Set1" color palette.
sns.set(palette="Set1", style="white")

### Statistical relationships

In [None]:
# Line plot of LHRWAGEH over EDUCH.
sns.relplot(data=df, kind="line", x="EDUCH", y="LHRWAGEH")

In [None]:
# One line per "MALEH" value, told apart by color as well as by line style and marker.
sns.relplot(
    data=df,
    kind="line",
    x="EDUCH",
    y="LHRWAGEH",
    hue="MALEH",
    style="MALEH",
    markers=True,
)

In [None]:
# Same per-"MALEH" lines as a faceted figure: one column of plots per "WHITEH" value.
sns.relplot(
    data=df,
    kind="line",
    x="EDUCH",
    y="LHRWAGEH",
    hue="MALEH",
    style="MALEH",
    markers=True,
    col="WHITEH",
)

### Categorical data

In [None]:
# Box plot of LHRWAGEH for each MALEH category.
sns.catplot(data=df, kind="box", x="MALEH", y="LHRWAGEH")

In [None]:
# Bar plot of LHRWAGEH for each MALEH category with a 99% confidence interval.
# NOTE(review): newer seaborn releases deprecate `ci` in favor of `errorbar=("ci", 99)` —
# switch once the minimum supported seaborn version is confirmed.
sns.catplot(data=df, kind="bar", x="MALEH", y="LHRWAGEH", ci=99)

### Regression plots

In [None]:
# Scatter plot with a fitted regression line: LHRWAGEH against LHRWAGEL.
sns.regplot(data=df, x="LHRWAGEL", y="LHRWAGEH")

In [None]:
# Show the first five rows of the data.
df.head(n=5)

### Heatmaps

In [None]:
# Contingency table: MALEL values as rows, MALEH values as columns.
cross = pd.crosstab(index=df["MALEL"], columns=df["MALEH"])
cross

In [None]:
# Draw the crosstab as a heatmap.
sns.heatmap(data=cross)  # try different cmap, e.g., "viridis"

### Matrix plots

In [None]:
# Columns to compare pairwise; the diagonal panels show a kernel density estimate.
cols = [
    "DLHRWAGE",
    "AGE",
    "LHRWAGEH",
    "LHRWAGEL",
]
sns.pairplot(data=df, vars=cols, diag_kind="kde")