---
title: Plotting with Python
---

In [None]:
# Third-party imports used throughout the notebook.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib_inline.backend_inline import set_matplotlib_formats

# Notebook-wide seaborn theme (changed again further down).
sns.set_style("darkgrid")

# Request 'retina' and 'png' inline figure formats: sharper graphics on a good screen.
set_matplotlib_formats('retina', 'png')

In [None]:
# Load seaborn's "penguins" example dataset and preview its first rows.
penguins = sns.load_dataset(name="penguins")
penguins.head(n=5)

In [None]:
# The trailing semicolon keeps the notebook from echoing the value returned by the call.
plt.scatter(
    penguins["bill_length_mm"],
    penguins["flipper_length_mm"],
);

In [None]:
# Switch the seaborn style for the plots that follow.
# Other styles to try here: "darkgrid", "whitegrid", "white", "dark".
sns.set_style("ticks")

plt.scatter(
    penguins["bill_length_mm"],
    penguins["flipper_length_mm"],
)
# Apply seaborn's despine() with its default arguments to the figure just drawn.
sns.despine()

In [None]:
# Histogram of bill length with matplotlib's default binning.
plt.hist(penguins["bill_length_mm"]);

# Better plotting with seaborn (on top of matplotlib)

In [None]:
# seaborn takes the DataFrame via data= and refers to columns by name.
sns.scatterplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
);

In [None]:
# Same scatter, with the species column mapped to hue.
sns.scatterplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
    hue="species",
);

In [None]:
# Species mapped to hue, sex mapped to style.
sns.scatterplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
    hue="species",
    style="sex",
);

In [None]:
# Species mapped to hue, sex to style and body mass to size.
sns.scatterplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
    hue="species",
    style="sex",
    size="body_mass_g",
);

In [None]:
def legend_outside(ax=None, **legend_kwargs):
    """Draw the legend anchored just outside the right edge of the axes.

    Parameters
    ----------
    ax : object with a ``legend`` method, optional
        Axes on which to draw the legend. When omitted, ``plt.legend`` is
        called, exactly as before.
    **legend_kwargs
        Extra keyword arguments forwarded to the ``legend`` call. They take
        precedence over the defaults set in this function.

    Returns
    -------
    None
        The legend object is deliberately not returned, so a notebook cell
        ending with ``legend_outside()`` prints no object repr.
    """
    options = {
        # Anchor the legend's upper-left corner at (1.05, 1).
        "bbox_to_anchor": (1.05, 1),
        "loc": "upper left",
        "borderaxespad": 0,
    }
    options.update(legend_kwargs)

    target = plt if ax is None else ax
    target.legend(**options)

In [None]:
# Scatter with hue / style / size encodings, the legend moved outside the
# axes, and a title plus axis labels.
sns.scatterplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
    hue="species",
    style="sex",
    size="body_mass_g",
)

legend_outside()

plt.title("Penguin measurements")
# The x label used to read "bill length in (mm)"; the stray "in" is dropped
# so both axis labels follow the same "<quantity> (mm)" pattern.
plt.xlabel("bill length (mm)")
plt.ylabel("flipper length (mm)");

# Wide or long data

Wide format data:

In [None]:
# Wide format: each bill measurement lives in its own column.
penguins.loc[:, ['bill_length_mm', 'bill_depth_mm']]

Long format data:

In [None]:
# Long format: melt the two bill columns into rows.
long_df = pd.melt(
    penguins,
    value_vars=['bill_length_mm', 'bill_depth_mm'],
)
long_df

Long format is required when you want several columns to appear as a single "variable" in the plot, as in the example below, where the color "variable" reflects whether the point is `bill_depth_mm` or `bill_length_mm`.

Retain other information for each observation:

In [None]:
# Melt again, this time carrying species, body mass and island along as
# identifier columns for every row.
long_df = pd.melt(
    penguins,
    id_vars=['species', 'body_mass_g', 'island'],
    value_vars=['bill_length_mm', 'bill_depth_mm'],
)
long_df

In [None]:
# With long data, the melted 'variable' column can be used as the hue.
sns.scatterplot(
    data=long_df,
    x='body_mass_g',
    y='value',
    hue='variable',
    style='species',
)
legend_outside()

In [None]:
# Boxes grouped by species on the x axis, split by measurement via hue.
sns.boxplot(
    data=long_df,
    x='species',
    y='value',
    hue='variable',
);

In [None]:
# Roles swapped: measurement on the x axis, species as hue.
sns.boxplot(
    data=long_df,
    x='variable',
    y='value',
    hue='species',
);

In [None]:
# NOTE(review): same arguments as the first boxplot cell of this section
# (x='species', hue='variable') — confirm whether a different variant was intended.
sns.boxplot(
    data=long_df,
    x='species',
    y='value',
    hue='variable',
);

# Plots with multiple facets

In [None]:
# A grid with one column of facets per island; nothing is mapped onto it yet.
g = sns.FacetGrid(data=penguins, col="island")
g;

Map plotting to each facet:

In [None]:
# One facet per island, points coloured by species.
g = sns.FacetGrid(penguins, col="island", hue="species", height=3)
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm")
# Add the legend explicitly so the hue colours are explained in the figure.
g.add_legend();

Grid of facets representing combinations of two variables:

In [None]:
# Rows are split by sex and columns by island; points coloured by species.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species", height=3)
g.map(sns.scatterplot, "bill_length_mm", "flipper_length_mm")
# Add the legend explicitly so the hue colours are explained in the figure.
g.add_legend();

In [None]:
# Same grid layout, with sns.regplot mapped onto every facet.
g = sns.FacetGrid(penguins, row="sex", col="island", hue="species", height=3)
g.map(sns.regplot, "bill_length_mm", "flipper_length_mm")
# Add the legend explicitly so the hue colours are explained in the figure.
g.add_legend();

In [None]:
# sns.lmplot takes the facet arguments (row, col, hue) directly in one call.
sns.lmplot(
    data=penguins,
    x="bill_length_mm",
    y="flipper_length_mm",
    row="sex",
    col="island",
    hue="species",
    height=2,
);

## FacetGrid.map vs. FacetGrid.map_dataframe

When you use `FacetGrid.map(func, "col1", "col2", ...)`, the function `func` is passed the values of the columns `"col1"` and `"col2"` (and more if needed) as parameters 1 and 2 (`args[0]`, `args[1]`, ...). In addition, the function always receives a keyword argument named `color=`.

In [None]:
def scatter(*args, **kwargs):
    """Forward the first two positional arguments to ``plt.scatter``.

    Positional arguments beyond the second are ignored; all keyword
    arguments are passed through unchanged. Returns whatever
    ``plt.scatter`` returns.
    """
    x_values = args[0]
    y_values = args[1]
    return plt.scatter(x_values, y_values, **kwargs)
    
# Map the custom wrapper over the grid with FacetGrid.map.
g = sns.FacetGrid(
    penguins,
    row="sex",
    col="island",
    hue="species",
)
g.map(scatter, "bill_length_mm", "flipper_length_mm");

When you use `FacetGrid.map_dataframe(func, "col1", "col2", ...)`, the function `func` is passed the names `"col1"` and `"col2"` (and more if needed) as parameters 1 and 2 (`args[0]`, `args[1]`, ...), and the filtered dataframe as keyword argument `data=`. In addition, the function always receives a keyword argument named `color=`.

In [None]:
def scatterplot(*args, **kwargs):
    """Call ``sns.scatterplot`` with the first two positional arguments as ``x`` and ``y``.

    Positional arguments beyond the second are ignored; all keyword
    arguments are passed through unchanged. Returns whatever
    ``sns.scatterplot`` returns.
    """
    x_name, y_name = args[0], args[1]
    return sns.scatterplot(x=x_name, y=y_name, **kwargs)

# Map the custom wrapper over the grid with FacetGrid.map_dataframe.
g = sns.FacetGrid(
    penguins,
    row="sex",
    col="island",
    hue="species",
)
g.map_dataframe(scatterplot, "bill_length_mm", "flipper_length_mm");

In [None]:
# sns.histplot of bill length mapped onto each sex/island facet.
g = sns.FacetGrid(
    penguins,
    row="sex",
    col="island",
    hue="species",
)
g.map(sns.histplot, "bill_length_mm");

In [None]:
# sns.kdeplot of bill length mapped onto each sex/island facet.
g = sns.FacetGrid(
    penguins,
    row="sex",
    col="island",
    hue="species",
)
g.map(sns.kdeplot, "bill_length_mm");

In [None]:
# seaborn pairplot of the penguins data, coloured by species.
sns.pairplot(data=penguins, hue="species");

In [None]:
# seaborn pairplot of the penguins data, coloured by sex.
sns.pairplot(data=penguins, hue="sex");