Plotting with matplotlib - 1
========================

In [2]:
# plotting imports
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# other imports
import numpy as np
import pandas as pd
from scipy import stats

Hello world
---

Using the `pyplot` notation, very similar to how MATLAB works

In [None]:
# pyplot interface: every plotting function is called on the `plt` module itself
# 'bo-' is a format string: blue ('b') circle markers ('o') joined by a solid line ('-')
plt.plot([0, 1, 2, 3, 4],
         [0, 1, 2, 5, 10], 'bo-')
# text is positioned in data coordinates (x=1.5, y=5)
plt.text(1.5, 5, 'Hello world', size=14)
# the mathtext part is a raw string so that "\mu" is not read as an
# (invalid) Python escape sequence; the "\n" line break stays in a normal string
plt.xlabel('X axis\n' r'($\mu g/mL$)')
plt.ylabel('y axis\n($X^2$)');

Hello world, reprise
---

Using the recommended "object-oriented" (OO) style

In [None]:
# object-oriented interface: create the figure and its axes explicitly,
# then call the plotting methods on the axes object
fig, ax = plt.subplots()
ax.plot([0, 1, 2, 3, 4],
        [0, 1, 2, 5, 10], 'bo-')
ax.text(1.5, 5, 'Hello world', size=14)
# the mathtext part is a raw string so that "\mu" is not read as an
# (invalid) Python escape sequence; the "\n" line break stays in a normal string
ax.set_xlabel('X axis\n' r'($\mu g/mL$)')
ax.set_ylabel('y axis\n($X^2$)');

In [None]:
# sample points used by the plots below: 100 evenly spaced values from 0 to 2
x = np.linspace(start=0, stop=2, num=100)

In [None]:
fig, ax = plt.subplots()

# one curve per power of x; `label` is the text used for the legend entry
ax.plot(x, x, label='linear')
ax.plot(x, x**2, label='quadratic')
ax.plot(x, x**3, label='cubic')

ax.set_xlabel('x label')
ax.set_ylabel('y label')
ax.set_title('Simple Plot')
# the trailing ";" stops the notebook from echoing the returned Legend object,
# matching the other plotting cells
ax.legend();

Controlling a figure aspect
---

In [None]:
# figsize controls the figure size, given as (width, height)
fig, ax = plt.subplots(figsize=(9, 4))

# same three curves as before: x raised to the powers 1, 2 and 3
for exponent, curve_label in ((1, 'linear'), (2, 'quadratic'), (3, 'cubic')):
    ax.plot(x, x**exponent, label=curve_label)

ax.set(xlabel='x label', ylabel='y label', title='Simple Plot')
ax.legend();

In [None]:
fig, ax = plt.subplots(figsize=(9, 4))

# each curve gets its own format string (line style / marker) and color
for y_values, fmt, curve_color, curve_label in (
        (x, '--', 'grey', 'linear'),
        (x**2, '.-', 'red', 'quadratic'),
        (x**3, '*', '#3bb44a', 'cubic')):
    ax.plot(x, y_values, fmt, color=curve_color, label=curve_label)

ax.set(xlabel='x label', ylabel='y label', title='Simple Plot')

# choose where the legend goes
ax.legend(loc='upper right');
# another way to position it
# ax.legend(loc='center left',
#            bbox_to_anchor=(1, 0.5),
#            ncol=3);

Multiple panels
---

In [None]:
# x values for the two signals
x1 = np.linspace(0.0, 5.0)
x2 = np.linspace(0.0, 2.0)

# a cosine multiplied by a decaying exponential, and a plain cosine
y1 = np.exp(-x1) * np.cos(2 * np.pi * x1)
y2 = np.cos(2 * np.pi * x2)

# 2 rows, 1 column
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 4))

# axes holds one "panel" per subplot
print(axes)

# top panel
ax = axes[0]
ax.plot(x1, y1, 'o-')
ax.set(title='A tale of 2 subplots', ylabel='Damped oscillation')

# bottom panel
ax = axes[1]
ax.plot(x2, y2, '.-')
ax.set(xlabel='time (s)', ylabel='Undamped');

Automagically adjust panels so that they fit in the figure
---

In [None]:
def example_plot(ax, fontsize=12):
    """Fill `ax` with a two-point line plus placeholder labels and title.

    Parameters
    ----------
    ax : object with `plot`, `set_xlabel`, `set_ylabel` and `set_title` methods
        The panel to draw on (a matplotlib Axes in this notebook).
    fontsize : number, default 12
        Passed as `fontsize` to the x label, y label and title.
    """
    ax.plot([1, 2])

    # same font size for all three pieces of text
    labelled_setters = (
        (ax.set_xlabel, 'x-label'),
        (ax.set_ylabel, 'y-label'),
        (ax.set_title, 'Title'),
    )
    for setter, text in labelled_setters:
        setter(text, fontsize=fontsize)

In [None]:
# 2x2 grid of panels with constrained_layout switched off
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(4, 4),
                        constrained_layout=False)
print(axs)
# axs.flat visits every panel of the 2x2 grid in turn
for panel in axs.flat:
    example_plot(panel)

In [None]:
# warning: "constrained_layout" was marked as an experimental feature
# when this notebook was written
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(4, 4),
                        constrained_layout=True)

# fill every panel of the grid with the example plot
for panel in axs.flat:
    example_plot(panel)

In [None]:
# alternative way: build the grid without constrained_layout ...
fig, axs = plt.subplots(nrows=2, ncols=2,
                        figsize=(4, 4),
                        constrained_layout=False)

for panel in axs.flat:
    example_plot(panel)

# ... and call tight_layout once all panels are drawn
plt.tight_layout();

Example of manipulating axes limits
---

Extra: a look at ways to choose colors
and manipulate transparency

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(9, 4))

# same plot for both panels
# we are just gonna change the axes' limits
for ax in axes:
    # more color choices
    # (see here for a full list: https://matplotlib.org/tutorials/colors/colors.html)

    # xkcd rgb color survey: https://xkcd.com/color/rgb/
    ax.plot(x, x, '--', color='xkcd:olive green', label='linear')
    # RGBA (red, green, blue, alpha)
    ax.plot(x, x**2, '.-', color=(0.1, 0.2, 0.5, 0.3), label='quadratic')
    # one of {'b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'}
    # they are the single character short-hand notations for:
    # blue, green, red, cyan, magenta, yellow, black, and white
    ax.plot(x, x**3, '*', color='m', label='cubic')
    # transparency can be manipulated with the "alpha" kwarg (= keyword argument)
    # the label names the curve (x**4), like the three labels above
    ax.plot(x, x**4, '-', color='b', linewidth=4, alpha=0.3, label='quartic')

    ax.set_xlabel('x label')
    ax.set_ylabel('y label')
    ax.set_title('Simple Plot')

# only manipulate the last axes: pick it explicitly instead of relying on
# the loop variable still pointing at it after the loop
ax = axes[-1]
ax.set_ylim(1, 16.4)
ax.set_xlim(1.65, 2.03)

# legend placed to the right of the last panel, outside the axes
ax.legend(loc='center left',
          bbox_to_anchor=(1, 0.5),
          title='Fit');

Other sample plots using "vanilla" matplotlib
---

In [None]:
# scatter plot
fig, ax = plt.subplots(figsize=(6, 4))

N = 10
x = np.linspace(0, 1, N)
y = np.square(x)
# colors holds one value per point
colors = np.linspace(0, 1, N)
# alternative: explicit color names, in the same formats as shown before
# colors = ['b', 'b', 'b',
#           'k', 'k', 'k',
#           'r', 'r', 'r',
#           'xkcd:jade']
# marker size of each point, growing with x
area = 5 + (20 * x) ** 3

# show the numbers that go into the plot
for var_name, values in (('x', x), ('y', y),
                         ('colors', colors), ('area', area)):
    print(f'{var_name}: {values}')

ax.scatter(
    x, y,
    s=area, c=colors,
    alpha=0.9,
    edgecolors='w', linewidths=3,
    label='Data',
)
ax.legend(loc='upper left');

In [None]:
# generate 2d random data: 2 rows x 100 columns of standard-normal samples
# a seeded Generator makes "Restart & Run All" reproduce the same numbers
rng = np.random.default_rng(42)
data = rng.standard_normal((2, 100))
# preview the first few columns instead of echoing all 200 values
data[:, :5]

In [None]:
# histograms: 1d on the left, 2d on the right
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 3))

bins = 25

# the two rows of `data` are the two variables
first_row, second_row = data
left_panel, right_panel = axs

left_panel.hist(first_row, bins=bins)
right_panel.hist2d(first_row, second_row, bins=bins);

Other useful tips
---

In [None]:
# scatter plot with a logarithmic y axis
fig, ax = plt.subplots(figsize=(6, 4))

N = 10
x = np.linspace(0, 10, N)
# powers of two
y = np.power(2, x)
colors = np.linspace(0, 1, N)
# a single marker size shared by all points
area = 500

ax.scatter(
    x, y,
    s=area, c=colors,
    alpha=0.9,
    edgecolors='w', linewidths=3,
    label='Data',
)
ax.set_yscale('log', base=10);

In [None]:
# scatter plot with both axes logarithmic
fig, ax = plt.subplots(figsize=(6, 4))

N = 10
# x goes from 10**1 to 10**4, with evenly spaced exponents
x = np.power(10, np.linspace(1, 4, N))
y = np.square(x)
colors = np.linspace(0, 1, N)
area = 500

ax.scatter(
    x, y,
    s=area, c=colors,
    alpha=0.9,
    edgecolors='w', linewidths=3,
    label='Data',
)
# each axis can use its own logarithm base
ax.set_yscale('log', base=2)
ax.set_xscale('log', base=10);

In [None]:
# changing colormap
# find an exhaustive list here:
# https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
fig, ax = plt.subplots(figsize=(6, 4))

N = 10
x = np.power(10, np.linspace(1, 4, N))
y = np.square(x)
colors = np.linspace(0, 1, N)
area = 500

# other colormap names to try: 'plasma', 'jet', 'Blues', 'Blues_r'
ax.scatter(
    x, y,
    s=area, c=colors, cmap='tab20',
    alpha=0.9,
    edgecolors='w', linewidths=3,
    label='Data',
)
ax.set_yscale('log', base=2)
ax.set_xscale('log', base=10);

Saving your plot
---

In [None]:
fig, ax = plt.subplots(figsize=(3, 2))

N = 10
x = 10 ** np.linspace(1, 4, N)
y = x ** 2
colors = np.linspace(0, 1, N)
area = 500

ax.scatter(x, y, s=area, c=colors,
           alpha=0.9,
           edgecolors='w', linewidths=3,
           cmap='tab20',
           label='My awesome data is the best thing ever',
#            rasterized=True
          )

# the legend sits outside the axes, to their right
ax.legend(bbox_to_anchor=(1, 0.5),
          loc='center left')

# `base` is the keyword used by the other cells of this notebook;
# the older `basey` / `basex` spellings are not accepted by recent matplotlib
ax.set_yscale('log', base=2)
ax.set_xscale('log', base=10)

# save the same figure twice, as PNG and as SVG, with identical options
for extension in ('png', 'svg'):
    fig.savefig(f'the_awesomest_plot_ever.{extension}',
                dpi=300,
                bbox_inches='tight',
                transparent=True)

---


Exercises
---------

Using the data from this URL: https://evocellnet.github.io/ecoref/data/phenotypic_data.tsv

Can you make a scatterplot for the relationship between s-scores and the corrected p-value?

Can you make a scatterplot for the relationship between s-scores and the corrected p-value, but only considering two strains plotted with different colors?

Select four conditions and create a multipanel figure with the same scatterplot for each condition. Experiment with different layouts

Using the [Iris dataset](https://en.wikipedia.org/wiki/Iris_flower_data_set) (which you can find at `../data/iris.csv`), prepare the following plot: for each pair of variables, prepare a scatterplot with each species having its own color

Make the same series of plots as before but in a single figure

Make a single panel now, changing the dots' sizes according to the third variable