# Data visualisation with matplotlib


## 1. Import matplotlib modules

In [1]:
import matplotlib

Matplotlib has many different backends; usually it's fine to stick to the default. But sometimes it is necessary to change the backend (before importing the plotting module ```pyplot```). The full list of available backends reads:

[u'pgf', u'cairo', u'MacOSX', u'CocoaAgg', u'gdk', u'ps', u'GTKAgg', u'nbAgg', u'GTK', u'Qt5Agg', u'template', u'emf', u'GTK3Cairo', u'GTK3Agg', u'WX', u'Qt4Agg', u'TkAgg', u'agg', u'svg', u'GTKCairo', u'WXAgg', u'WebAgg', u'pdf']

In [2]:
# Select the 'MacOSX' backend; this is done before pyplot is imported.
matplotlib.use("MacOSX")

When working from within a Jupyter notebook, we can choose the *matplotlib* backend with a magic function:

In [3]:
# Print the backend names that the %matplotlib magic accepts
%matplotlib --list

Available matplotlib backends: ['tk', 'gtk', 'gtk3', 'gtk4', 'wx', 'qt4', 'qt5', 'qt6', 'qt', 'osx', 'nbagg', 'webagg', 'notebook', 'agg', 'svg', 'pdf', 'ps', 'inline', 'ipympl', 'widget']


In [4]:
# Alternative backend, left disabled:
#% matplotlib inline
# Select the 'notebook' backend for the figures drawn in this session
%matplotlib notebook

Now we import ```pyplot```, the main plotting module of *matplotlib*:

In [5]:
import matplotlib.pyplot as plt

## 2. The default plot

Let's create some data (normal distribution)

In [6]:
import numpy as np
# 10000 draws from a normal distribution with mean 5 and standard deviation 2
data = np.random.normal(5.0, 2.0, 10000)

and plot a simple histogram of our data

In [8]:
# One figure with a single Axes holding a normalised 20-bin histogram of `data`
fig, ax = plt.subplots()
ax.hist(data, bins=20, range=(-5, 15), density=True)
plt.show()

<IPython.core.display.Javascript object>

## 3. Adding more to the plot

First, we are going to fit a Gaussian profile to our data

In [9]:
from scipy.stats import norm
# norm.fit returns the fitted (loc, scale) pair, i.e. the mean and standard deviation
(mu, sigma) = norm.fit(data)

and plot it on top of our histogram

In [10]:
# Histogram of the data with the fitted Gaussian density drawn on top
fig, ax = plt.subplots()
ax.hist(data, bins=20, range=(-5, 15), density=True)

# Evaluate the fitted density on a fine grid spanning the histogram range
x_grid = np.arange(-5, 15, 0.1)
ax.plot(x_grid, norm.pdf(x_grid, loc=mu, scale=sigma))
plt.show()

<IPython.core.display.Javascript object>

Let's make it a bit easier to read (colors, legend, axis labels, etc.)

In [13]:
# Same histogram + fit, now with a title, axis labels, colours and a legend
fig, ax = plt.subplots()

# Grey, thin-edged bars for the data; red line for the fitted density
x_grid = np.arange(-5, 15, 0.1)
ax.hist(data, bins=20, range=(-5, 15), density=True,
        color='0.75', lw=0.2, label='data')
ax.plot(x_grid, norm.pdf(x_grid, loc=mu, scale=sigma), color='r', label='fit')

ax.set(
    title='My fancy plot',
    xlabel=r'Some variable $\varphi$',
    ylabel=r'Probability density',
)
ax.legend(loc='upper right')  # same position as the numeric code 1
plt.show()

<IPython.core.display.Javascript object>

## 4. More changes to the plot

Our plot already looks quite good, but some of its features don't add any information, so why not remove them?

In [14]:
# Decluttered version of the plot: no frame around the legend, only the bottom
# spine is kept, and the y axis carries neither ticks nor tick labels.
fig, ax = plt.subplots()
ax.set_title('My fancy plot')

# Grey, thin-edged bars for the data; red line for the fitted density
x_grid = np.arange(-5, 15, 0.1)
ax.hist(data, range=(-5, 15), bins=20, density=True, label='data', color='0.75', lw=0.2)
ax.plot(x_grid, norm.pdf(x_grid, mu, sigma), label='fit', color='r')
ax.set_xlabel(r'Some variable $\varphi$')
ax.legend(loc='upper left', frameon=False)

# Hide every spine except the bottom one
for side in ('top', 'left', 'right'):
    ax.spines[side].set_visible(False)

# tick_params expects real booleans: the strings 'on'/'off' are both truthy,
# so passing 'off' would leave the ticks and labels visible.
ax.tick_params(axis='x', which='both', bottom=True, top=False)
ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
plt.show()

<IPython.core.display.Javascript object>

## 5. Saving the plot as a PDF

In [16]:
# Rebuild the decluttered plot and write it to 'my_fancy_plot.pdf'.
fig, ax = plt.subplots()
ax.set_title('My fancy plot')

# Grey, thin-edged bars for the data; red line for the fitted density
x_grid = np.arange(-5, 15, 0.1)
ax.hist(data, range=(-5, 15), bins=20, density=True, label='data', color='0.75', lw=0.2)
ax.plot(x_grid, norm.pdf(x_grid, mu, sigma), label='fit', color='r')
ax.set_xlabel(r'Some variable $\varphi$')
ax.legend(loc='upper left', frameon=False)

# Hide every spine except the bottom one
for side in ('top', 'left', 'right'):
    ax.spines[side].set_visible(False)

# tick_params expects real booleans: the strings 'on'/'off' are both truthy,
# so passing 'off' would leave the ticks and labels visible in the saved file.
ax.tick_params(axis='x', which='both', bottom=True, top=False)
ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)

# bbox_inches='tight' trims the surrounding whitespace in the output file
fig.savefig('my_fancy_plot.pdf', format='pdf', bbox_inches='tight')

<IPython.core.display.Javascript object>

In [49]:
# Scatter plot of three standard-normal samples with marginal histograms
# of the x values (top panel) and the y values (right panel).

# Fixing random state for reproducibility
np.random.seed(48726587)

# Generate the data: three (x, y) samples of decreasing size
x1 = np.random.randn(10000)
y1 = np.random.randn(10000)
x2 = np.random.randn(1000)
y2 = np.random.randn(1000)
x3 = np.random.randn(100)
y3 = np.random.randn(100)

# One entry per sample: (x values, y values, colour, legend label)
samples = [
    (x1, y1, "b", 'sample: 10⁴'),
    (x2, y2, "r", 'sample: 10³'),
    (x3, y3, "g", 'sample: 10²'),
]

# Make the figure and add a grid spec to assign space for each plot:
# a wide, tall main panel with a thin strip above it and another to its right
fig = plt.figure(figsize=(8, 6))
gs = fig.add_gridspec(2, 2, width_ratios=(3, 1), height_ratios=(1, 3),
                      left=0.1, right=0.9, bottom=0.1, top=0.9,
                      wspace=0.15, hspace=0.15)

# Create the Axes. The marginal panels share their axis with the scatter panel
# of THIS figure (not with an `ax` left over from an earlier cell).
ax_data = fig.add_subplot(gs[1, 0])
ax_histx = fig.add_subplot(gs[0, 0], sharex=ax_data)
ax_histy = fig.add_subplot(gs[1, 1], sharey=ax_data)

# Plot the data: scatter in the main panel, x distribution on top,
# y distribution (horizontal bars) on the right
for sample_x, sample_y, sample_color, sample_label in samples:
    ax_data.scatter(sample_x, sample_y, c=sample_color, label=sample_label)
    ax_histx.hist(sample_x, density=True, color=sample_color, label=sample_label)
    ax_histy.hist(sample_y, density=True, color=sample_color, label=sample_label,
                  orientation='horizontal')

for panel in (ax_data, ax_histx, ax_histy):
    panel.legend()

# End on show() so the cell does not echo the Legend object's repr
plt.show()
  

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x2b1598cfc70>