# Data visualization in Python
## with Matplotlib

Setting the notebook figure embedding

*inline* prints the plots as png files in the notebook

*notebook* allows for interactive figures within the notebook

In [None]:
# Select the interactive "notebook" backend: the cells below keep modifying
# figures after they were first displayed, which relies on a live figure.
%matplotlib notebook

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
import matplotlib as mpl

Making a simple plot

In [None]:
# Sample y = x^2 at 100 evenly spaced points on [0, 2].
x = np.linspace(0, 2, 100)
y = x**2

# Create an 8x4 inch figure together with its single axes, then draw the curve.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(x, y)

Adding more lines to the plot

In [None]:
# Two more curves on the same axes: the identity and the square root.
identity_curve = x
root_curve = x**0.5
ax.plot(x, identity_curve)
ax.plot(x, root_curve)

Labeling the lines

In [None]:
# Name the curves in the order they were added to the axes.
curve_names = ('quadratic', 'linear', 'square root')
for curve, name in zip(ax.lines, curve_names):
    curve.set_label(name)

Add a legend

In [None]:
# Build the legend from the labels assigned to the lines above.
ax.legend()

Adding labels

In [None]:
# Set both axis labels and the title in one call.
ax.set(
    xlabel='x value',
    ylabel='y value',
    title='Line example')

Changing the line colors

In [None]:
from colorsys import hsv_to_rgb

# One color per curve, each written in a different notation:
# a named color, an RGB tuple, and an HSV triple converted to RGB.
line_colors = [
    'red',
    (0.1, 0.7, 0.05),
    hsv_to_rgb(44/360, 0.62, 0.95),
]
for curve, shade in zip(ax.lines, line_colors):
    curve.set_color(shade)

Adding dashes to the lines

In [None]:
# Dash patterns (alternating on/off lengths) for the first two curves only;
# the third curve stays solid because zip stops after two patterns.
dash_patterns = ([4, 4], [2, 2, 4, 2])
for curve, pattern in zip(ax.lines, dash_patterns):
    curve.set_dashes(pattern)

Increase the line thickness

In [None]:
# Give every curve on the axes the same, thicker stroke.
THICK = 2
for curve in ax.get_lines():
    curve.set_linewidth(THICK)

Restoring the legend

In [None]:
# Re-create the legend so it picks up the new colors, dashes and widths;
# framealpha=0 makes the legend box fully transparent.
ax.legend(framealpha=0,
          loc='upper left')

Set the axis limits

In [None]:
# Clamp the view to the data range: x in [0, 2], y in [0, 4].
ax.set(xlim=(0,2),
       ylim=(0,4))

Set the axis ticks

In [None]:
# Ticks at 0, 1, 2 on the x-axis and 0, 1, 2, 3, 4 on the y-axis.
ax.set(xticks=np.linspace(0, 2, 3),
       yticks=np.linspace(0, 4, 5))

Removing the box

In [None]:
# Hide the right and top borders, leaving only the left and bottom axes lines.
for side in ("right", "top"):
    ax.spines[side].set_visible(False)

Changing the fonts

In [None]:
# Larger title, and a common (slightly smaller) size for both axis labels.
ax.title.set_fontsize(20)
for axis in (ax.xaxis, ax.yaxis):
    axis.label.set_fontsize(16)

Scaling the figure to include the larger labels

In [None]:
# Recompute the margins so the enlarged title and labels fit inside the figure.
fig.tight_layout()

Background patches

In [None]:
# Grey backgrounds: light grey behind the plot area, darker grey behind the figure.
for background, grey_level in ((ax.patch, 0.9), (fig.patch, 0.8)):
    background.set_color([grey_level, grey_level, grey_level])

Removing the background

In [None]:
# Hide both background rectangles (axes and figure) entirely.
for background in (ax.patch, fig.patch):
    background.set_visible(False)

Exporting the figure

In [None]:
# Write the figure to disk at print resolution with transparent face/edge.
# - `fig.savefig` targets this figure explicitly instead of relying on
#   pyplot's notion of the "current" figure.
# - `pad_inches` only has an effect together with `bbox_inches='tight'`, so
#   the tight bounding box is requested to make the 0.1 inch padding apply.
fig.savefig('line_example.png',
            dpi=600,
            facecolor='none',
            edgecolor='none',
            bbox_inches='tight',
            pad_inches=0.1)

## Plotting directly from a dataframe

In [None]:
# Example data: one column per fruit, one row per weekday.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday']
fruit_values = {
    'apples': [3e6, 2e6, 0, 1e6],
    'oranges': [6e6, 1e6, 3e6, 3e6],
    'bananas': [0, 3e6, 7e6, 2e6],
}
df = pd.DataFrame(fruit_values, index=weekdays)

In [None]:
# Default pandas plot: one line per column, with the index on the x-axis.
df.plot()

With more markup

In [None]:
# Bar colors for the two fruits, defined as HSV triples converted to RGB.
apple_green = hsv_to_rgb(110/360, 0.55, 0.86)
orange_tone = hsv_to_rgb(35/360, 0.80, 0.96)

# One bar-chart panel per selected column, sharing the y-axis.
# With subplots=True, `ax` holds one Axes per panel (indexed as ax[0] below).
ax = df.plot(
    kind='bar',
    y=['apples', 'oranges'],
    subplots=True,
    sharey=True,
    legend=False,
    color=[apple_green, orange_tone],
)

Formatting the y-axis tick labels

In [None]:
# Format the y tick labels in engineering notation with a euro unit and no
# decimals. Only the first panel is set explicitly; the panels were created
# with sharey=True (NOTE(review): confirm the formatter carries over to the
# second panel on your Matplotlib version).
ax[0].yaxis.set_major_formatter(mpl.ticker.EngFormatter(unit='€', places=0))

Resizing the figure to show the full labels

In [None]:
# df.plot returned only the axes, so fetch the figure through pyplot's
# "current figure" and re-fit the margins around the tick labels.
fig = plt.gcf()
fig.tight_layout()

## Making a graph with uncertainty bounds

In [None]:
import datetime as dtime

# Median curve: 101 values from 0 to 2, plotted against dates that start on
# 2020-01-01 and advance by 60 days per unit of y.
y = np.linspace(0, 2, 101)
start_date = dtime.datetime(2020, 1, 1)
t = pd.Series([start_date + dtime.timedelta(days=i*60) for i in y])

# A single hue is shared by the median line and all shaded bands.
band_color = hsv_to_rgb(3/360, 0.55, 0.86)

fig, ax = plt.subplots()
ax.plot(t, y, label='median', linewidth=3, color=band_color)


def shade_band(lower_factor, upper_factor, label):
    """Shade the region between lower_factor*y and upper_factor*y on `ax`.

    Each band is drawn at alpha 0.1, so overlapping bands appear darker.
    Returns the collection created by `fill_between`.
    """
    return ax.fill_between(t,
                           lower_factor*y,
                           upper_factor*y,
                           color=band_color,
                           alpha=0.1,
                           label=label)


# Narrowest band first, widest last.
shade_band(0.85, 1.2, '40-60%')
shade_band(0.75, 1.4, '25-75%')
shade_band(0.5, 1.8, '10-90%')

Change the layout

In [None]:
# Visible time window: January through April 2020.
window_start = dtime.datetime(2020, 1, 1)
window_end = dtime.datetime(2020, 5, 1)

ax.set(
    ylabel='y value',
    title='Line with bounds example',
    xlim=(window_start, window_end),
    ylim=(0, 3),
    yticks=np.linspace(0, 3, 4))

# Drop the right and top borders.
for side in ("right", "top"):
    ax.spines[side].set_visible(False)

# Keep a handle on the legend; its patches are restyled in the next cell.
legend = ax.legend(loc='upper left', framealpha=0.0)

Fix the legend so that each swatch reflects the stacked opacity of the overlapping bands

In [None]:
# The bands overlap in the plot, so the innermost one looks darkest.
# Mirror that in the legend: first patch 0.3, second 0.2, third 0.1.
swatches = legend.get_patches()
for position, swatch in enumerate(swatches):
    layers = 3 - position
    swatch.set_alpha(0.1 * layers)

Format the dates

In [None]:
# Place one major tick on the x-axis per calendar month.
ax.xaxis.set_major_locator(mpl.dates.MonthLocator(interval=1))

## Scatter plot

In [None]:
# Load the Boston housing features into a DataFrame.
#
# `sklearn.datasets.load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2. When it is unavailable, rebuild the same feature table from the
# original source, following the recipe from scikit-learn's deprecation notice
# (this fallback needs network access).
BOSTON_FEATURE_NAMES = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']

try:
    from sklearn.datasets import load_boston
except ImportError:
    # Each record is spread over two physical lines in the raw file:
    # 11 values on the first line, then 2 more features followed by the target.
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                         sep=r"\s+", skiprows=22, header=None)
    features = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    df = pd.DataFrame(features, columns=BOSTON_FEATURE_NAMES)
else:
    data = load_boston()
    df = pd.DataFrame(data['data'], columns=data['feature_names'])

df.head()

Plot the number of rooms versus the distances from the employment centres

In [None]:
# Scatter of rooms (RM) against distance to employment centres (DIS).
# Markers are semi-transparent so that dense regions show up darker.
cloud_style = dict(markersize=6,
                   label='Neighbourhoods',
                   color='dimgray',
                   alpha=0.2)

fig, ax = plt.subplots()
ax.plot(df['DIS'], df['RM'], 'o', **cloud_style)

Highlight a certain point

In [None]:
# Draw the row labelled 88 again, larger and in an accent color, on top of the cloud.
highlight_row = 88
accent = hsv_to_rgb(3/360, 0.55, 0.86)
ax.plot(df.loc[highlight_row, 'DIS'],
        df.loc[highlight_row, 'RM'],
        'o',
        markersize=8,
        label='Interesting neighbourhood',
        color=accent)

Do some layout changes

In [None]:
axis_text = dict(
    xlabel='weighted distances to five Boston employment centres',
    ylabel='average number of rooms per dwelling')
ax.set(**axis_text)

# Limit both axes to at most 5 tick intervals.
for axis in (ax.yaxis, ax.xaxis):
    axis.set_major_locator(plt.MaxNLocator(5))

# Extra padding lifts the title away from the plot area.
ax.set_title(label='Boston housing prices dataset', pad=20)

for side in ("right", "top"):
    ax.spines[side].set_visible(False)

legend = ax.legend(loc='lower center', framealpha=0.9)

## Horizontal bar chart

In [None]:
# Bar values keyed by language; insertion order fixes the array order.
language_scores = {
    'Python': 38,
    'C++': 48,
    'Java': 56,
    'C': 23,
    'R': 16,
    'Rust': 27,
    'Julia': 11,
}
x = np.array(list(language_scores.values()))
y = np.array(list(language_scores.keys()))

Generate a sorting vector and a list of colors

In [None]:
# Indices that sort the values in ascending order.
x_ind = np.argsort(x)

# Every bar is silver, except the bar for 'R' which gets an accent color.
# The position is looked up in the *sorted* label order, because that is the
# order in which the bars are drawn.
accent = hsv_to_rgb(3/360, 0.55, 0.85)
color_list = ['silver'] * len(y)
highlight_pos = np.flatnonzero(y[x_ind] == 'R')[0]
color_list[highlight_pos] = accent

Plot the bar chart

In [None]:
# Horizontal bars, sorted ascending so the largest value ends up on top.
sorted_labels = y[x_ind]
sorted_values = x[x_ind]

fig, ax = plt.subplots()
bars = ax.barh(sorted_labels, sorted_values, linewidth=0, color=color_list)
ax.set_title(label='Programming languages')

Removing the box around the plot

In [None]:
# Keep only the left spine, which acts as the baseline for the bars.
for side in ("right", "top", "bottom"):
    ax.spines[side].set_visible(False)

Removing all the ticks, but keeping the labels

In [None]:
# Switch off the tick marks on all four sides; keep only the text labels on
# the left (the language names) and drop the numeric labels at the bottom.
no_tick_marks = dict.fromkeys(('top', 'bottom', 'left', 'right'), False)
ax.tick_params(labelleft=True, labelbottom=False, **no_tick_marks)

Adding the value to the bars

In [None]:
# Write each bar's value just inside its right end.
for bar in bars:
    value = bar.get_width()
    # Shift left by 6 % of the current upper x-limit so the text sits inside the bar.
    text_x = value - ax.get_xlim()[1] * 0.06
    # Vertically centred on the bar.
    text_y = bar.get_y() + bar.get_height() / 2
    ax.text(text_x, text_y, value,
            va='center', color='white', fontsize=12)

## Making a waterfall chart

In [None]:
# Step values of the waterfall: the opening total, four changes, the closing total.
# The builtin `float` replaces the `np.float` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype (float64) is the same.
x_val = np.array([405, -195, -260, -190, 360, 120]).astype(float)

# x0: heights of the two "total" bars (first and last); the steps are masked out.
x0 = x_val.copy()
x0[1:5] = np.nan

# x1: running total after each step, used as the base of the floating bars.
x1 = np.cumsum(x_val)

# x2: heights of the floating step bars. Drawn from the running total *after*
# the step back towards the total before it, hence the sign flip. The two
# total bars are masked out here.
x2 = -1*(x_val)
x2[[0,5]] = np.nan

y = np.array(['EBITDA','Amortization','Depreciation','Interest','Taxes','Profit'])

Plotting the bar charts

In [None]:
# Colors of the floating bars: red for decreases, green for the increase.
# The first and last entries ('k') belong to bars that are NaN in x2.
decrease_red = hsv_to_rgb(3/360, 0.55, 0.85)
increase_green = hsv_to_rgb(116/360, 0.35, 0.70)
step_colors = ['k',
               decrease_red,
               decrease_red,
               decrease_red,
               increase_green,
               'k']

fig, ax = plt.subplots(constrained_layout=True)

# Total bars (start and end) in silver.
ax.bar(y, x0, color='silver')

# Floating bars for the intermediate steps, anchored at the running total.
bars = ax.bar(y, x2, bottom=x1, color=step_colors)

Set layout

In [None]:
ax.set_title(label='EBITDA to profit')

# No y ticks: the values are written on the bars in a later cell.
ax.set_yticks(ticks=[])
ax.set_ylim([-150, 450])

# Remove the complete frame around the plot.
for side in ("right", "top", "left", "bottom"):
    ax.spines[side].set_visible(False)

Rotate the labels

In [None]:
# Hide the x tick marks but keep their labels, rotated by 45 degrees.
# Called on `ax` directly (instead of pyplot's implicit current axes) and with
# the documented `labelrotation` keyword rather than the `rotation` alias.
ax.tick_params(axis='x',
               which='both',
               bottom=False,
               top=False,
               labelbottom=True,
               labelrotation=45)

Adding the labels to the bars

In [None]:
# Height at which each label is anchored (the top edge of its bar):
# - total bars (first/last): their own value,
# - the negative steps: the running total before the step,
# - the positive step at index 4: the running total after the step.
x_pos = x_val.copy()
x_pos[1:5] = x1[0:4]
x_pos[4] = x1[4]

# Lift every label by 2 % of the tallest anchor so it clears the bar.
label_offset = np.max(x_pos)*0.02

for index, (val, anchor) in enumerate(zip(x_val, x_pos)):
    ax.annotate(f'{int(val)}',
                (index, anchor+label_offset),
                ha='center')

Adding a horizontal line at the origin

In [None]:
# Thin grey reference line at y = 0 across the full width of the axes.
ax.axhline(0, color='grey', lw=0.5) 

Move the line to the back

In [None]:
# Send the zero line behind the bars. Index 0 assumes the axhline from the
# previous cell is the first line on this axes (the bars are presumably
# patches, not lines — TODO confirm if other lines are ever added).
ax.lines[0].set_zorder(0)

## Subplot histograms

In [None]:
# Six zero-mean normal samples with different standard deviations.
#
# A dedicated, seeded generator makes the data reproducible. (Assigning to
# `np.random.seed` would not seed anything: it replaces the seeding function
# with an integer and leaves the samples unseeded.)
rng = np.random.RandomState(22)

N_SAMPLES = 500000
STD_DEVS = [1.0, 2, 1.7, 0.75, 1.2, 1.45]

# `x` is the list used for plotting; x1..x6 name the individual arrays.
x1, x2, x3, x4, x5, x6 = x = [std * rng.randn(N_SAMPLES) for std in STD_DEVS]

Setup the subplots

In [None]:
# 2x3 grid of panels sharing both axes, so the distributions are directly comparable.
fig, axes = plt.subplots(ncols=3,
                         nrows=2,
                         constrained_layout=False,
                         sharex=True,
                         sharey=True)

# Row-major flat list of the axes, reused by the following cells.
flat_axes = list(axes.flat)

# Positions on the 'bone' colormap, computed once. One extra position is
# generated so that the last (lightest) one is never used.
shades = np.linspace(0.1, 0.9, len(flat_axes) + 1)

for idx, ax in enumerate(flat_axes):
    panel_color = plt.cm.bone(shades[idx])
    # Label each dataset with its own letter (A, B, C, ...).
    panel_label = 'Data ' + chr(65 + idx)

    # Translucent filled histogram; the return value holds the bin edges at [1].
    bins = ax.hist(x[idx],
                   color=panel_color,
                   bins=100,
                   density=True,
                   label=panel_label,
                   alpha=0.2)
    # Opaque outline drawn over it, reusing exactly the same bin edges.
    ax.hist(x[idx],
            color=panel_color,
            bins=bins[1],
            density=True,
            label=panel_label,
            alpha=1,
            histtype=u'step')


Set the titles of each of the histograms

In [None]:
# Title the panels 'Data A' .. 'Data F' in row-major order. A loop replaces
# six copy-pasted calls; it also avoids echoing the last Text object as output.
for panel, letter in zip(axes.flat, 'ABCDEF'):
    panel.set_title(label='Data ' + letter)

Adjust the layout of the rest of the figure

In [None]:
# Same x-range and ticks on every panel; no y ticks, and only the bottom spine.
x_ticks = dict(xlim=(-6, 6),
               xticks=np.linspace(-6, 6, 5),
               yticks=[])
for ax in flat_axes:
    ax.set(**x_ticks)
    for side in ("right", "left", "top"):
        ax.spines[side].set_visible(False)

Set a title for the figure

In [None]:
# Figure-level title, centred above all panels.
fig.suptitle("Some normal distributions", fontsize=14)

Shrink the subplot area to leave room on the right for a legend

In [None]:
 fig.subplots_adjust(right=0.8)

Add a legend

In [None]:
# Rebuild the per-panel colors and names used for the histograms.
n_panels = len(flat_axes)
shades = np.linspace(0.1, 0.9, n_panels + 1)
colors = [plt.cm.bone(shades[idx]) for idx in range(n_panels)]
labels = ['Data ' + chr(65 + idx) for idx in range(n_panels)]

# Proxy rectangles serve as legend handles, one per panel color.
handles = [plt.Rectangle((0, 0), 1, 1, color=swatch, alpha=0.3)
           for swatch in colors]

# Single-column legend, vertically centred at the right edge of the figure.
fig.legend(handles,
           labels,
           loc='center right',
           bbox_to_anchor=(1, 0.5),
           ncol=1,
           prop={'size': 10})

## Interactive plots with Matplotlib

In [None]:
import ipywidgets as widgets

### Sampling data for a histogram

Generate the buttons and the color pickers

In [None]:
def _make_button(grid_area):
    """Create a 'New distribution' button placed in the given grid area.

    The grid area name doubles as the button's identity: the click callback
    reads `b.layout.grid_area` to tell the left and right button apart.
    (`Button` has no `value` trait, so none is passed; an unrecognised keyword
    is only dropped with a deprecation warning.)
    """
    return widgets.Button(
        description='New distribution',
        disabled=False,
        button_style='',
        tooltip='Generate a new distribution',
        layout=widgets.Layout(width='100pt', grid_area=grid_area))


def _make_color_picker(grid_area, initial_color):
    """Create a color picker with a starting color in the given grid area."""
    return widgets.ColorPicker(
        concise=False,
        description='',
        value=initial_color,
        disabled=False,
        tooltip='Pick a color',
        layout=widgets.Layout(width='100pt', grid_area=grid_area))


button_left = _make_button('button_left')
button_right = _make_button('button_right')

color_left = _make_color_picker('color_left', 'blue')
color_right = _make_color_picker('color_right', 'red')

# Indexed like the histogram panels: 0 = left, 1 = right.
color_pickers = [color_left, color_right]

# Two equal columns: buttons in the first row, color pickers in the second.
gridbox = widgets.GridBox(
        children=[button_left, button_right, color_left, color_right],
        layout=widgets.Layout(
            justify_items='center',
            width='680pt',
            grid_template_rows='auto',
            grid_template_columns='50% 50%',
            grid_template_areas="""
            'button_left button_right'            
            'color_left color_right'
            """))

Generate the initial plot of the histograms from before

In [None]:
# Two side-by-side panels with shared axes; `axes` is indexed 0 = left, 1 = right.
fig, axes = plt.subplots(ncols=2, 
                         nrows=1, 
                         constrained_layout=True, 
                         sharex=True, 
                         sharey=True, 
                         figsize=(9,3))

# Reproducible initial samples. A local seeded generator is used instead of
# `np.random.seed = 22`, which would overwrite the seeding function with an
# integer and seed nothing. Being local, it also does not depend on the state
# of the global `np.random` module.
rng = np.random.RandomState(22)
x1 = rng.randn(1000)
x2 = rng.randn(1000)
x = [x1,x2]

for idx, ax in enumerate(axes):
    # Translucent fill in the color currently selected in the matching picker;
    # the return value holds the bin edges at [1].
    bins = ax.hist(x[idx], 
                   color=color_pickers[idx].value, 
                   bins=100, 
                   density=True, 
                   alpha=0.2) 
    # Opaque outline over the same bin edges.
    ax.hist(x[idx], 
            color=color_pickers[idx].value, 
            bins=bins[1], 
            density=True, 
            alpha=1, 
            histtype=u'step') 

# Common styling: fixed x-range, no y ticks, only the bottom spine.
for ax in axes:
    ax.set(
        xlim=(-6, 6),
        xticks=np.linspace(-6, 6, 5),
        yticks=[])
    ax.spines["right"].set_visible(False) 
    ax.spines["left"].set_visible(False) 
    ax.spines["top"].set_visible(False) 

Show the buttons

In [None]:
# A bare widget as the last expression of the cell renders it.
gridbox

Add a callback which redraws the graphs

In [None]:
# Maps the grid area of a button to the index of the panel it controls.
_PANEL_BY_GRID_AREA = {'button_left': 0, 'button_right': 1}


def _redraw_hist(target_ax, samples, color):
    """Clear `target_ax` and draw `samples` as a filled plus outlined histogram."""
    target_ax.clear()
    # Translucent fill; the return value holds the bin edges at [1].
    filled = target_ax.hist(samples,
                            color=color,
                            bins=100,
                            density=True,
                            alpha=0.2)
    # Opaque outline over exactly the same bin edges.
    target_ax.hist(samples,
                   color=color,
                   bins=filled[1],
                   density=True,
                   alpha=1,
                   histtype=u'step')

    # clear() discards the styling as well, so it is applied again here.
    target_ax.set(
        xlim=(-6, 6),
        xticks=np.linspace(-6, 6, 5),
        yticks=[])
    for side in ("right", "left", "top"):
        target_ax.spines[side].set_visible(False)


def generate_new_hist(b):
    """Button callback: resample one distribution and redraw its panel.

    Parameters
    ----------
    b : widgets.Button
        The clicked button. Its `layout.grid_area` ('button_left' or
        'button_right') selects the panel; any other value is ignored
        (previously this raised UnboundLocalError).

    Side effects
    ------------
    Replaces the global `x1` (left) or `x2` (right) with 1000 fresh standard
    normal samples and redraws `axes[ind]` in the color currently selected in
    `color_pickers[ind]`.
    """
    global x1
    global x2

    ind = _PANEL_BY_GRID_AREA.get(b.layout.grid_area)
    if ind is None:
        return

    samples = np.random.randn(1000)
    if ind == 0:
        x1 = samples
    else:
        x2 = samples

    _redraw_hist(axes[ind], samples, color_pickers[ind].value)


# Both buttons share the callback; it tells them apart by grid area.
button_left.on_click(generate_new_hist)
button_right.on_click(generate_new_hist)

### Adjusting a sine wave

In [None]:
# Empty canvas for the sine wave: fixed y-range, no grid, no frame, no ticks.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_ylim([-4, 4])
ax.grid(False)
for side in ("right", "top", "left", "bottom"):
    ax.spines[side].set_visible(False)
ax.set(yticks=[], xticks=[])
plt.show()

# Light grey vertical marker at x = pi. Added after show(), so it becomes
# ax.lines[0]; the interactive cell below relies on that index.
marker_grey = [0.7, 0.7, 0.7]
ax.axvline(np.pi, color=marker_grey)

In [None]:
@widgets.interact(length=(0.1, 10, 0.1), 
                  amp=(0.1, 4, .1), 
                  shift=(0, 2*np.pi, 0.01*np.pi))
def update(length = 10, amp=1, shift=0):
    """Redraw the sine wave on `ax` for the current slider values.

    Parameters
    ----------
    length : float
        Controls the frequency: the wave is sin((10/length) * (x - shift)).
    amp : float
        Amplitude of the wave.
    shift : float
        Horizontal shift in radians.

    `ax.lines[0]` is the vertical marker drawn when the figure was created;
    a line at index 1 is the curve from the previous call and is replaced.
    """
    if len(ax.lines) > 1:
        # Artist.remove() is used instead of `del ax.lines[1]`: `ax.lines` is
        # an immutable sequence in recent Matplotlib releases, so item
        # deletion is no longer supported there.
        ax.lines[1].remove()

    # Sample grid on [0, 2*pi], computed once and used for both x and y.
    grid = np.linspace(0, 2 * np.pi, 1000)
    ax.plot(grid, 
            amp*np.sin((10/length) * (grid-shift)), 
            color='blue')

## Setting the parameters for all the plots

In [None]:
# Marker-only plot (line width 0) of three features against AGE, before the
# house style is applied. The keyword is spelled `linewidth`: the
# case-insensitive form `lineWidth` is rejected by recent Matplotlib releases.
df.plot(x='AGE', y=['RM','NOX','DIS'], marker='o', linewidth=0, alpha=0.2)

Setting the font and colors according to the DNB housestyle

In [None]:
from cycler import cycler


def _rgb255(red, green, blue):
    """Convert 0-255 color channels to the 0-1 floats Matplotlib expects."""
    return [red/255, green/255, blue/255]


DNB_black = _rgb255(43, 50, 60)

# Default line colors, cycled through in this order.
house_palette = [_rgb255(254, 203, 0),
                 _rgb255(89, 71, 70),
                 _rgb255(155, 144, 0),
                 _rgb255(22, 34, 87),
                 _rgb255(184, 69, 14)]

# Global defaults: they apply to every figure created after this cell.
mpl.rcParams.update({
    # All text and axes furniture in the house "black".
    'text.color': DNB_black,
    'xtick.color': DNB_black,
    'ytick.color': DNB_black,
    'axes.labelcolor': DNB_black,
    'axes.edgecolor': DNB_black,
    # Typeface.
    'font.family': 'sans-serif',
    'font.sans-serif': ['Verdana'],
    # No top/right border, automatic tight layout, frameless legend.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'figure.autolayout': True,
    'legend.frameon': False,
    'axes.prop_cycle': cycler(color=house_palette),
})

In [None]:
# The same plot as before, now drawn with the house-style defaults set above.
# The keyword is spelled `linewidth`: the case-insensitive form `lineWidth`
# is rejected by recent Matplotlib releases.
df.plot(x='AGE', y=['RM','NOX','DIS'], marker='o', linewidth=0, alpha=0.2)

Get a list of all the options

In [None]:
# Names of all settings that can be changed through mpl.rcParams.
mpl.rcParams.keys() 

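Adjustments can also be made in a *matplotlibrc* file. The location of the file that is currently in use is returned by `mpl.matplotlib_fname()`.
An example of the file can be found in (the exact path depends on the installation):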

`C:\Users\{user_name}\AppData\Local\Continuum\anaconda3\pkgs\matplotlib-3.1.1-py37hc8f65d3_0\Lib\site-packages\matplotlib\mpl-data\`