Practicing and experimenting with Python and Data Visualization.

In [1]:
# (*) To communicate with Plotly's server, sign in with credentials file
import plotly.plotly as py  

# (*) Useful Python/Plotly tools
import plotly.tools as tls   

# (*) Graph objects to piece together plots
from plotly.graph_objs import *

import numpy as np  # (*) numpy for math functions and arrays

Simple csv reader function adapted to our CSV data files:

In [2]:
# (*) csv file read/write
import csv         

# Define a csv reader function
def get_csv_data(filepath, row_id):
    '''
    Read one row of a csv file and return it as a numpy array of floats.

    The first row whose first column contains `row_id` is used. Its first
    entry and its last 2 entries (not corresp. to months) are trimmed, so
    that each remaining entry corresp. to a particular month from January
    to December.

    pos. arg (1) filepath: relative path to csv file
    pos. arg (2) row_id: id of row requested, found in first column
                 (a string); it is matched as a substring of that column

    Raises ValueError if no row matches `row_id`, or if one of the
    retained entries cannot be converted to a float.
    '''
    with open(filepath, 'r') as data_file:
        reader = csv.reader(data_file)        # define reader object
        for row in reader:                    # loop through rows in csv file
            # Skip empty lines, then test the row id against the 1st column
            if row and row_id in row[0]:
                # Trim 1st and last 2 entries and return a numpy array
                return np.array([float(x) for x in row[1:-2]])

    # No row matched: fail here with a clear message instead of silently
    # returning None, which would only surface later as an obscure error
    raise ValueError(
        "no row matching {!r} found in {!r}".format(row_id, filepath)
    )
            
# (-) The 'with' statement automatically closes the file opened
#     from 'filepath' at the end of its block of code

### Bar Chart with Error Bars

In [3]:
# Month abbreviations, January through December
months = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# Both arrays below are read with get_csv_data() from the Montreal file
# and hold 1 entry per month:
#  the daily average temperatures ...
T_avg = get_csv_data(filepath='montreal.csv', row_id='Daily Average')

#  ... and the daily temperature standard deviations
T_std = get_csv_data(filepath='montreal.csv', row_id='Standard Deviation')

In [9]:
# Bar trace: one bar per month, with an error bar on each of them
trace1 = Bar(
    x=months,   # list of strings as x-coords
    y=T_avg,    # 1d array of numbers as y-coords
    error_y=ErrorY(
        array=T_std,      # values of the error bars
        type='data',      # other choices: 'percent', 'sqrt', 'constant'
        color='#E6842A'   # error bar color
    ),
    marker=Marker(color='#E3BA22')   # bar color (hex color model)
)

# Data object (list-like) holding the single trace
data = Data([trace1])

# Plot title; the adjacent literals are joined by Python into one string
title = ("<b>Fig 2.1: Montreal 1981-2010</b><br>"
         "Daily Average Temperature and Standard Deviation")

# Layout object
layout = Layout(
    title=title,                        # plot title
    showlegend=False,                   # no legend
    plot_bgcolor='rgb(233,233,233)',    # grey plot area
    paper_bgcolor='rgb(233,233,233)',   #   and grey paper (outside plot)
    yaxis=YAxis(
        title='Temperature [in deg. C]',   # y-axis title
        range=[-15.5, 25.5],               # fixed y range
        gridcolor='white',                 # white grid lines
        zeroline=False                     # no thick line at y=0
    )
)

# Figure object, made of the Data and Layout objects
fig = Figure(data=data, layout=layout)

# (@) Send to Plotly and show in notebook
py.iplot(fig, filename='s2_avg-temp')

### Stacked Bars

In [10]:
# (0) Average daily minima and average daily maxima from 'montreal.csv'
T_min, T_max = (
    get_csv_data('montreal.csv', row_id)
    for row_id in ('Daily Minimum', 'Daily Maximum')
)

# Keep only entries 5, 6 and 7 (the summer months) of each T_* array.
# The slices are not converted: they stay whatever get_csv_data() returned.
# (!) T_avg was defined in subsection 2.1
T_min_s, T_avg_s, T_max_s = (
    monthly[5:8] for monthly in (T_min, T_avg, T_max)
)

In [21]:
# Summer month names, used as x-coords by make_trace() below
summer = 'June July August'.split()

# Trace-generating function (returns a Bar object)
def make_trace(y, name, color):
    '''
    Return a Bar trace drawn over the summer months.

    y: the y-coordinates of the bars
    name: label for legend/hover
    color: fill color of the bars

    (!) The x-coords are read from the global variable `summer`.
    '''
    border = Line(color='white', width=2)          # bar border color/width
    bar_style = Marker(color=color, line=border)   # bar fill + border
    return Bar(x=summer, y=y, name=name, marker=bar_style)

In [22]:
# (1) Data object: one Bar trace per temperature series, from make_trace()
data = Data([
    make_trace(y=T_max_s, name='Daily Maximum', color='#BD8F22'),
    make_trace(y=T_avg_s, name='Daily Average', color='#E3BA22'),
    make_trace(y=T_min_s, name='Daily Minimum', color='#F2DA57'),
])

In [27]:
# Annotation-generating function (returns an Annotation object)
def make_annotation(x, y):
    '''
    Return an arrow-less Annotation whose text is `y`, placed at (x, y).

    x: x position, in x-axis coordinates
    y: y position, in y-axis coordinates; also the text displayed
    '''
    label_font = Font(color='#262626', size=13)   # font color and size
    return Annotation(
        text=str(y),       # text is the y-coord
        showarrow=False,   # annotation w/o arrow, default is True
        font=label_font,
        x=x,               # set x position
        xref='x',          # position text horizontally with x-coords
        xanchor='center',  # x position corresp. to center of text
        y=y,               # set y position
        yref='y',          # position text vertically with y-coords
        yanchor='top'      # y position corresp. to top of text
    )

In [28]:
# Annotations object (list-like): one make_annotation() label per bar.
# enumerate() supplies the x positions 0, 1, 2; this matches pairing with
# range(3) because every T_*_s array is a [5:8] slice (3 entries at most)
annotations = Annotations(
    [make_annotation(x, y) for x, y in enumerate(T_max_s)] +
    [make_annotation(x, y) for x, y in enumerate(T_avg_s)] +
    [make_annotation(x, y) for x, y in enumerate(T_min_s)]
)

In [29]:
# Plot title; the adjacent literals are joined by Python into one string
title = ('Fig 2.2: Montreal 1981-2010 '
         'temperature climatology [in deg. C]<br>')

# (2) Layout object
layout = Layout(
    title=title,               # plot title
    barmode='overlay',         # (!) bars are overlaid on this plot
    annotations=annotations,   # link the Annotations object
    legend=Legend(
        x=0,               # legend x position in norm. plotting area coord.
        y=1,               # legend y position in the same coordinates
        yanchor='middle'   # y position corresp. to the middle
    ),
    yaxis=YAxis(
        showticklabels=False,   # no y-axis tick labels
        showgrid=False,         # no horizontal grid lines
        zeroline=False          # no thick y=0 line
    )
)

In [30]:
# (3) Make Figure object from the current `data` and `layout` objects
fig = Figure(data=data, layout=layout)

# (@) Send to Plotly and show in notebook
#     ('s2_avg-min-max-temp' is the filename passed along with the figure)
py.iplot(fig, filename='s2_avg-min-max-temp')

### Horizontal Stacked Bar Chart

In [36]:
# Precipitation rows (same row id in both files), Montreal then Vancouver
mtl_P, van_P = (
    get_csv_data(city_file, 'Precipitation (mm)')
    for city_file in ('montreal.csv', 'vancouver.csv')
)


In [32]:
# Month abbreviations, January through December
months = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# One color per set of 3 months starting in January (12 entries in total)
colors = (
    3 * ['#42A5B3']      # Jan, Feb, Mar
    + 3 * ['#D15A86']    # Apr, May, Jun
    + 3 * ['#5C8100']    # Jul, Aug, Sep
    + 3 * ['#E58429']    # Oct, Nov, Dec
)

# City names
cities = 'Montreal Vancouver'.split()

In [46]:
# Trace-generating function (returns a horizontal Bar object)
def make_trace(x, name, color):
    '''
    Return a horizontal Bar trace with one bar per city.

    x: the bar lengths (monthly totals), one per entry of `cities`
    name: label for hover
    color: fill color of the bars

    The city names on the y-axis are read from the global `cities`.
    '''
    border = Line(color='white', width=1)          # thin white bar border
    bar_style = Marker(color=color, line=border)
    return Bar(
        x=x,               # monthly total on x-axis
        y=cities,          # cities name on the y-axis
        orientation='h',   # default is 'v'
        name=name,
        marker=bar_style
    )

In [47]:
# (1) Data object containing 12 Bar objects (1 for each month); each one
#     carries 2 values: the Montreal and the Vancouver entry of that month
data = Data([
    make_trace(x=[mtl_P[i], van_P[i]], name=months[i], color=colors[i])
    for i in range(12)
])

In [52]:
# Plot title; the adjacent literals are joined by Python into one string
title = ("Fig 2.3: Montreal and Vancouver 1981-2010 "
         "Monthly Average Precipitation<br>"
         "<i>hover with cursor to see each month's total</i>")

# (2) Layout object
layout = Layout(
    title=title,                       # plot title
    showlegend=False,                  # no legend
    hovermode='closest',
    barmode='stack',                   # (!) bars are stacked on this plot
    bargap=0.6,                        # (!) spacing (norm. w.r.t axis) between bars
    plot_bgcolor='rgb(233,233,233)',   # grey plot area
    xaxis=XAxis(
        title='Precipitation [in mm of rain]',   # x-axis title
        zeroline=False,     # no thick zero line
        gridcolor='white',  # white grid lines
        gridwidth=2,        #   drawn wider
        autotick=False,     # (!) overwrite default tick options
        dtick=100,          # (!) distance between ticks
        ticks='outside',    # ticks drawn outside the axes
        ticklen=8,          # (!) tick length
        tickwidth=1.5       #     and width
    )
)

In [53]:
# (3) Make Figure object from the current `data` and `layout` objects
fig = Figure(data=data, layout=layout)

# (@) Send to Plotly and show in notebook
py.iplot(fig, filename='s2_mtl-van-precip')

### A Publication-Worthy Figure with Subplots

In [54]:
# The subplots will share an x-axis (the months).
# Generate a Figure object with 2 axes on 2 rows (1 column); the axis grid
# is printed to stdout
fig = tls.make_subplots(rows=2, cols=1, shared_xaxes=True)

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x1,y2 ]



In [55]:
# Get Data: the 'Daily Average' row of each city file
mtl_t_avg, van_t_avg = (
    get_csv_data(city_file, 'Daily Average')
    for city_file in ('montreal.csv', 'vancouver.csv')
)

# One color per city
col_mtl, col_van = '#0F8C79', '#BD2D28'

# The degree Celsius symbol in unicode (U+2103)
degC = u"\u2103"

In [57]:
# Trace-generating function for the subplots (returns a Bar object)
def make_trace(y, color, name, sbplt):
    '''
    Return a Bar trace over the months, bound to one subplot's y-axis.

    y: the y-coordinates of the bars
    color: fill color of the bars
    name: trace name
    sbplt: number of the y-axis to plot on (gives 'y1', 'y2', ...)

    The x-coords are read from the global `months`.
    '''
    target_yaxis = 'y{}'.format(sbplt)   # plot on y-axis of 'sbplt'
    return Bar(
        name=name,
        x=months,
        y=y,
        xaxis='x1',                      # both subplots on same x-axis
        yaxis=target_yaxis,
        marker=Marker(color=color)
    )

In [59]:
# Build the data object
# Temperature traces go on y-axis 'y2' (described as the top axis).
# NOTE(review): the grid printed by make_subplots lists (1,1) as x1,y1 and
# (2,1) as x1,y2 -- confirm that 'y2' really is the top row
traces_T = [
    make_trace(y=mtl_t_avg, color=col_mtl, name='Montreal', sbplt=2),
    make_trace(y=van_t_avg, color=col_van, name='Vancouver', sbplt=2),
]

# Precipitation traces go on y-axis 'y1' (described as the bottom axis)
traces_P = [
    make_trace(y=mtl_P, color=col_mtl, name='Montreal', sbplt=1),
    make_trace(y=van_P, color=col_van, name='Vancouver', sbplt=1),
]

# Concat the 4 traces and set the 'data' key in the Figure object
fig['data'] = Data(traces_T + traces_P)

In [60]:
# Layout Options
# Bar chart options, added to the 'layout' key of the Figure object
fig['layout'].update(
    barmode='group',   # bars are in groups on this plot
    bargap=0.25,       # norm. spacing between groups
    bargroupgap=0      # norm. spacing between group members
)

# Frame options: fixed figure size, margins and background colors
fig['layout'].update(
    showlegend=False,
    autosize=False,
    width=650,
    height=500,
    margin=Margin(l=70, r=25, t=100, b=100),
    paper_bgcolor='#EFECEA',
    plot_bgcolor='#EFECEA'
)

# Global font of the 'layout' key
fig['layout'].update(
    font=Font(color='#635F5D', family="Droid Serif, serif")
)

In [61]:
# Axis Formatting
def update_axis(title, tickangle):
    '''
    Return a dict of axis options; nothing is modified by this function.

    title: the axis title
    tickangle: the angle of the tick labels

    Tick font size (13), grid color (white) and the absence of a zero line
    are the same for every axis.
    '''
    axis_options = {
        'title': title,
        'tickangle': tickangle,
        'tickfont': {'size': 13},
        'gridcolor': '#FFFFFF',
        'zeroline': False,
    }
    return axis_options

# Shared x-axis: no title, tick labels at 45 degrees
fig['layout']['xaxis1'].update(update_axis(title='', tickangle=45))

# y-axis 'yaxis2': temperature, with the degree Celsius symbol as unit
fig['layout']['yaxis2'].update(
    update_axis(title='<b>Temperature</b> [' + degC + ']', tickangle=0)
)

# y-axis 'yaxis1': precipitation
fig['layout']['yaxis1'].update(
    update_axis(title='<b>Precipitation</b> [mm]', tickangle=0)
)

In [63]:
# Annotations
def make_anno1(text, fontsize, x, y):
    '''
    Return a boxed, arrow-less Annotation positioned in paper coordinates.

    text: annotation text
    fontsize: font size; also used as the padding between text and border
    x, y: position, both in 'paper' coordinates
    '''
    box_style = dict(
        bgcolor='#F5F3F2',
        bordercolor='#FFFFFF',
        borderwidth=1,
        borderpad=fontsize   # padding follows the font size
    )
    return Annotation(
        text=text,
        font=Font(size=fontsize),
        showarrow=False,
        xref='paper', x=x,
        yref='paper', y=y,
        **box_style
    )

# Figure caption and data source, both placed outside the [0, 1] paper range
annotations = Annotations([
    make_anno1(text='Fig. 2.4a: <b>Climate of 1981-2010</b>',
               fontsize=20, x=0, y=1.3),
    make_anno1(text='<b>Source:</b> Environment Canada',
               fontsize=10, x=0, y=-0.3),
])

def make_anno2(text, fontcolor, x):
    '''
    Return an arrow-less Annotation at y=1.2 (paper coordinates).

    text: annotation text
    fontcolor: color of the text
    x: x position in 'paper' coordinates; it refers to the right edge of
       the text (xanchor='right')
    '''
    label_font = Font(size=12, color=fontcolor)
    return Annotation(
        text=text,
        font=label_font,
        showarrow=False,
        xref='paper', x=x, xanchor='right',
        yref='paper', y=1.2,
        bgcolor='#F5F3F2',
        borderpad=10
    )

# Add one city label per city, each written in that city's color
annotations += [
    make_anno2(text='<b>Montreal</b>', fontcolor=col_mtl, x=0.8),
    make_anno2(text='<b>Vancouver</b>', fontcolor=col_van, x=1),
]

# Link the annotations to the 'layout' key of the Figure object
fig['layout'].update(annotations=annotations)

In [64]:
# Add a 'title' meant for the plot URL; its font color has zero alpha,
# which keeps it invisible on the figure itself
fig['layout'].update(
    titlefont=Font(color='rgba(0,0,0,0)'),
    title="montreal=vancouver-1981-2010-climates"
)

In [65]:
# (@) Send the subplot figure to Plotly and show it in the notebook
py.iplot(fig, filename='s2_mtl-van')

### Some Add-On Features

In [66]:
# 3rd annotation-generating function (for arrow annotations)
def make_anno3(text, yref, x, y, ax, ay):
    '''
    Return an Annotation with an arrow whose position is (x, y).

    text: annotation text
    yref: y-axis that `y` refers to; `x` always refers to 'x1'
    x, y: (!) position of the arrow, in axis coordinates
    ax: (!) x text/arrow offset (<0: left, >0: right, in pixels)
    ay: (!) y text/arrow offset
        NOTE(review): the original comment read "<0: below, >0: above",
        but the plotly reference describes a positive ay as placing the
        text below the arrow position -- confirm before relying on it
    '''
    arrow_style = dict(
        showarrow=True,         # show arrow (optional here)
        arrowhead=7,            # arrow head style
        # NOTE(review): originally commented "in pixels"; the plotly
        # reference describes arrowsize as a scale factor -- confirm
        arrowsize=1.5,
        arrowcolor='#C0C0BB'    # grey arrow color
    )
    box_style = dict(
        bgcolor='#FFFFFF',      # white background
        bordercolor='#C0C0BB',  # grey border color
        borderwidth=1,          # border width
        borderpad=4             # border/text space
    )
    style = dict(arrow_style, **box_style)
    return Annotation(
        text=text,
        xref='x1', x=x, ax=ax,    # (!) axis coordinates for x ...
        yref=yref, y=y, ay=ay,    #     ... and for y
        **style
    )

In [69]:
# (4.1a) Find month index and value of the coldest month over both cities
tmp = np.vstack((mtl_t_avg, van_t_avg))
i_cold, cold = np.unravel_index(tmp.argmin(), tmp.shape)[1], tmp.min()

# (4.1b) Find month index and value of the wettest month over both cities
#        (`tmp` is reused: it now holds the precipitation arrays)
tmp = np.vstack((mtl_P, van_P))
i_wet, wet = np.unravel_index(tmp.argmax(), tmp.shape)[1], tmp.max()

# (-) np.vstack((x,y)) stacks x and y as the rows of a new 2d array
#     (row 0 is Montreal, row 1 is Vancouver, 1 column per month)
#     tmp.argmin() / tmp.argmax() return an index into the flattened array
#     np.unravel_index(idx, tmp.shape) converts that 1d index to 2d
#     np.unravel_index(...)[1] is the column, i.e. the month index

# (4.1c) Append 2 on-plot annotations to the annotation list.
#        The list is extended in place, so running this cell again
#        appends the same 2 annotations a second time.
fig['layout']['annotations'] += Annotations([
    make_anno3('<em>Coldest month</em><br>{}'.format(cold) + degC,
               'y2', i_cold, cold, 100, 15),
    make_anno3('<em>Wettest month</em><br>{}'.format(wet) + ' mm',
               'y1', i_wet, wet, -100, 15)
])

In [70]:
# (4.2) Update the text of the title annotation (the 1st in Annotations):
#       the figure label changes from 'Fig. 2.4a' to 'Fig. 2.4b'
fig['layout']['annotations'][0].update(text='Fig. 2.4b: <b>Climate of 1981-2010</b>')

In [71]:
# (@) Send the figure with the arrow annotations to Plotly and show it
#     in the notebook
py.iplot(fig, filename='s2_mtl-van-arrows')