In [0]:
# Basic imports

import pandas as pd
import numpy as np
import scipy.signal as sp
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
# Show the result of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

# Plot styling: matplotlib's ggplot sheet first, then seaborn's 'white' axes style
# (applied second, so it wins for any settings both of them touch)
plt.style.use('ggplot')
sns.set_style('white')

In [0]:
# Necessary imports for connecting with Google Drive
# The leading `!` runs the line as a shell command from the notebook.
# NOTE(review): PyDrive is installed without a version pin, so the installed
# version can differ between runs - consider pinning it.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Create connection with Google Drive & authenticate
# The calls below must stay in this order: sign in, attach the application
# default credentials to the PyDrive auth object, then build the `drive`
# client that the data-loading cell uses.
# NOTE(review): `google.colab` presumably only exists on Colab runtimes - confirm
# before running this notebook anywhere else.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
# Load data into environment
def _fetch_drive_file(drive_id, local_name):
    """Save the Drive file with id `drive_id` locally as `local_name`; return the file handle."""
    handle = drive.CreateFile({'id': drive_id})
    handle.GetContentFile(local_name)
    return handle


# One download per dataset: all-days data, then the 18th and the 28th
days_data = _fetch_drive_file('17gACLzcbGFy4ystQ96eRFI1eHycrXWp0', 'data_days')
eighteen_data = _fetch_drive_file('1wcb5ZyF94r464_ulr01bdzB1NXyj2cCl', 'data_18th')
twentyeight_data = _fetch_drive_file('1DQXl2uwHvDjEU0G5-yaSOi-lWz9l5aUc', 'data_28th')

In [0]:
# DAY OF WEEK: loads and cleans the days_df for visualising the different travel patterns for each day of week

# Savitzky-Golay settings for the smoothing below (window length in rows, polynomial order)
days_window = 41
days_polynomial = 3

# SQL's day of week number (0 is Sunday, 6 is Saturday) mapped to the actual day name
days_dict = {
    0: 'Sunday',
    1: 'Monday',
    2: 'Tuesday',
    3: 'Wednesday',
    4: 'Thursday',
    5: 'Friday',
    6: 'Saturday'
}


def _smooth_day(counts):
    """Smooth one day's minute-by-minute counts with a Savitzky-Golay filter.

    `counts` is the count Series of a single day, already in time order.
    Returns a Series on the same index. A day with fewer rows than the
    filter window cannot be filtered, so it is returned unsmoothed rather
    than raising.
    """
    if len(counts) < days_window:
        return counts
    smoothed = sp.savgol_filter(counts, days_window, days_polynomial)
    return pd.Series(smoothed, index=counts.index)


# Read csv as pandas dataframe
days_df = pd.read_csv('data_days')

# Replace the day of week number with the day name (values not in days_dict are left as they are)
days_df = days_df.replace({'start_day': days_dict})

# Create datetime because it produces nice plots
# Note: the date itself is unimportant and added in for ease
# Built column-wise (midnight + hours + minutes) instead of formatting one string per row
days_df['time'] = (
    pd.Timestamp('2018-01-01')
    + pd.to_timedelta(days_df['start_hour'], unit='h')
    + pd.to_timedelta(days_df['start_minute'], unit='m')
)

# Smooth the minute-by-minute data to cut noise
# The table is in long format (all days stacked in one column), so the filter is
# applied to each day separately and in time order. Filtering the whole column at
# once would depend on the CSV row order and blend neighbouring days together.
days_df = days_df.sort_values('time')
days_df['minutely_count'] = (
    days_df['twentyeight_count']
    .astype(float)
    .groupby(days_df['start_day'])
    .transform(_smooth_day)
)

# Make the data easier to plot: one row per time, one column per day, Monday first
days_df = days_df.pivot(index='time', columns='start_day', values='minutely_count')[
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
]

In [0]:
# MATCH DAYS: loads and cleans eighteen_df and twentyeight_df for comparing each
# match day against the average of two other days

# Set parameters for the savgol_filter smoothing
windows = 11       # window length (rows) used for the match-day line
extra = 120        # additional window length used for the average-day line
polynomial = 3     # polynomial order for both filters


def _load_match_day(csv_path, match_col, baseline_cols):
    """Load one match-day CSV and add the two smoothed series used for plotting.

    Parameters
    ----------
    csv_path : str
        CSV file with `start_hour`, `start_minute` and the count columns named below.
    match_col : str
        Column holding the match-day counts.
    baseline_cols : list of str
        Columns whose row-wise mean forms the comparison ("average day") series.

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by `time`, in chronological order, with two
        added columns: `match_day` and `average_day`.
    """
    frame = pd.read_csv(csv_path)

    # Create datetime and set as index because it produces nice plots
    # Note: the date itself is unimportant and added in for ease
    # Built column-wise (midnight + hours + minutes) instead of formatting one string per row
    frame['time'] = (
        pd.Timestamp('2018-01-01')
        + pd.to_timedelta(frame['start_hour'], unit='h')
        + pd.to_timedelta(frame['start_minute'], unit='m')
    )
    # The filter and the later time-range slices both assume chronological rows;
    # sorting is a no-op when the CSV is already ordered.
    frame = frame.set_index('time').sort_index()

    # Smooth the minute-by-minute match-day data to cut noise
    frame['match_day'] = sp.savgol_filter(frame[match_col], windows, polynomial)

    # Smooth the averaged comparison days
    # (apply more smoothing here because it makes the visual comparison easier)
    frame['average_day'] = sp.savgol_filter(
        frame[baseline_cols].mean(axis=1), windows + extra, polynomial
    )
    return frame


# Read each csv as a pandas dataframe and prepare it
eighteen_df = _load_match_day('data_18th', 'eighteen_count', ['twentyfive_count', 'eleven_count'])
twentyeight_df = _load_match_day('data_28th', 'twentyeight_count', ['twentyone_count', 'fourteen_count'])

In [0]:
# DAY OF WEEK: plot shows different travel patterns for each day of week

# Legend entries, Monday first
weekday_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
label_size = 15

# Only the 01:10 - 22:50 part of the day is drawn
# Note: the date itself is unimportant and added in for ease
plot_start, plot_end = '2018-01-01 01:10:00', '2018-01-01 22:50:00'
days_df.loc[plot_start:plot_end].plot(figsize=(10, 5), fontsize=label_size, linewidth=2, legend=False)

# Add legend, axis labels, title
plt.legend(weekday_labels, fontsize=label_size)
plt.xlabel('Time', fontsize=label_size)
plt.ylabel('Number of London commuters \n starting their journey home', fontsize=label_size)
plt.title('Home commutes on an average June day \n separated by day of week', fontsize=18)

# Add text & arrow annotation (both positions are fractions of the figure)
arrow_style = dict(facecolor='black', shrink=0.05)
plt.annotate('Different days have \n different travel patterns',
             xy=(0.74, 0.64), xytext=(0.5, 0.7), xycoords='figure fraction',
             arrowprops=arrow_style, fontsize=10, fontname='Helvetica')

In [0]:
# TUNISIA: plot match day vs average day for the Tunisia game

# Plot just between relevant hours of 18:00 and 22:00
# Note: the date itself is unimportant and added in for ease
tunisia_ax = (
    eighteen_df[['match_day', 'average_day']]
    .loc['2018-01-01 18:00':'2018-01-01 22:00']
    .plot(figsize=(10, 5), fontsize=15, linewidth=3, legend=False)
)

# Add legend, axis labels, title
tunisia_ax.legend(['Tunisia Match', 'Average Monday in June'], fontsize=15)
tunisia_ax.set_xlabel('Time', fontsize=15)
tunisia_ax.set_ylabel('Number of London commuters \n starting their journey home', fontsize=15)
# Title spelling corrected ("Tunisa" -> "Tunisia")
tunisia_ax.set_title('Home commutes on the day of the England-Tunisia match', fontsize=18)

# Add vertical lines to signify match start and end
for match_marker in ('2018-01-01 19:00', '2018-01-01 20:45'):
    tunisia_ax.axvline(match_marker, alpha=0.3, linestyle='dashed')

# Add text & arrow annotations
# Each entry: (text, text position, arrow tip, font size); positions are figure fractions
tunisia_notes = [
    ('It begins!', (0.24, 0.2), (0.355, 0.125), 13),
    ('Final whistle', (0.5, 0.17), (0.695, 0.125), 13),
    ('Supporters stay near work', (0.3, 0.75), (0.25, 0.78), 10),
    ('Supporters flood the streets', (0.72, 0.4), (0.795, 0.33), 10),
]
for note_text, text_at, arrow_tip, note_size in tunisia_notes:
    tunisia_ax.annotate(note_text, xytext=text_at, xy=arrow_tip,
                        arrowprops=dict(facecolor='black', shrink=0.05),
                        fontsize=note_size, xycoords='figure fraction', fontname='Helvetica')

In [0]:
# BELGIUM: plot match day vs average day for the Belgium game
# Mirrors the Tunisia cell above; kept as its own copy rather than a shared plotting function

def _belgium_note(text, text_at, arrow_tip, size):
    """Draw one arrowed note (positions are figure fractions) and return the annotation."""
    return plt.annotate(text, xytext=text_at, xy=arrow_tip,
                        arrowprops=dict(facecolor='black', shrink=0.05),
                        fontsize=size, xycoords='figure fraction', fontname='Helvetica')


# Plot just between relevant hours of 18:00 and 22:00
# Note: the date itself is unimportant and added in for ease
belgium_lines = twentyeight_df[['match_day', 'average_day']]
belgium_lines.loc['2018-01-01 18:00':'2018-01-01 22:00'].plot(
    figsize=(10, 5), fontsize=15, linewidth=3, legend=False)

# Add legend, axis labels, title
axis_label_size = 15
plt.legend(['Belgium Match', 'Average Thursday in June'], fontsize=axis_label_size)
plt.xlabel('Time', fontsize=axis_label_size)
plt.ylabel('Number of London commuters \n starting their journey home', fontsize=axis_label_size)
plt.title('Home commutes on the day of the England-Belgium match', fontsize=18)

# Add vertical lines to signify match start and end
marker_style = dict(alpha=0.3, linestyle='dashed')
plt.axvline('2018-01-01 19:00', **marker_style)
plt.axvline('2018-01-01 20:45', **marker_style)

# Add text & arrow annotations
_belgium_note('Kick-off', (0.26, 0.2), (0.36, 0.125), 13)
_belgium_note('Final whistle', (0.5, 0.15), (0.71, 0.125), 13)
_belgium_note('More supporters this time \n who congregate earlier', (0.27, 0.8), (0.21, 0.83), 10)
_belgium_note('Gap is bigger than the Tunisia \n game #cominghome', (0.43, 0.47), (0.5, 0.28), 10)
_belgium_note('More supporters flood \n more streets', (0.72, 0.48), (0.795, 0.41), 10)