In [1]:
import pandas as pd 
import numpy as np
from datetime import datetime

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Mobility Data

In [21]:
mobility = pd.read_csv('data/COVID-19-transport-use-statistics.csv')

# mobility plot 
fig = px.line(mobility, x=mobility.Date, y=mobility.drop(['Date'], axis=1).columns,
              title='Mobility during covid')

# add baseline
fig.add_shape(type="line",
        x0=mobility.index.min(),
        y0=1,
        x1=mobility.index.max(),
        y1=1,
    name = 'baseline',
    line=dict(color='Red'))

fig.update_layout(margin={"r":0,"t":50,"l":0,"b":30})
fig.show()

In [22]:
# convert strings to datetime
for i in range(len(mobility['Date'])):
    mobility['Date'][i] = datetime.strptime(mobility['Date'][i], '%d/%m/%Y')



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [77]:
def mobility_mean(start_date, end_date): 
    '''Average mobility of cars and total vehicles during specified dates'''
    
    cars_covid  = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date) ]['Cars']
    lcv_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['Light Commercial Vehicles']
    hgv_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['Heavy Goods Vehicles']
    others_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['All motor vehicles']
    sum_vehicles = (cars_covid+lcv_covid+hgv_covid+others_covid)/4

    return(np.mean(cars_covid)*100, np.mean(sum_vehicles)*100)
    
#-------------------------------    
car_mean, all_vehicles_mean = mobility_mean(datetime(2020,4,1), datetime(2020,4,3))
print('Cars mobility mean during April 2020: ', car_mean)
print('All vehicles mean during April 2020: ', all_vehicles_mean)

Cars mobility mean during April 2020:  32.0
All vehicles mean during April 2020:  41.25


In [78]:
def mobibiliy_change(start_date, end_date):
    '''Percentage change of mobility between given dates in comparison to baseline'''
    
    cars_covid  = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date) ]['Cars']
    lcv_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['Light Commercial Vehicles']
    hgv_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['Heavy Goods Vehicles']
    others_covid = mobility[(mobility['Date'] > start_date) & (mobility['Date'] < end_date)]['All motor vehicles']
    sum_vehicles = (cars_covid+lcv_covid+hgv_covid+others_covid)/4

    change_cars = ((np.mean(cars_covid)-1)/1)*100
    change_all = ((np.mean(sum_vehicles)-1)/1)*100
    return(change_cars , change_all)
    
#------------------------------- 
change_cars, change_all = mobibiliy_change(datetime(2020,4,1), datetime(2020,4,30))
print('Car mobility decrese in April 2020:' , change_cars)
print('All vehicles mobility decrese in April 2020:' , change_all)

Car mobility decrese in April 2020: -66.92857142857143
All vehicles mobility decrese in April 2020: -57.169642857142854


# Carbon Emission

https://carbonmonitor.org  
data filtered for UK, for Ground Transport sector from 1st January 2019 to 31st December 2020

In [85]:
co2 = pd.read_csv('data/carbon-monitor-UK.csv')

# plot emissions over time
fig = px.line(co2, x=co2.date, y=co2.drop(['date', 'sector'], axis=1).columns,
              title='Carbon emission during covid')
fig.update_layout(margin={"r":0,"t":50,"l":0,"b":30})
fig.show()

In [26]:
# convert strings to datetime
for i in range(len(co2['date'])):
    co2['date'][i] = datetime.strptime(co2['date'][i], '%d/%m/%Y')



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [106]:
# average pre covid mean
old_co2 = np.mean(co2[co2['date'] < datetime(2020, 3, 1)]['MtCO2 per day'])
print("Daily CO2 emission mean pre-covid: {} MtCO2".format(old_co2))

Daily CO2 emission mean pre-covid: 0.31364421374117674 MtCO2


In [93]:
def carbon_change(start_date, end_date):
    # average pre covid
    old_co2 = np.mean(co2[co2['date'] < datetime(2020, 3, 1)]['MtCO2 per day'])
    #average over given time
    new_co2 = np.mean(co2[(co2['date'] > start_date) & (co2['date'] < end_date)]['MtCO2 per day'])

    percentage_change = ((new_co2-old_co2)/old_co2)*100

    return new_co2, percentage_change

# -----------

mean_april, change_april =  carbon_change(datetime(2020, 4, 1), datetime(2020, 4, 30))
print('Carbon mean in April 2020:' , mean_april)
print('Carbon change in April 2020:' , change_april)

Carbon mean in April 2020: 0.1827151192857143
Carbon change in April 2020: -41.74446354158054


# Homeworking
https://www.ons.gov.uk/peoplepopulationandcommunity/healthandsocialcare/conditionsanddiseases/bulletins/coronavirustheukeconomyandsocietyfasterindicators/1october2020#measuring-the-data

In [94]:
homeworking_ons = pd.read_csv('data/homework_ons.csv')

fig = px.line(homeworking_ons, x=homeworking_ons.Date, y=homeworking_ons.drop(['Date'], axis=1).columns,
              title='Homeworking')
fig.update_layout(margin={"r":0,"t":50,"l":0,"b":30})
fig.show()

In [95]:
# get dates from homeworking ONS dataset
dates = []
for i in homeworking_ons['Date']:
    split = i.strip().split(' ')
    if split[-1] == 'May':
        dates.append( (datetime(2020,5,int(split[0])), datetime(2020,5,int(split[2]))) )
    if split[-1] == 'June':
        dates.append( (datetime(2020,6,int(split[0])), datetime(2020,6,int(split[2]))) )
    if split[-1] == 'July':
        dates.append( (datetime(2020,7,int(split[0])), datetime(2020,7,int(split[2]))) )
    if split[-1] == 'August':
        dates.append( (datetime(2020,8,int(split[0])), datetime(2020,8,int(split[2]))) )
    if split[-1] == 'September':
        dates.append( (datetime(2020,9,int(split[0])), datetime(2020,9,int(split[2]))) )
        
dates[11] = (datetime(2020, 7, 29, 0, 0), datetime(2020, 8, 2, 0, 0))

# Homeworking baseline
https://data.europa.eu/euodp/en/data/dataset/orJJzGDF3cnXimvsoKDfXw

In [37]:
# clean the file as its messy

homeworking1 = pd.read_csv('data/homeworking.tsv')
name = 'geo\\time\t2019 \t2018 \t2017 \t2016 \t2015 \t2014 \t2013 \t2012 \t2011 \t2010 \t2009 \t2008 \t2007 \t2006 \t2005 \t2004 \t2003 \t2002 \t2001 \t2000 \t1999 \t1998 \t1997 \t1996 \t1995 \t1994 \t1993 \t1992 '

# get geo location
geo = []
for i in range(len(homeworking1[name])):
    geo.append(homeworking1[name][i][0:2])

homeworking2 = pd.read_csv('data/homeworking.tsv', delimiter='\t')

# first 5 columns from here
homeworking = homeworking1.iloc[:, :5]
# add geo location
homeworking['geo'] = geo
# add year columns
homeworking = pd.concat([homeworking, homeworking2.iloc[:,1:]], axis=1, join="inner")

# filter data for for employed in UK
employed_homeworking = homeworking[(homeworking['geo']=='UK') & (homeworking['wstatus']=='EMP') & (homeworking['age'] =='Y20-64')]
employed_homeworking.head()

Unnamed: 0,unit,sex,frequenc,age,wstatus,geo,2019,2018,2017,2016,...,2001,2000,1999,1998,1997,1996,1995,1994,1993,1992
2382,PC,F,NVR,Y20-64,EMP,UK,73.0,75.4,75.6,75.8,...,75.6,75.6,76.1 b,75.5,72.1,72.9,73.7,73.8,73.2,74.5
10227,PC,F,SMT,Y20-64,EMP,UK,21.5,19.3,19.3,19.1,...,20.6,20.5,20.1 b,20.4,23.8,22.9,22.2,21.8,22.3,18.5
18016,PC,F,USU,Y20-64,EMP,UK,5.5,5.2,5.1,5.1,...,3.8,3.9,3.8 b,4.1,4.0,4.2,4.1,4.4,4.4,7.0
25851,PC,M,NVR,Y20-64,EMP,UK,72.8,75.7,75.3,75.6,...,71.5,71.5,71.2 b,71.2,66.8,67.8,68.1,68.1,67.7,66.7
33698,PC,M,SMT,Y20-64,EMP,UK,23.1,20.5,21.4,20.9,...,27.0,27.0,27.6 b,27.4,31.9,30.8,30.5,30.4,30.9,24.5


In [88]:
# get baseline 
homeworking_year_stat = []
for i in np.array(employed_homeworking)[0, 6::]:
    homeworking_year_stat.append(float(i[0:4]))
       
homeorking_mean_old =  np.mean(homeworking_years)
print('UK average of people going to work is {} %'.format(homeorking_mean_old))

UK average of people going to work is 75.15357142857144 %


In [45]:
# get change over years
homeworking_change = []
for i in homeworking_ons['Travelled to work']:
    change = ((i-homeorking_mean_old)/homeorking_mean_old)*100
    homeworking_change.append(change) 

# Summary

In [99]:
car_mobility_means = []
all_mobility_means = []
car_mobility_changes = []
all_mobility_changes = []
co2_change = []
co2_means = []


for date in dates:
    car_mean, all_vehicles_mean = mobility_mean(date[0], date[1])
    change_cars, change_all =  mobibiliy_change(date[0], date[1])
    mean_co2, change_co2 = carbon_change(date[0], date[1])
    
    car_mobility_means.append(car_mean)
    all_mobility_means.append(all_vehicles_mean)
    car_mobility_changes.append(change_cars)
    all_mobility_changes.append(change_all)
    co2_change.append(change_co2)
    co2_means.append(mean_co2)
    
# combine above into dataframe    
statistics_df = pd.DataFrame({'period':homeworking_ons['Date'],
                             'car_mobility_mean': car_mobility_means,
                             'all_vehicles_mobility_mean':all_mobility_means,
                             'car_mobility_baseline_change':car_mobility_changes,
                             'all_mobility_baseline_change':all_mobility_changes,
                             'co2_change': co2_change,
                             'co2 means':co2_means, 
                             'travelled to work':(homeworking_ons['Travelled to work']),
                             'travelled to work change':homeworking_change})

In [98]:
# plot mobility and emisison over time

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['car_mobility_mean'],
    mode="lines+markers+text",
    name="Car mobility",
    text=round(statistics_df['car_mobility_mean'], 2),
    textposition="bottom center"
))

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['travelled to work'],
    mode="lines+markers+text",
    name="Travelled to work",
    text=round(statistics_df['travelled to work'], 2),
    textposition="bottom center"
))

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['co2 means'],
    mode="lines+markers+text",
    name="CO2 emission",
    text=round(statistics_df['co2 means'], 2),
    textposition="bottom center"
))


fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [101]:
# plot change in statistics over time

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['co2_change'],
    mode="lines+markers+text",
    name="CO2 emission (change)",
    text=round(statistics_df['co2_change'], 2),
    textposition="top center"
))

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['car_mobility_baseline_change'],
    mode="lines+markers+text",
    name="Car mobility (change)",
    text=round(statistics_df['car_mobility_baseline_change'], 2),
    textposition="top center"
))

fig.add_trace(go.Scatter(
    x=statistics_df.period,
    y=statistics_df['travelled to work change'],
    mode="lines+markers+text",
    name="Travelled to work (change)",
    text=round(statistics_df['travelled to work change'], 2),
    textposition="bottom center"
))


fig.update_yaxes(title="percentage %")

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

# July (50/50 scheme) statistics

In [102]:
def print_monthly_stats(month_name):
    '''Month name from a list [May, June, July, August, September]'''
    
    if month_name=='May':
        for i in statistics_df.columns[1::]:
            print(i, "----", np.mean(statistics_df[i][:3]))
            
    if month_name=='June':
        for i in statistics_df.columns[1::]:
            print(i, "----", np.mean(statistics_df[i][3:7]))
    
    if month_name=='July':
        for i in statistics_df.columns[1::]:
            print(i, "----", np.mean(statistics_df[i][7:11]))
            
    if month_name=='August':
        for i in statistics_df.columns[1::]:
            print(i, "----", np.mean(statistics_df[i][11:15]))
    
    if month_name=='September':
        for i in statistics_df.columns[1::]:
            print(i, "----", np.mean(statistics_df[i][-3::]))

In [103]:
print_monthly_stats('July')

car_mobility_mean ---- 83.45833333333333
all_vehicles_mobility_mean ---- 90.84375
car_mobility_baseline_change ---- -16.54166666666667
all_mobility_baseline_change ---- -9.156249999999998
co2_change ---- -4.8403557787417135
co2 means ---- 0.2984627179166667
travelled to work ---- 51.25
travelled to work change ---- -31.80630138288268
