# Setup

In [1]:
import pandas as pd
import numpy as np
import random
from os import path
# Notebook-wide configuration.
# Input CSVs live in a `data` directory next to the notebook's parent folder.
DATA_DIR = path.join(path.pardir, 'data')
# Floating point type used for every state vector in the model.
NP_TYPE = np.float64
# Turn every numpy floating point anomaly into an exception instead of a
# warning, so a bad simulation step cannot pass silently.
np.seterr(divide='raise', over='raise', under='raise', invalid='raise')

{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

# Initial state

In [2]:
def check_state(S, I, R, N, pop_size=None):
    """Assert that an SIR state is internally consistent.

    Parameters
    ----------
    S, I, R : numpy arrays
        Susceptible, infected and recovered counts, one entry per station.
    N : numpy array
        Total population per station.
    pop_size : number, optional
        Expected total population over all stations. Defaults to the
        notebook-level ``POP_SIZE``.

    Raises
    ------
    AssertionError
        If the arrays differ in shape, a compartment is negative, the
        compartments do not add up to ``N``, or ``N`` does not sum to
        ``pop_size``.
    """
    if pop_size is None:
        pop_size = POP_SIZE
    # `==` between arrays broadcasts, so shapes have to be compared explicitly.
    assert S.shape == I.shape == R.shape == N.shape, 'state arrays differ in shape'
    # Compare with a tolerance: S, I and R are floats, and e.g. (N - I) + I is
    # not guaranteed to reproduce N bit for bit.
    assert np.allclose(N, S + I + R), 'S + I + R does not add up to N'
    assert (S >= 0).all(), 'negative susceptible count'
    assert (R >= 0).all(), 'negative recovered count'
    assert (I >= 0).all(), 'negative infected count'
    assert np.isclose(N.sum(), pop_size), 'total population is not conserved'

In [3]:
# Load the two lookup tables from DATA_DIR: per-borough population figures and
# the table listing each station with its local authority.
boroughs, stations = (
    pd.read_csv(path.join(DATA_DIR, csv_name))
    for csv_name in ('borough_pop.csv', 'station_borough.csv')
)
# Total population over all boroughs; check_state() compares against this.
POP_SIZE = boroughs['Population'].sum()
POP_SIZE

7947055

In [4]:
# Number of stations listed for each local authority.
borough_count = (
    stations['Local authority']
    .value_counts()
    .rename('Station count')
    .to_frame()
)
# Attach the station count to each borough and split the borough population
# evenly across its stations.
boroughs_pop_count = boroughs.merge(
    borough_count,
    left_on='Local authority',
    right_index=True,
    validate='one_to_one',
).assign(**{
    'Station population': lambda merged: merged['Population'] / merged['Station count'],
})
boroughs_pop_count.head()

Unnamed: 0,Local authority,Population,Station count,Station population
0,Havering,234127,6,39021.166667
1,Wandsworth,299347,8,37418.375
2,Harrow,233495,12,19457.916667
3,Croydon,352763,7,50394.714286
4,Lewisham,270418,8,33802.25


In [5]:
# Give every station its borough's per-station population share, ordered by
# station name. merge() without `on=` joins on the column names both frames
# have in common.
stations_pop = (
    stations
    .merge(boroughs_pop_count)
    .sort_values(by='Station')
)
# Per-station population vector, in the row order of `stations_pop`.
N = stations_pop.loc[:, 'Station population'].values

In [6]:
def random_array(total_sum, array_size):
    """Scatter `total_sum` unit increments over `array_size` slots at random.

    Each increment goes to a slot chosen with ``random.randrange``. Returns
    an array of dtype ``NP_TYPE`` and length ``array_size`` whose entries add
    up to ``total_sum``.
    """
    counts = np.zeros(array_size, NP_TYPE)
    draws = (random.randrange(0, array_size) for _ in range(total_sum))
    for slot in draws:
        counts[slot] += 1
    return counts
        
# Number of infected individuals seeded into the network at t = 0.
INITIAL_INFECTED = 100
# Seed for the stdlib RNG used by random_array(). Seeding right before the
# draw makes this cell idempotent and the whole notebook reproducible under
# Restart & Run All.
RANDOM_SEED = 0
random.seed(RANDOM_SEED)

# Place the initial infections at random stations; everyone else starts
# susceptible and nobody has recovered yet.
I = random_array(INITIAL_INFECTED, len(N))
S = N - I
R = np.zeros(len(N), NP_TYPE)
state = (S, I, R, N)
# Fails if a station received more infections than it has inhabitants.
check_state(*state)

# Transition matrices

## Load data

In [7]:
# Three-letter day abbreviation -> day index, with Monday as 0.
DAY_LOOKUP = {
    day_name: day_index
    for day_index, day_name in enumerate(
        ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    )
}

In [8]:
move_data = pd.read_csv(path.join(DATA_DIR, 'journey_count.csv'))
# Convert day names to day indices by assigning the mapped column back.
# `move_data['Day'].replace(..., inplace=True)` is a chained in-place update
# that no longer modifies `move_data` under pandas Copy-on-Write.
move_data['Day'] = move_data['Day'].map(DAY_LOOKUP)
assert move_data['Day'].notna().all(), 'journey_count.csv has a day name missing from DAY_LOOKUP'
move_data['Day'] = move_data['Day'].astype(int)
move_data.columns = ['Start', 'End', 'Day', 'Hour', 'Journeys']
# Hours above 23 are moved to the following day: Day + 1, Hour - 24.
# NOTE(review): a Sunday row (Day 6) with Hour > 23 ends up as Day 7, which
# makes the hourly cycle longer than 168 hours - confirm this is intended
# rather than wrapping round to Day 0.
after_midnight = move_data['Hour'] > 23
move_data.loc[after_midnight, 'Day'] += 1
move_data.loc[after_midnight, 'Hour'] -= 24
move_data.head()

Unnamed: 0,Start,End,Day,Hour,Journeys
0,Acton Central,Acton Central,5,18,1
1,Acton Central,Acton Central,6,7,1
2,Acton Central,Acton Central,3,13,1
3,Acton Central,Acton Central,2,17,1
4,Acton Central,Baker Street,0,7,1


In [9]:
# Station name -> row/column index used in the transition matrices.
# The order is taken from `stations_pop`, the same frame the population vector
# N is read from, so entry i of S/I/R/N and row/column i of every matrix refer
# to the same station. Deriving the order from the journey file instead would
# only line up if that file happened to list stations in the same order, and
# it would miss any station that appears only as a journey destination.
STATION_LOOKUP = {
    name: i for i, name in enumerate(stations_pop['Station'])
}

## Create matrices

In [10]:
def calc_hour(day, hour):
    """Return the hour offset from day 0, hour 0 for a (day, hour) pair."""
    hours_per_day = 24
    return hours_per_day * day + hour

# One (stations x stations) matrix per hour, indexed by calc_hour(day, hour),
# running from hour 0 of day 0 up to the last hour present on the last day.
max_day = move_data['Day'].max()
max_day_max_hour = move_data.loc[move_data['Day'] == max_day, 'Hour'].max()
hourly_F = [
    np.zeros((len(STATION_LOOKUP), len(STATION_LOOKUP)), NP_TYPE)
    for _ in range(calc_hour(max_day, max_day_max_hour) + 1)
]

# Station name -> population assigned to that station.
STATION_POP = dict(zip(stations_pop['Station'], stations_pop['Station population']))

# F[start, end] is the number of journeys from `start` to `end` in that hour,
# as a fraction of the start station's population.
for row in move_data.itertuples():
    start = STATION_LOOKUP[row.Start]
    end = STATION_LOOKUP[row.End]
    # Accumulate rather than assign, so that repeated rows for the same
    # (Start, End, Day, Hour) add up instead of overwriting each other.
    hourly_F[calc_hour(row.Day, row.Hour)][start, end] += row.Journeys / STATION_POP[row.Start]

In [11]:
def check_F(F):
    """Assert that F is a valid hourly transition matrix.

    F must be square with one row/column per entry of N, and every row must
    sum to strictly less than 1.
    """
    n_stations = len(N)
    assert F.shape == (n_stations, n_stations)
    row_totals = F.sum(axis=1)
    assert (row_totals < 1).all()
# Validate every hourly matrix up front.
for F in hourly_F:
    check_F(F)

# Constants

In [12]:
# BETA is the coefficient of the S*I/N infection term and GAMMA the fraction
# of infected that recover per step; both are written as a rate divided by 24.
BETA, GAMMA = (NP_TYPE(rate / 24) for rate in (0.5, 1/3))
# Sanity check on the ratio of the two rates.
assert np.isclose(BETA / GAMMA, 1.5)

# Main

In [13]:
def update_state(F, S, I, R, N):
    """Advance the SIR state by one step using transition matrix F.

    Each compartment changes through the epidemic terms (infection at rate
    BETA * S * I / N, recovery of a GAMMA share of I) and through travel:
    every station loses the fraction given by its row sum of F and receives
    `F.T.dot(X)` from the others.

    Returns the tuple (Snew, Inew, Rnew, Nnew), where Nnew is the sum of the
    three new compartments.
    """
    newly_infected = BETA * S * I * 1/N
    leaving_fraction = F.sum(axis=1)
    inbound = F.T

    def step(epidemic_term, compartment, staying):
        # epidemic gain/loss + arrivals - departures + what was already there
        return epidemic_term + inbound.dot(compartment) - leaving_fraction * compartment + staying

    Snew = step(-newly_infected, S, S)
    Inew = step(newly_infected, I, (1-GAMMA) * I)
    Rnew = step(GAMMA * I, R, R)
    return (Snew, Inew, Rnew, Snew + Inew + Rnew)

In [14]:
def run_simulation(timesteps):
    """Run the model for `timesteps` steps from the initial state.

    Starts from the notebook-level (S, I, R, N), cycles through `hourly_F`
    (wrapping round when `timesteps` exceeds its length) and validates the
    state with check_state() after every step.

    Returns
    -------
    tuple
        The final (S, I, R, N) state, so the result of the run can be
        inspected instead of being thrown away.

    Raises
    ------
    AssertionError
        If any intermediate state fails check_state().
    """
    state = (S, I, R, N)
    for t in range(timesteps):
        F = hourly_F[t % len(hourly_F)]
        state = update_state(F, *state)
        check_state(*state)
    return state

# Bind the result so the cell does not dump four large arrays as its output.
final_state = run_simulation(1000)

In [15]:
# Fixture for the timing experiments: the matrix at hour index 29
# (day 1, hour 5 under calc_hour), plus the two intermediate quantities that
# update_state() computes from it.
F = hourly_F[29]
Fdash = F.sum(axis=1)
S_I_interaction = BETA * S * I / N

In [17]:
# Baseline timing: one matrix-vector product with a single hourly matrix.
%timeit F.dot(S)

58.8 µs ± 14.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [21]:
# Two small 2x2 integer matrices used to try out the concatenation helpers.
a = np.arange(1, 5).reshape(2, 2)
b = np.arange(5, 9).reshape(2, 2)

In [22]:
# Side-by-side (column-wise) concatenation of a, b, a, b.
np.concatenate([a, b] * 2, axis=1)

array([[1, 2, 5, 6, 1, 2, 5, 6],
       [3, 4, 7, 8, 3, 4, 7, 8]])

In [23]:
def matrix_concatenate(a, b):
    """Build the block-diagonal matrix [[a, 0], [0, b]].

    `a` and `b` must be square 2-D arrays of the same shape (checked with
    assertions). The off-diagonal blocks are float zeros of that shape.
    """
    assert a.shape == b.shape
    assert a.ndim == 2
    rows, cols = a.shape
    assert rows == cols
    padding = np.zeros(a.shape)
    # Assemble column-wise: the left half holds `a` above zeros, the right
    # half holds zeros above `b`.
    left_half = np.concatenate((a, padding), axis=0)
    right_half = np.concatenate((padding, b), axis=0)
    return np.concatenate((left_half, right_half), axis=1)

# Demo on the toy matrices: a and b end up on the diagonal, zeros elsewhere.
matrix_concatenate(a, b)

array([[1., 2., 0., 0.],
       [3., 4., 0., 0.],
       [0., 0., 5., 6.],
       [0., 0., 7., 8.]])

In [24]:
# Check that applying F to S and to I separately gives the same result as
# applying the block-diagonal matrix [[F, 0], [0, F]] to the stacked vector.
Snew = F.dot(S)
Inew = F.dot(I)
SInew1 = np.concatenate((Snew, Inew))

Fbig = matrix_concatenate(F, F)
SInew2 = Fbig.dot(np.concatenate((S, I)))

# Compare with a tolerance: the two products sum over vectors of different
# lengths, so the floating point results need not match bit for bit.
assert np.allclose(SInew1, SInew2)

In [25]:
# Timing of the combined approach: one product with the block-diagonal matrix.
%timeit Fbig.dot(np.concatenate((S, I)))

640 µs ± 154 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [26]:
# Timing of the separate approach: two products with the original matrix.
%timeit F.dot(S); F.dot(I)

93.5 µs ± 12.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
