## Preliminaries

### Paths

In [1]:
import os
import pathlib
import sys

In [2]:
# Outside Google Colab, make the parent of the current working directory importable.
# NOTE(review): this presumes the notebook is launched from the notebooks directory,
# so that `parent` is the project root - confirm.  `parent` is not defined when
# running inside Colab.
if 'google.colab' not in str(get_ipython()):

    notebooks = os.getcwd()
    parent = str(pathlib.Path(notebooks).parent)

    # Guard the append so that re-running this cell does not pile up duplicate entries.
    if parent not in sys.path:
        sys.path.append(parent)

<br>

Warehouse

```python
warehouse = os.path.join(parent, 'warehouse')
if not os.path.exists(warehouse):
    os.makedirs(warehouse)
```

<br>
<br>

### Libraries

In [3]:
%matplotlib inline

import logging
import collections

import numpy as np
import pandas as pd
import xlrd

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns


<br>
<br>

## Exploration

In [4]:
# Five-year age bands from '0-4' up to '85-89', followed by the open-ended '90+' band.
age_groups = ['{}-{}'.format(lower, lower + 4) for lower in range(0, 90, 5)] + ['90+']

In [5]:
# Year of the data; it is substituted into the warehouse file names read below.
year = '2011'

<br>

### Data

#### Patients

In [6]:
# Location of the patients data of the selected year, relative to the working directory.
uri = '../warehouse/patients/{year}.csv'.format(year=year)

# pandas.read_csv reports a missing or unreadable file with an OSError subclass and
# malformed, empty or undecodable content with a ValueError subclass; it does not use
# RuntimeError for these, so those are the types to intercept.  The failure is re-raised
# with the file location, and `from err` keeps the original error as the cause.
try:
    patients = pd.read_csv(filepath_or_buffer=uri, header=0, encoding='utf-8')
except (OSError, ValueError) as err:
    raise RuntimeError('Unable to read {uri}: {err}'.format(uri=uri, err=err)) from err

<br>

Fraction of patients from MSOA to trust

In [7]:
# Per record: the patients that went from the MSOA to the trust, as a fraction of
# all the patients of that MSOA.
patients['fp_from_msoa_to_trust'] = (
    patients['patients_from_msoa_to_trust'] / patients['total_patients_of_msoa']
)

In [8]:
# Preview the first rows, now including the derived `fp_from_msoa_to_trust` column.
patients.head()

Unnamed: 0,catchment_year,msoa,trust_code,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust
0,2011,E02001214,R0A,1981,3418,0.579579
1,2011,E02001240,R0A,1181,2944,0.401155
2,2011,E02001057,R0A,1752,3421,0.512131
3,2011,E02006902,R0A,906,1791,0.505863
4,2011,E02001056,R0A,2002,4350,0.46023


In [25]:
# Number of distinct trust codes among the patient records.
len(patients['trust_code'].unique())

139

<br>

#### Populations

In [9]:
# Location of the grouped populations data of the selected year, relative to the
# working directory.
uri = '../warehouse/populations/group/{year}.csv'.format(year=year)

# pandas.read_csv reports a missing or unreadable file with an OSError subclass and
# malformed, empty or undecodable content with a ValueError subclass; it does not use
# RuntimeError for these, so those are the types to intercept.  The failure is re-raised
# with the file location, and `from err` keeps the original error as the cause.
try:
    populations = pd.read_csv(filepath_or_buffer=uri, header=0, encoding='utf-8')
except (OSError, ValueError) as err:
    raise RuntimeError('Unable to read {uri}: {err}'.format(uri=uri, err=err)) from err

In [10]:
# Preview the first rows of the population counts.
populations.head()

Unnamed: 0,msoa,sex,0-4,5-9,10-14,15-19,20-24,25-29,30-34,35-39,...,45-49,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85-89,90+
0,E02004297,female,206,155,177,224,185,240,262,268,...,340,321,260,270,241,179,146,111,73,36
1,E02004290,female,159,172,162,170,141,158,182,204,...,290,216,230,220,185,143,92,42,30,15
2,E02004298,female,255,222,275,262,180,226,247,325,...,411,397,349,385,272,248,215,137,106,68
3,E02004299,female,235,194,201,247,244,260,262,253,...,307,314,298,277,233,193,149,133,113,55
4,E02004291,female,195,178,191,199,176,220,210,205,...,289,251,202,222,185,178,133,111,57,38


<br>

### Arithmetic

Population Aggregates

In [11]:
# Total population per MSOA: the age-group counts of all the rows of an MSOA, added up
# across the rows and then across the age groups.
#
# Only the `age_groups` columns are summed, rather than "every column except `sex`".
# A later cell merges `ppln_msoa` into `populations`; summing everything would fold
# those earlier totals into the new ones if this cell were run again afterwards.
aggregates = (
    populations
    .groupby(by='msoa')[age_groups]
    .sum()
    .sum(axis=1)
    .rename('ppln_msoa')
    .to_frame()
    .reset_index(drop=False)
)
aggregates.head()

Unnamed: 0,msoa,ppln_msoa
0,E02000001,7412
1,E02000002,6783
2,E02000003,10088
3,E02000004,6185
4,E02000005,8588


<br>

Append Aggregates to `populations`

In [12]:
# Attach each MSOA's total population (`ppln_msoa`) to its population rows.
#
# The cell rebinds `populations` to its own merge result.  Dropping any `ppln_msoa`
# left by a previous run first (errors='ignore' makes this a no-op on the first run)
# keeps a re-run from producing `ppln_msoa_x` / `ppln_msoa_y` instead of `ppln_msoa`.
populations = (
    populations
    .drop(columns='ppln_msoa', errors='ignore')
    .merge(aggregates, how='left', on='msoa')
)
populations.head()

Unnamed: 0,msoa,sex,0-4,5-9,10-14,15-19,20-24,25-29,30-34,35-39,...,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85-89,90+,ppln_msoa
0,E02004297,female,206,155,177,224,185,240,262,268,...,321,260,270,241,179,146,111,73,36,7900
1,E02004290,female,159,172,162,170,141,158,182,204,...,216,230,220,185,143,92,42,30,15,5990
2,E02004298,female,255,222,275,262,180,226,247,325,...,397,349,385,272,248,215,137,106,68,9706
3,E02004299,female,235,194,201,247,244,260,262,253,...,314,298,277,233,193,149,133,113,55,8455
4,E02004291,female,195,178,191,199,176,220,210,205,...,251,202,222,185,178,133,111,57,38,6788


<br>

Patients & Populations

In [13]:
# Left join on `msoa`: every patients record is kept and paired with the matching
# population rows of its MSOA.
merged = pd.merge(left=patients, right=populations, how='left', on='msoa')
merged

Unnamed: 0,catchment_year,msoa,trust_code,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,sex,0-4,5-9,10-14,...,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85-89,90+,ppln_msoa
0,2011,E02001214,R0A,1981,3418,0.579579,female,181,198,230,...,274,208,292,245,190,199,168,107,53,7445
1,2011,E02001214,R0A,1981,3418,0.579579,male,224,216,215,...,264,229,251,198,200,148,123,65,25,7445
2,2011,E02001240,R0A,1181,2944,0.401155,female,177,138,199,...,215,182,186,140,143,109,58,32,22,6190
3,2011,E02001240,R0A,1181,2944,0.401155,male,197,184,176,...,219,170,207,144,113,94,50,18,6,6190
4,2011,E02001057,R0A,1752,3421,0.512131,female,221,210,168,...,192,142,133,107,98,82,54,38,17,8119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110573,2011,E02006383,RYR,8,3090,0.002589,male,299,281,377,...,272,241,256,238,149,150,114,78,46,9438
110574,2011,E02005137,RYR,8,3204,0.002497,female,318,302,282,...,228,240,241,189,163,141,96,65,45,7781
110575,2011,E02005137,RYR,8,3204,0.002497,male,292,282,349,...,216,215,208,156,138,101,61,33,12,7781
110576,2011,E02004740,RYR,8,2374,0.003370,female,159,167,189,...,271,265,306,297,221,243,169,103,59,7144


<br>

### Experiment

Experimenting with a single trust

In [14]:
# Code of the single trust that the experiment below is restricted to.
trust_code = 'R0A'

In [15]:
# Records of the selected trust only; the year and trust columns are dropped
# from this subset.
belongs_to_trust = merged['trust_code'] == trust_code
sample = merged.loc[belongs_to_trust, :].drop(columns=['catchment_year', 'trust_code'])
sample

Unnamed: 0,msoa,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,sex,0-4,5-9,10-14,15-19,20-24,...,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85-89,90+,ppln_msoa
0,E02001214,1981,3418,0.579579,female,181,198,230,234,158,...,274,208,292,245,190,199,168,107,53,7445
1,E02001214,1981,3418,0.579579,male,224,216,215,251,221,...,264,229,251,198,200,148,123,65,25,7445
2,E02001240,1181,2944,0.401155,female,177,138,199,204,214,...,215,182,186,140,143,109,58,32,22,6190
3,E02001240,1181,2944,0.401155,male,197,184,176,197,197,...,219,170,207,144,113,94,50,18,6,6190
4,E02001057,1752,3421,0.512131,female,221,210,168,190,542,...,192,142,133,107,98,82,54,38,17,8119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1657,E02006169,8,2203,0.003631,male,160,149,152,568,1233,...,155,138,169,145,104,77,50,20,14,8051
1658,E02001505,8,2771,0.002887,female,162,178,197,182,170,...,210,214,226,203,170,128,105,85,52,6423
1659,E02001505,8,2771,0.002887,male,193,204,208,190,171,...,199,174,211,164,153,106,85,45,20,6423
1660,E02001342,8,3747,0.002135,female,122,155,180,257,208,...,314,277,228,194,210,210,196,113,39,7146


<br>

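Theoretical catchment from a middle layer super output area (MSOA) to a trust: the fraction of the MSOA's patients that attend the trust, multiplied by the MSOA's population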

In [16]:
# Theoretical catchment: the MSOA-to-trust patient fraction applied to the MSOA's
# total population.
sample['tc_from_msoa_to_trust'] = sample['fp_from_msoa_to_trust'] * sample['ppln_msoa']
sample.head()

Unnamed: 0,msoa,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,sex,0-4,5-9,10-14,15-19,20-24,...,55-59,60-64,65-69,70-74,75-79,80-84,85-89,90+,ppln_msoa,tc_from_msoa_to_trust
0,E02001214,1981,3418,0.579579,female,181,198,230,234,158,...,208,292,245,190,199,168,107,53,7445,4314.963429
1,E02001214,1981,3418,0.579579,male,224,216,215,251,221,...,229,251,198,200,148,123,65,25,7445,4314.963429
2,E02001240,1181,2944,0.401155,female,177,138,199,204,214,...,182,186,140,143,109,58,32,22,6190,2483.148777
3,E02001240,1181,2944,0.401155,male,197,184,176,197,197,...,170,207,144,113,94,50,18,6,6190,2483.148777
4,E02001057,1752,3421,0.512131,female,221,210,168,190,542,...,142,133,107,98,82,54,38,17,8119,4157.991231


In [17]:
# Wide to long: the identifier columns are kept as they are, every other column
# (the age groups) becomes an `age_group` / `age_group_ppln_msoa` pair.
melted = pd.melt(
    sample,
    id_vars=[
        'msoa',
        'patients_from_msoa_to_trust',
        'total_patients_of_msoa',
        'fp_from_msoa_to_trust',
        'ppln_msoa',
        'tc_from_msoa_to_trust',
        'sex',
    ],
    var_name='age_group',
    value_name='age_group_ppln_msoa',
)
melted.head()

Unnamed: 0,msoa,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,ppln_msoa,tc_from_msoa_to_trust,sex,age_group,age_group_ppln_msoa
0,E02001214,1981,3418,0.579579,7445,4314.963429,female,0-4,181
1,E02001214,1981,3418,0.579579,7445,4314.963429,male,0-4,224
2,E02001240,1181,2944,0.401155,6190,2483.148777,female,0-4,177
3,E02001240,1181,2944,0.401155,6190,2483.148777,male,0-4,197
4,E02001057,1752,3421,0.512131,8119,4157.991231,female,0-4,221


In [18]:
# Fraction of the MSOA's population that falls within the row's age group.
melted['agf_ppln_msoa'] = melted['age_group_ppln_msoa'] / melted['ppln_msoa']

# The same fraction scaled by the MSOA-to-trust patient fraction.
melted['age_group_trust_factor'] = melted['fp_from_msoa_to_trust'] * melted['agf_ppln_msoa']

melted.head()

Unnamed: 0,msoa,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,ppln_msoa,tc_from_msoa_to_trust,sex,age_group,age_group_ppln_msoa,agf_ppln_msoa,age_group_trust_factor
0,E02001214,1981,3418,0.579579,7445,4314.963429,female,0-4,181,0.024312,0.01409
1,E02001214,1981,3418,0.579579,7445,4314.963429,male,0-4,224,0.030087,0.017438
2,E02001240,1181,2944,0.401155,6190,2483.148777,female,0-4,177,0.028595,0.011471
3,E02001240,1181,2944,0.401155,6190,2483.148777,male,0-4,197,0.031826,0.012767
4,E02001057,1752,3421,0.512131,8119,4157.991231,female,0-4,221,0.02722,0.01394


<br>

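Test: for a single MSOA, the age-group trust factors summed over every age group and sex should equal the fraction of the MSOA's patients that attend the trust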

In [19]:
# All the long-format rows of one MSOA, used for the checks that follow.
test = melted.loc[melted['msoa'].eq('E02001214'), :]
test

Unnamed: 0,msoa,patients_from_msoa_to_trust,total_patients_of_msoa,fp_from_msoa_to_trust,ppln_msoa,tc_from_msoa_to_trust,sex,age_group,age_group_ppln_msoa,agf_ppln_msoa,age_group_trust_factor
0,E02001214,1981,3418,0.579579,7445,4314.963429,female,0-4,181,0.024312,0.01409
1,E02001214,1981,3418,0.579579,7445,4314.963429,male,0-4,224,0.030087,0.017438
1662,E02001214,1981,3418,0.579579,7445,4314.963429,female,5-9,198,0.026595,0.015414
1663,E02001214,1981,3418,0.579579,7445,4314.963429,male,5-9,216,0.029013,0.016815
3324,E02001214,1981,3418,0.579579,7445,4314.963429,female,10-14,230,0.030893,0.017905
3325,E02001214,1981,3418,0.579579,7445,4314.963429,male,10-14,215,0.028878,0.016737
4986,E02001214,1981,3418,0.579579,7445,4314.963429,female,15-19,234,0.03143,0.018216
4987,E02001214,1981,3418,0.579579,7445,4314.963429,male,15-19,251,0.033714,0.01954
6648,E02001214,1981,3418,0.579579,7445,4314.963429,female,20-24,158,0.021222,0.0123
6649,E02001214,1981,3418,0.579579,7445,4314.963429,male,20-24,221,0.029684,0.017204


In [20]:
# Check: the MSOA's total patients weighted by the age-group trust factors, summed
# over the rows; compare with `patients_from_msoa_to_trust`.
test['total_patients_of_msoa'].mul(test['age_group_trust_factor']).sum()

1981.0

In [21]:
# Check: the MSOA's population weighted by the age-group trust factors, summed over
# the rows; compare with `tc_from_msoa_to_trust`.
test['ppln_msoa'].mul(test['age_group_trust_factor']).sum()

4314.963428905792