In [10]:
import pandas as pd
import geopandas as gp 
import json
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(rc={'figure.figsize':(12,8)})
%matplotlib inline
from tqdm import tqdm
import glob
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import linregress
import pickle as pkl

In [11]:
with open('../mappings/Bipartite_Drug_graph.pkl','rb') as f:
    drug_association_graph  = pkl.load(f)

with open('../mappings/Bipartite_Drug_category_graph.pkl','rb') as f:
    drug_cat_association_graph  = pkl.load(f)

In [69]:
def cleanStringofUTF(string):
    cleaned = string.encode('utf-8').replace('\xe8','e').replace('\xf6','o')
    return cleaned

def enrichdrugs(chem_dict , drugs):
    diabetes_drug_words = [drugs[k]['name'].lower() for k in drugs]
    for drug in chem_dict:
        Name = chem_dict[drug]['name'].replace('(','').replace(')','')
        slot1 = Name.lower().split('/')
        slot2 = Name.lower().split(' ')
        slot3 = Name.lower().split(' & ')
        common1 = set(diabetes_drug_words).intersection(slot1)
        common2 = set(diabetes_drug_words).intersection(slot2)
        common3 = set(diabetes_drug_words).intersection(slot3)
        
        if len(common1) > 0 or len(common2) > 0 or len(common3) > 0:
#             print common1 , common2 , common3
            drugs[chem_dict[drug]['code']] = {'disease':'' , 'disease_given_drug':0.0 , 'matched_disease':'', 'name':chem_dict[drug]['name'].strip() }

            
            
def makeChemDict(BNF_Chem):
    chem_dict = {}
    for index, row in BNF_Chem.iterrows():
        chem_dict[row['UNII_drugbank']] = {}
        chem_dict[row['UNII_drugbank']]['name'] = row['NAME']
        chem_dict[row['UNII_drugbank']]['code'] = row['BNF_code']
    return chem_dict
    
def getDrugCategory(categorylist, BNF_Chem, drugbankDict):
    allMatched = []
    drugs = {}
    chem_dict = makeChemDict(BNF_Chem)
    
    for k in drugbankDict:
        if len(drugbankDict[k]['Categories']) > 0:
            for cat in drugbankDict[k]['Categories']:
                matched_memo = []
                catString = cat.values()[0]#.split('\u2014')[-1]
                t = catString.lower().strip()
                for categoryString in categorylist:
                    categoryString = categoryString.lower()
                    if t.find(categoryString) >= 0:
                        matched_memo.append(categoryString)
                if k in chem_dict:
                    if len(matched_memo) > 0:# == len(categorylist):
                        allMatched.append(k)
#                         print chem_dict[k]
                        drugs[chem_dict[k]['code']] = {}
                        drugs[chem_dict[k]['code']]['name'] = chem_dict[k]['name']
                        drugs[chem_dict[k]['code']]['matched_cat'] = categorylist
    enrichdrugs(chem_dict,drugs)               
    return list(set(allMatched)) , drugs


def getDrugforDiseaseDrugbank(categorylist, BNF_Chem, drugbankDict):
    allMatched = []
    drugs = {}
    chem_dict = makeChemDict(BNF_Chem)
    
    for k in drugbankDict:
        if len(drugbankDict[k]['Associations']) > 0:
            for cat in drugbankDict[k]['Associations']:
                matched_memo = []
                catString = cat.values()[0]
                t = catString.lower().strip()
                for categoryString in categorylist:
                    categoryString = categoryString.lower()
                    if t.find(categoryString) >= 0:
                        matched_memo.append(categoryString)
                if k in chem_dict:
                    if len(matched_memo) > 0:
                        allMatched.append(k)
#                         print chem_dict[k]
                        drugs[chem_dict[k]['code']] = {}
                        drugs[chem_dict[k]['code']]['name'] = chem_dict[k]['name']
                        drugs[chem_dict[k]['code']]['matched_cat'] = categorylist
    enrichdrugs(chem_dict,drugs)               
    return  allMatched , drugs


def findDrugsForDisease(Graph, Disease, BNF_Chem ):#,threshProb):
    chem_dict = makeChemDict(BNF_Chem)
    drugs = {}
    for e in Graph.edges(data=True):
        if (cleanStringofUTF(e[1]).lower().find(Disease.lower()) >=0) or (cleanStringofUTF(e[0]).lower().find(Disease.lower()) >= 0) :
            drugNode = ''
            matchedDisease = ''
            if Graph.node[e[0]]['type'] == 'symptom':
                drugNode = e[1]
                matchedDisease = e[0]
            else:
                drugNode = e[0]
                matchedDisease = e[1]
            drugs[Graph.node[drugNode]['Id']] = {}
            drugs[Graph.node[drugNode]['Id']]['name'] = drugNode
            drugs[Graph.node[drugNode]['Id']]['matched_disease'] = matchedDisease
            drugs[Graph.node[drugNode]['Id']]['disease'] = Disease
    enrichdrugs(chem_dict,drugs)
    return drugs


def generateConfidence(drugs,Graph):
    shared = []
    All = []
    denom = max(Graph.degree().values())
    for d in drugs:
        name = drugs[d]['name']
        for e in Graph.edges(data=True):
            if Graph.node[e[0]]['type'] == 'symptom':
                if e[1] == name:
                    shared.append(Graph.degree()[e[1]]-1)
                else:
                    continue
            else:
                
                if e[0] == name:
                    shared.append(Graph.degree()[e[0]]-1)
                else:
                    continue
#     shared = [float(k) for k in shared]
    num = [k for k in shared if k > 1]

    return float(len(num)+1.0)/float(len(shared)+1.0)
#     return float(len(num))/float(len(shared)) * 10.0
#     return len(num)
                
                
def findDrugsForCategory(Graph, Cat, BNF_Chem ):#,threshProb):
    chem_dict = makeChemDict(BNF_Chem)
    drugs = {}
    for e in Graph.edges(data=True):
        if (cleanStringofUTF(e[1]).lower().find(Cat.lower()) >=0) or (cleanStringofUTF(e[0]).lower().find(Cat.lower()) >= 0) :
            drugNode = ''
            matchedDisease = ''
            if Graph.node[e[0]]['type'] == 'category':
                drugNode = e[1]
                matchedDisease = e[0]
            else:
                drugNode = e[0]
                matchedDisease = e[1]
            print Graph.node[drugNode]['Id']
            drugs[Graph.node[drugNode]['Id']] = {}
            drugs[Graph.node[drugNode]['Id']]['name'] = drugNode
            drugs[Graph.node[drugNode]['Id']]['matched_cat'] = matchedDisease
            drugs[Graph.node[drugNode]['Id']]['category'] = Cat
    enrichdrugs(chem_dict,drugs)
    return drugs


In [14]:
files = glob.glob('../../BL_Work/openPrescribe/serialized/*.gz')
print(files)

['../../BL_Work/openPrescribe/serialized/201810.gz', '../../BL_Work/openPrescribe/serialized/201710.gz', '../../BL_Work/openPrescribe/serialized/201203.gz', '../../BL_Work/openPrescribe/serialized/201110.gz', '../../BL_Work/openPrescribe/serialized/202010.gz', '../../BL_Work/openPrescribe/serialized/201804.gz', '../../BL_Work/openPrescribe/serialized/201911.gz', '../../BL_Work/openPrescribe/serialized/201308.gz', '../../BL_Work/openPrescribe/serialized/201708.gz', '../../BL_Work/openPrescribe/serialized/202005.gz', '../../BL_Work/openPrescribe/serialized/201211.gz', '../../BL_Work/openPrescribe/serialized/201707.gz', '../../BL_Work/openPrescribe/serialized/201803.gz', '../../BL_Work/openPrescribe/serialized/201410.gz', '../../BL_Work/openPrescribe/serialized/201301.gz', '../../BL_Work/openPrescribe/serialized/201201.gz', '../../BL_Work/openPrescribe/serialized/201409.gz', '../../BL_Work/openPrescribe/serialized/201812.gz', '../../BL_Work/openPrescribe/serialized/201603.gz', '../../BL_W

In [70]:
chem = pd.read_csv('../mappings/CHEM_MASTER_MAP.csv')
len(chem)

# chem = chem.dropna()

matched = chem[chem['UNII_drugbank']!='[]']

matchedMap = {}
for index,row in matched.iterrows():
    if row['UNII_drugbank'] not in matchedMap:
         matchedMap[row['UNII_drugbank']] = []
    matchedMap[row['UNII_drugbank']].append(row['BNF_code'])

# diseases = [
#  "anxiety",
#  "rheumatoid",
#  "osteoporosis",
#  "depression",
#  "diabetes",
#  "stroke",
#  "hypertension",
#  "chronic obstructive pulmonary disease", 
#  "dementia",
#  "asthma",
#  "sleeplessness",
# ]

# DiseaseDrugs = {}
# for d in diseases:
#     drugs = findDrugsForDisease(drug_association_graph,d ,chem)
# #     _ , drugs = getDrugforDiseaseDrugbank([d] ,chem,drugbank_dict)
#     for drug in drugs:
#         DiseaseDrugs[drug] = {}
#         DiseaseDrugs[drug]['chemName'] = drugs[drug]['name']
#         DiseaseDrugs[drug]['disease'] = d


categories = ["antibiotics",
              "antiallergic"
]

DiseaseDrugs = {}
for d in categories:
    drugs = findDrugsForCategory(drug_cat_association_graph,d ,chem)
    for drug in drugs:
        DiseaseDrugs[drug] = {}
        DiseaseDrugs[drug]['chemName'] = drugs[drug]['name']
        DiseaseDrugs[drug]['disease'] = d

0501090R0
0501090C0
0802010M0
0501090Q0
0501090E0
0801020P0
0801020P0
0501070X0
1310011Z0
1310012I0
1202010N0
0801020K0
/categories/DBCAT004732
/categories/DBCAT000873
0801020D0
0801020Q0
0802020U0
0801060A0
0802010M0
0801020N0
0801020Q0
/categories/DBCAT002288
/categories/DBCAT002363
1310012P0
0501030F0
0801020K0
/categories/DBCAT004732
0801020R0
0801020D0
0801060A0
0801020N0
1310012F0
1310011AA
0501030V0
1201010R0
/categories/DBCAT004737
1104020R0
1202010P0
1202010Q0
1104020P0
1202010A0
1104020Z0
1202010P0
1104020AC
1104020X0
1105000T0
1202010Q0
1104020Y0
1104020P0
1202020H0
1104020Z0
1202020P0
1202020G0
1202010A0
1104020M0


In [68]:
DiseaseDrugs

{u'/categories/DBCAT000873': {'chemName': 'Daunorubicin',
  'disease': 'antibiotics'},
 u'/categories/DBCAT002288': {'chemName': u'Cytotoxic Antibiotics and Related Substances',
  'disease': 'antibiotics'},
 u'/categories/DBCAT002363': {'chemName': u'Antibiotics for Topical Use',
  'disease': 'antibiotics'},
 u'/categories/DBCAT004732': {'chemName': 'Bleomycin',
  'disease': 'antibiotics'},
 u'/categories/DBCAT004737': {'chemName': 'Mupirocin',
  'disease': 'antibiotics'},
 '0501030F0': {'chemName': 'Demeclocycline Hydrochloride',
  'disease': 'antibiotics'},
 '0501030V0': {'chemName': 'Tetracycline', 'disease': 'antibiotics'},
 '0501070X0': {'chemName': 'Rifaximin', 'disease': 'antibiotics'},
 '0501090C0': {'chemName': 'Capreomycin', 'disease': 'antibiotics'},
 '0501090E0': {'chemName': 'Cycloserine', 'disease': 'antibiotics'},
 '0501090Q0': {'chemName': 'Rifabutin', 'disease': 'antibiotics'},
 '0501090R0': {'chemName': 'Rifampicin', 'disease': 'antibiotics'},
 '0501090S0': {'chemName

In [22]:
%store -r ome_map
ome = pd.read_csv('mappings/ome_rossano.csv')

In [23]:
ome

Unnamed: 0,bnf_name,bnf,mg_per_unit,ome_multiplier
0,Abstral_Tab Sublingual 100mcg,0407020A0BJAAAW,0.10,130.0
1,Abstral_Tab Sublingual 200mcg,0407020A0BJABAX,0.20,130.0
2,Abstral_Tab Sublingual 300mcg,0407020A0BJACAY,0.30,130.0
3,Abstral_Tab Sublingual 400mcg,0407020A0BJADAZ,0.40,130.0
4,Abstral_Tab Sublingual 600mcg,0407020A0BJAEBA,0.60,130.0
5,Abstral_Tab Sublingual 800mcg,0407020A0BJAFBB,0.80,130.0
6,Abtard_Tab 10mg M/R,0407020ADBMABAF,10.00,2.0
7,Abtard_Tab 15mg M/R,0407020ADBMACAS,15.00,2.0
8,Abtard_Tab 20mg M/R,0407020ADBMADAG,20.00,2.0
9,Abtard_Tab 30mg M/R,0407020ADBMAFAR,30.00,2.0


In [19]:
ome_map

{'040702020AAAAAA': 2.0,
 '040702020AAABAB': 2.0,
 '040702020BBAAAA': 2.0,
 '040702020BBABAB': 2.0,
 '040702040AAAAAA': 0.1,
 '040702040AAABAB': 0.1,
 '040702040AAACAC': 0.1,
 '040702040AAADAD': 0.1,
 '040702040AAAEAE': 0.1,
 '040702040AAAFAF': 0.1,
 '040702040AAAGAG': 0.1,
 '040702040AAAHAH': 0.1,
 '040702040AAAIAI': 0.1,
 '040702040AAAJAJ': 0.1,
 '040702040AAAMAM': 0.1,
 '040702040AAANAN': 0.1,
 '040702040AAAPAP': 0.1,
 '040702040AAATAT': 0.1,
 '040702040AAAUAU': 0.1,
 '040702040AAAVAV': 0.1,
 '040702040AAAWAW': 0.1,
 '040702040AAAXAX': 0.1,
 '040702040AAAYAY': 0.1,
 '040702040AAAZAZ': 0.1,
 '040702040AABABA': 0.1,
 '040702040AABBBB': 0.1,
 '040702040AABCBC': 0.1,
 '040702040AABDBD': 0.1,
 '040702040BBAAAA': 0.1,
 '040702040BBABAB': 0.1,
 '040702040BBACAC': 0.1,
 '040702040BBADAD': 0.1,
 '040702040BBAEAE': 0.1,
 '040702040BBAFAF': 0.1,
 '040702040BBAGAD': 0.1,
 '040702040BBAHAE': 0.1,
 '040702040BBAIAM': 0.1,
 '040702040BBAJAN': 0.1,
 '040702040BBAKAY': 0.1,
 '040702040BCAAAA': 0.1,


In [24]:
def func_ome(df,drugBNF,ome_map):
    df['presc_ome'] = df['8'] *df['15']*ome_map[drugBNF]
    return df

def calculateOME(pdp,ome_map):
    pdp['presc_ome'] = 0.0
    return pdp.groupby('3',as_index=False).apply(lambda df: func_ome(df , df.name, ome_map ))

In [25]:
# findDrugsForDisease(drug_association_graph,'sleeplessness',chem)

In [26]:
DiseaseDrugs

{u'/categories/DBCAT000873': {'chemName': 'Daunorubicin',
  'disease': 'antibiotics'},
 u'/categories/DBCAT002288': {'chemName': u'Cytotoxic Antibiotics and Related Substances',
  'disease': 'antibiotics'},
 u'/categories/DBCAT002363': {'chemName': u'Antibiotics for Topical Use',
  'disease': 'antibiotics'},
 u'/categories/DBCAT004732': {'chemName': 'Bleomycin',
  'disease': 'antibiotics'},
 u'/categories/DBCAT004737': {'chemName': 'Mupirocin',
  'disease': 'antibiotics'},
 '0501030F0': {'chemName': 'Demeclocycline Hydrochloride',
  'disease': 'antibiotics'},
 '0501030V0': {'chemName': 'Tetracycline', 'disease': 'antibiotics'},
 '0501070X0': {'chemName': 'Rifaximin', 'disease': 'antibiotics'},
 '0501090C0': {'chemName': 'Capreomycin', 'disease': 'antibiotics'},
 '0501090E0': {'chemName': 'Cycloserine', 'disease': 'antibiotics'},
 '0501090Q0': {'chemName': 'Rifabutin', 'disease': 'antibiotics'},
 '0501090R0': {'chemName': 'Rifampicin', 'disease': 'antibiotics'},
 '0501090S0': {'chemName

In [27]:
disease_drug_map = {}
for k in DiseaseDrugs:
    if DiseaseDrugs[k]['disease'] not in disease_drug_map:
        disease_drug_map[DiseaseDrugs[k]['disease']] = []
    disease_drug_map[DiseaseDrugs[k]['disease']].append(k)

In [28]:
drug_map_dict = {'BNF_code':[], 'Drug_name':[] , 'Mapped_Condition': []}
for k in DiseaseDrugs:
    drug_map_dict['BNF_code'].append(k)
    drug_map_dict['Drug_name'].append(DiseaseDrugs[k]['chemName'])
    drug_map_dict['Mapped_Condition'].append(DiseaseDrugs[k]['disease'])
drug_map_df = pd.DataFrame.from_dict(drug_map_dict)    
drug_map_df.to_csv('data_prep/Drugs_categories.csv',index=False)

In [29]:
disease_drug_map.keys()

['antibiotics', 'antiallergic']

In [30]:
LSOA_dist = json.load(open('../mappings/GP_LSOA_PATIENTSDIST.json','rb'))

In [31]:
LSOA_dist_2021 = json.load(open('mappings/GP_LSOA_PATIENTSDIST_2021.json','rb'))

In [32]:
LSOA_dist['A81001']['E01033477']

0.11856400566839868

In [33]:
LSOA_dist_2021['A81001']['E01033477']

0.1600780868716447

In [34]:
# LSOA_dist_new = pd.read_csv('mappings/gp-reg-pat-prac-lsoa-all.csv')

In [35]:
# LSOA_dist_new.head()

In [36]:
# LSOA_dist_2021 = {}
# for name , group in LSOA_dist_new.groupby('PRACTICE_CODE'):
#     LSOA_dist_2021[name] = {}
#     total = sum(group['Number of Patients'])
#     for index , row in group.iterrows():
#         LSOA_dist_2021[name][row['LSOA_CODE']] = float(row['Number of Patients'])/float(total)
    
        

In [37]:
# json.dump(LSOA_dist_2021 , open('mappings/GP_LSOA_PATIENTSDIST_2021.json','w'))

In [18]:
# taxonomyDict%store -r taxonomyDict

In [19]:
# json.dump(taxonomyDict, open('mappings/taxomomy_dict.json','w'))

In [38]:
# %storcityMape -r cityMap

In [39]:
# json.dump(cityMap, open('mappings/City_map_dict.json','w'))

In [40]:
ward_pop = pd.read_csv('mappings/ward_pop.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [41]:
population = {}
for index, row in ward_pop.iterrows():
    population[row['Ward Code 1']] = float(row['All Ages'].replace(',',''))

In [42]:
df_city = pd.read_csv('../mappings/lower_layer_super_output_area_2011_to_major_towns_and_cities_december_2015_lookup_in_england_and_wales.csv')

In [43]:
df_city.head()

Unnamed: 0,LSOA11CD,LSOA11NM,TCITY15CD,TCITY15NM,FID
0,E01002351,Havering 016C,J01000055,London,2001
1,E01002352,Havering 016D,J01000055,London,2002
2,E01002100,Haringey 008B,J01000055,London,2003
3,E01002301,Havering 003A,J01000055,London,2004
4,E01002353,Havering 013B,J01000055,London,2005


In [44]:
cityMap = {}
for name , group in df_city.groupby('TCITY15NM'):
        cityMap[name] = list(group['LSOA11CD'])

In [45]:
LSOA_survey_takers = json.load(open('../mappings/LSOA_suvery_pop.json'))

In [46]:
disease_drugs = json.load(open("../mappings/Disease_Drug_DrugBank.json",'rb'))

In [47]:
drugbank_dict = json.load(open('../mappings/Drugbank_drugs_data.json','rb'))

In [48]:
# cityMap

In [49]:
IMD_df = pd.read_csv('../../BL_Work/File_7_ID_2015_All_ranks__deciles_and_scores_for_the_Indices_of_Deprivation__and_population_denominators.csv')

In [50]:
IMD_df.head()

Unnamed: 0,LSOA code (2011),LSOA name (2011),Local Authority District code (2013),Local Authority District name (2013),Index of Multiple Deprivation (IMD) Score,Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived),Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs),Income Score (rate),Income Rank (where 1 is most deprived),Income Decile (where 1 is most deprived 10% of LSOAs),...,Indoors Sub-domain Rank (where 1 is most deprived),Indoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs),Outdoors Sub-domain Score,Outdoors Sub-domain Rank (where 1 is most deprived),Outdoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs),Total population: mid 2012 (excluding prisoners),Dependent Children aged 0-15: mid 2012 (excluding prisoners),Population aged 16-59: mid 2012 (excluding prisoners),Older population aged 60 and over: mid 2012 (excluding prisoners),Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners)
0,E01031349,Adur 001A,E07000223,Adur,12.389,21352,7,0.096,18992,6,...,20379,7,0.312,11318,4,1318,206,694,418,702.75
1,E01031350,Adur 001B,E07000223,Adur,28.619,8864,3,0.187,9233,3,...,16285,5,0.234,12445,4,1212,232,712,268,720.75
2,E01031351,Adur 001C,E07000223,Adur,11.713,22143,7,0.065,24539,8,...,25054,8,0.208,12820,4,1577,290,829,458,838.25
3,E01031352,Adur 001D,E07000223,Adur,16.446,17252,6,0.117,16087,5,...,24455,8,0.109,14350,5,1453,233,739,481,748.25
4,E01031370,Adur 001E,E07000223,Adur,18.265,15643,5,0.102,17918,6,...,20214,7,0.321,11202,4,1443,306,799,338,795.5


In [51]:
LSOA_pop = {}
LSOA_IMD = {}
for index, row in IMD_df.iterrows():
    LSOA_pop[row['LSOA code (2011)']] = row['Total population: mid 2012 (excluding prisoners)']
    LSOA_IMD[row['LSOA code (2011)']] = row['Index of Multiple Deprivation (IMD) Score']

In [52]:
len(LSOA_pop.keys())

32844

In [53]:
cityPop = {}
city_IMD = {}
city_survey_pop = {}
for k in cityMap:
    pop = 0
    surveypop = 0
    IMD = []
    for j in cityMap[k]:
        try:
            pop += LSOA_pop[j]
            surveypop += LSOA_survey_takers[j]
            IMD.append(LSOA_IMD[j])
        except:
            print("could not find LSOA",j)
    city_IMD[k] = {}
    if pop > 0:
        cityPop[k] = pop
        city_survey_pop[k] = surveypop
        city_IMD[k]['median_IMD'] = np.median(IMD)
        city_IMD[k]['mean_IMD'] = np.mean(IMD)

('could not find LSOA', 'W01001912')
('could not find LSOA', 'W01001913')
('could not find LSOA', 'W01001600')
('could not find LSOA', 'W01001651')
('could not find LSOA', 'W01001601')
('could not find LSOA', 'W01001652')
('could not find LSOA', 'W01001602')
('could not find LSOA', 'W01001653')
('could not find LSOA', 'W01001654')
('could not find LSOA', 'W01001603')
('could not find LSOA', 'W01001604')
('could not find LSOA', 'W01001655')
('could not find LSOA', 'W01001605')
('could not find LSOA', 'W01001606')
('could not find LSOA', 'W01001607')
('could not find LSOA', 'W01001659')
('could not find LSOA', 'W01001608')
('could not find LSOA', 'W01001660')
('could not find LSOA', 'W01001609')
('could not find LSOA', 'W01001661')
('could not find LSOA', 'W01001610')
('could not find LSOA', 'W01001662')
('could not find LSOA', 'W01001611')
('could not find LSOA', 'W01001663')
('could not find LSOA', 'W01001612')
('could not find LSOA', 'W01001664')
('could not find LSOA', 'W01001613')
(

In [54]:
LSOA_patient_pop = {}
LSOA_patients_map = json.load(open('data_prep/GPs.json','r'))
for GP in tqdm(LSOA_patients_map):
    for lsoa in LSOA_patients_map[GP]['Patient_registry_LSOA']:
        if lsoa not in LSOA_patient_pop:
            LSOA_patient_pop[lsoa] = LSOA_patients_map[GP]['Patient_registry_LSOA'][lsoa]
        else:
            LSOA_patient_pop[lsoa] += LSOA_patients_map[GP]['Patient_registry_LSOA'][lsoa]

100%|██████████| 6623/6623 [00:01<00:00, 3876.14it/s]


In [56]:
sum(LSOA_patient_pop.values())

60744002.0

In [57]:
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='dataPrep_postcovid.log', mode='a')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.INFO)

In [59]:
def calculateTemporalMetrics_LSOA(all_presc , old = True):
    LSOA_dosage = {}
    LSOA_costs = {}
    LSOA_patient_count = {}
    fail = 0.0
    LSOA_map = {}
    if old:
        dosageField = '8'
        costField = '19'
        practiceField = '2'
        LSOA_map = LSOA_dist_2021
    else:
        dosageField = 'TOTAL_QUANTITY'
        costField = '19'
        practiceField = 'PRACTICE_CODE'
        LSOA_map = LSOA_dist_2021

    for name, group in all_presc.groupby(practiceField):
        total_dosage = np.sum(group[dosageField])
        total_cost = np.sum(group[costField])
        if name in LSOA_map:        
            for k in LSOA_map[name]:
                if k not in LSOA_dosage:
                    LSOA_dosage[k] = 0.0
                    LSOA_costs[k] = 0.0
                LSOA_dosage[k]+= float(total_dosage)*float(LSOA_map[name][k])
                LSOA_costs[k]+= float(total_cost)*float(LSOA_map[name][k])
                
#     for lsoa in LSOA_dosage:
#         LSOA_dosage[lsoa] = (LSOA_dosage[lsoa]/LSOA_patient_pop[lsoa]) * 1000
#         LSOA_costs[lsoa] = (LSOA_costs[lsoa]/LSOA_patient_pop[lsoa]) * 1000
    
    return  LSOA_dosage , LSOA_costs

def calculateTemporalMetrics_LSOA_opioids(all_presc , old = True):
    LSOA_dosage = {}
    LSOA_costs = {}
    LSOA_patient_count = {}
    fail = 0.0
    LSOA_map = {}
    if old:
        dosageField = '8'
        costField = 'presc_ome'
        practiceField = '2'
        LSOA_map = LSOA_dist_2021
    else:
        dosageField = 'TOTAL_QUANTITY'
        costField = 'presc_ome'
        practiceField = 'PRACTICE_CODE'
        LSOA_map = LSOA_dist_2021

    for name, group in all_presc.groupby(practiceField):
        total_dosage = np.sum(group[dosageField])
        total_cost = np.sum(group[costField])
        if name in LSOA_map:        
            for k in LSOA_map[name]:
                if k not in LSOA_dosage:
                    LSOA_dosage[k] = 0.0
                    LSOA_costs[k] = 0.0
                LSOA_dosage[k]+= float(total_dosage)*float(LSOA_map[name][k])
                LSOA_costs[k]+= float(total_cost)*float(LSOA_map[name][k])
                
#     for lsoa in LSOA_dosage:
#         LSOA_dosage[lsoa] = (LSOA_dosage[lsoa]/LSOA_patient_pop[lsoa]) * 1000
#         LSOA_costs[lsoa] = (LSOA_costs[lsoa]/LSOA_patient_pop[lsoa]) * 1000
    
    return  LSOA_dosage , LSOA_costs

In [60]:
# monthly_borough_dosage = {}
# monthly_borough_costs = {}

monthly_borough_dosage_new = {}
monthly_borough_costs_new = {}

In [61]:
files.sort()

In [62]:
files[53:]

['../../BL_Work/openPrescribe/serialized/201501.gz',
 '../../BL_Work/openPrescribe/serialized/201502.gz',
 '../../BL_Work/openPrescribe/serialized/201503.gz',
 '../../BL_Work/openPrescribe/serialized/201504.gz',
 '../../BL_Work/openPrescribe/serialized/201505.gz',
 '../../BL_Work/openPrescribe/serialized/201506.gz',
 '../../BL_Work/openPrescribe/serialized/201507.gz',
 '../../BL_Work/openPrescribe/serialized/201508.gz',
 '../../BL_Work/openPrescribe/serialized/201509.gz',
 '../../BL_Work/openPrescribe/serialized/201510.gz',
 '../../BL_Work/openPrescribe/serialized/201511.gz',
 '../../BL_Work/openPrescribe/serialized/201512.gz',
 '../../BL_Work/openPrescribe/serialized/201601.gz',
 '../../BL_Work/openPrescribe/serialized/201602.gz',
 '../../BL_Work/openPrescribe/serialized/201603.gz',
 '../../BL_Work/openPrescribe/serialized/201604.gz',
 '../../BL_Work/openPrescribe/serialized/201605.gz',
 '../../BL_Work/openPrescribe/serialized/201606.gz',
 '../../BL_Work/openPrescribe/serialized/20160

In [71]:
# disease_drug_map

In [72]:
for f in tqdm(files[53:]):
    month = f.split('/')[-1].split('.')[0]
    logging.debug("Working with month  " + month)
    if int(month) > 201906:
        old = False
    else:
        old = True
    
    monthly_borough_dosage_new[month] = {}
    monthly_borough_costs_new[month] = {}
    pdp = pd.read_csv(f,compression='gzip')
    for disease in tqdm(disease_drug_map):
        print "Working with disease  " + disease
        logging.debug("Working with disease  " + disease)
        monthly_borough_dosage_new[month][disease] = {}
        monthly_borough_costs_new[month][disease] = {}
        drugs = disease_drug_map[disease]
        opioids = pdp.loc[pdp['16'].isin(drugs)] #Original opioids

        monthly_borough_dosage_new[month][disease] , monthly_borough_costs_new[month][disease] = calculateTemporalMetrics_LSOA(opioids, old)
   

  0%|          | 0/74 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.92s/it][A

Working with disease  antiallergic



  1%|▏         | 1/74 [01:33<1:53:45, 93.49s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.05s/it][A

Working with disease  antiallergic



  3%|▎         | 2/74 [03:05<1:51:44, 93.12s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.98s/it][A

Working with disease  antiallergic



  4%|▍         | 3/74 [04:41<1:51:03, 93.86s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.25s/it][A

Working with disease  antiallergic



  5%|▌         | 4/74 [06:17<1:50:11, 94.45s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.97s/it][A

Working with disease  antiallergic



  7%|▋         | 5/74 [07:52<1:48:51, 94.65s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.32s/it][A

Working with disease  antiallergic



  8%|▊         | 6/74 [09:30<1:48:27, 95.70s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.07s/it][A

Working with disease  antiallergic



  9%|▉         | 7/74 [11:08<1:47:45, 96.50s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.89s/it][A

Working with disease  antiallergic



 11%|█         | 8/74 [12:42<1:45:20, 95.76s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:12, 13.00s/it][A

Working with disease  antiallergic



 12%|█▏        | 9/74 [14:19<1:43:56, 95.95s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.26s/it][A

Working with disease  antiallergic



 14%|█▎        | 10/74 [15:56<1:42:40, 96.26s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.97s/it][A

Working with disease  antiallergic



 15%|█▍        | 11/74 [17:31<1:40:55, 96.11s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.04s/it][A

Working with disease  antiallergic



 16%|█▌        | 12/74 [19:10<1:39:56, 96.72s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.44s/it][A

Working with disease  antiallergic



 18%|█▊        | 13/74 [20:44<1:37:43, 96.12s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.05s/it][A

Working with disease  antiallergic



 19%|█▉        | 14/74 [22:23<1:36:45, 96.75s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.92s/it][A

Working with disease  antiallergic



 20%|██        | 15/74 [23:59<1:35:07, 96.73s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.42s/it][A

Working with disease  antiallergic



 22%|██▏       | 16/74 [25:39<1:34:29, 97.76s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.01s/it][A

Working with disease  antiallergic



 23%|██▎       | 17/74 [27:17<1:32:46, 97.66s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.02s/it][A

Working with disease  antiallergic



 24%|██▍       | 18/74 [28:56<1:31:35, 98.13s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:14<00:14, 14.65s/it][A

Working with disease  antiallergic



 26%|██▌       | 19/74 [30:37<1:30:38, 98.88s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.95s/it][A

Working with disease  antiallergic



 27%|██▋       | 20/74 [32:15<1:28:52, 98.75s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.92s/it][A

Working with disease  antiallergic



 28%|██▊       | 21/74 [33:53<1:27:00, 98.50s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:09<00:09,  9.64s/it][A

Working with disease  antiallergic



 30%|██▉       | 22/74 [35:20<1:22:14, 94.89s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.87s/it][A

Working with disease  antiallergic



 31%|███       | 23/74 [36:55<1:20:49, 95.09s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.94s/it][A

Working with disease  antiallergic



 32%|███▏      | 24/74 [38:35<1:20:29, 96.59s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:12, 13.00s/it][A

Working with disease  antiallergic



 34%|███▍      | 25/74 [40:14<1:19:19, 97.14s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.07s/it][A

Working with disease  antiallergic



 35%|███▌      | 26/74 [41:50<1:17:34, 96.96s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.01s/it][A

Working with disease  antiallergic



 36%|███▋      | 27/74 [43:30<1:16:39, 97.86s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.93s/it][A

Working with disease  antiallergic



 38%|███▊      | 28/74 [45:07<1:14:46, 97.53s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.99s/it][A

Working with disease  antiallergic



 39%|███▉      | 29/74 [46:47<1:13:39, 98.22s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.99s/it][A

Working with disease  antiallergic



 41%|████      | 30/74 [48:27<1:12:35, 98.99s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.32s/it][A

Working with disease  antiallergic



 42%|████▏     | 31/74 [50:07<1:10:59, 99.06s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:14<00:14, 14.30s/it][A

Working with disease  antiallergic



 43%|████▎     | 32/74 [51:47<1:09:32, 99.34s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.29s/it][A

Working with disease  antiallergic



 45%|████▍     | 33/74 [53:26<1:07:47, 99.20s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.69s/it][A

Working with disease  antiallergic



 46%|████▌     | 34/74 [55:06<1:06:17, 99.43s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.12s/it][A

Working with disease  antiallergic



 47%|████▋     | 35/74 [56:45<1:04:38, 99.45s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.29s/it][A

Working with disease  antiallergic



 49%|████▊     | 36/74 [58:24<1:02:50, 99.23s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.19s/it][A

Working with disease  antiallergic



 50%|█████     | 37/74 [1:00:03<1:01:14, 99.31s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.95s/it][A

Working with disease  antiallergic



 51%|█████▏    | 38/74 [1:01:40<59:05, 98.49s/it]  
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.39s/it][A

Working with disease  antiallergic



 53%|█████▎    | 39/74 [1:03:19<57:37, 98.79s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.04s/it][A

Working with disease  antiallergic



 54%|█████▍    | 40/74 [1:04:59<56:12, 99.20s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.14s/it][A

Working with disease  antiallergic



 55%|█████▌    | 41/74 [1:06:41<54:52, 99.76s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.13s/it][A

Working with disease  antiallergic



 57%|█████▋    | 42/74 [1:08:21<53:14, 99.84s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.07s/it][A

Working with disease  antiallergic



 58%|█████▊    | 43/74 [1:10:00<51:31, 99.74s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.06s/it][A

Working with disease  antiallergic



 59%|█████▉    | 44/74 [1:11:39<49:44, 99.47s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.11s/it][A

Working with disease  antiallergic



 61%|██████    | 45/74 [1:13:18<47:58, 99.26s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.19s/it][A

Working with disease  antiallergic



 62%|██████▏   | 46/74 [1:14:58<46:30, 99.66s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.32s/it][A

Working with disease  antiallergic



 64%|██████▎   | 47/74 [1:16:38<44:52, 99.71s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.96s/it][A

Working with disease  antiallergic



 65%|██████▍   | 48/74 [1:18:14<42:46, 98.71s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.08s/it][A

Working with disease  antiallergic



 66%|██████▌   | 49/74 [1:19:52<40:58, 98.33s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:09<00:09,  9.26s/it][A

Working with disease  antiallergic



 68%|██████▊   | 50/74 [1:21:18<37:50, 94.60s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.07s/it][A

Working with disease  antiallergic



 69%|██████▉   | 51/74 [1:22:44<35:15, 91.97s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.14s/it][A

Working with disease  antiallergic



 70%|███████   | 52/74 [1:24:19<34:08, 93.10s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.08s/it][A

Working with disease  antiallergic



 72%|███████▏  | 53/74 [1:25:56<32:58, 94.20s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:13<00:13, 13.08s/it][A

Working with disease  antiallergic



  interactivity=interactivity, compiler=compiler, result=result)

  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.50s/it][A

Working with disease  antiallergic



 74%|███████▍  | 55/74 [1:31:28<43:19, 136.83s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.81s/it][A

Working with disease  antiallergic



 76%|███████▌  | 56/74 [1:35:22<49:49, 166.08s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.74s/it][A

Working with disease  antiallergic



 77%|███████▋  | 57/74 [1:39:12<52:31, 185.38s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.59s/it][A

Working with disease  antiallergic



 78%|███████▊  | 58/74 [1:43:09<53:34, 200.89s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.05s/it][A

Working with disease  antiallergic



 80%|███████▉  | 59/74 [1:47:01<52:33, 210.20s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.50s/it][A

Working with disease  antiallergic



 81%|████████  | 60/74 [1:50:56<50:46, 217.59s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.11s/it][A

Working with disease  antiallergic



 82%|████████▏ | 61/74 [1:54:53<48:25, 223.51s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.85s/it][A

Working with disease  antiallergic



 84%|████████▍ | 62/74 [1:58:43<45:05, 225.44s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.84s/it][A

Working with disease  antiallergic



 85%|████████▌ | 63/74 [2:02:51<42:31, 231.99s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.84s/it][A

Working with disease  antiallergic



 86%|████████▋ | 64/74 [2:06:45<38:45, 232.59s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.62s/it][A

Working with disease  antiallergic



 88%|████████▊ | 65/74 [2:10:30<34:34, 230.52s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:12<00:12, 12.22s/it][A

Working with disease  antiallergic



 89%|████████▉ | 66/74 [2:14:24<30:51, 231.44s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.39s/it][A

Working with disease  antiallergic



 91%|█████████ | 67/74 [2:18:17<27:03, 231.88s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.53s/it][A

Working with disease  antiallergic



 92%|█████████▏| 68/74 [2:22:00<22:56, 229.35s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.50s/it][A

Working with disease  antiallergic



 93%|█████████▎| 69/74 [2:25:52<19:10, 230.01s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.96s/it][A

Working with disease  antiallergic



 95%|█████████▍| 70/74 [2:29:45<15:23, 230.96s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.01s/it][A

Working with disease  antiallergic



 96%|█████████▌| 71/74 [2:33:35<11:32, 230.79s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:11<00:11, 11.13s/it][A

Working with disease  antiallergic



 97%|█████████▋| 72/74 [2:37:30<07:44, 232.06s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.98s/it][A

Working with disease  antiallergic



 99%|█████████▊| 73/74 [2:41:22<03:51, 231.94s/it]
  0%|          | 0/2 [00:00<?, ?it/s][A

Working with disease  antibiotics



 50%|█████     | 1/2 [00:10<00:10, 10.97s/it][A

Working with disease  antiallergic



100%|██████████| 74/74 [2:45:06<00:00, 229.57s/it]


In [73]:
pdp.head(n=5)

Unnamed: 0.1,Unnamed: 0,YEAR_MONTH,REGIONAL_OFFICE_NAME,REGIONAL_OFFICE_CODE,STP_NAME,STP_CODE,PCO_NAME,PCO_CODE,PRACTICE_NAME,PRACTICE_CODE,...,12,13,14,15,16,17,18,19,20,21
0,0,202102,SOUTH EAST,Y59,"BUCKS, OXFORDSHIRE & BERKSHIRE WEST STP",QU9,OXFORDSHIRE CCG,10Q00,MORLAND HOUSE SURGERY,K84014,...,,,,0.0,130201100,500.0,1.0,500.0,BB,0.0
1,1,202102,SOUTH EAST,Y59,"BUCKS, OXFORDSHIRE & BERKSHIRE WEST STP",QU9,OXFORDSHIRE CCG,10Q00,MORLAND HOUSE SURGERY,K84014,...,,,,0.0,130201100,400.0,1.0,400.0,BB,0.0
2,2,202102,SOUTH EAST,Y59,"BUCKS, OXFORDSHIRE & BERKSHIRE WEST STP",QU9,OXFORDSHIRE CCG,10Q00,MORLAND HOUSE SURGERY,K84014,...,,,,0.0,130201100,200.0,1.0,200.0,BB,0.0
3,3,202102,SOUTH EAST,Y59,"BUCKS, OXFORDSHIRE & BERKSHIRE WEST STP",QU9,OXFORDSHIRE CCG,10Q00,MORLAND HOUSE SURGERY,K84014,...,,,,0.0,1303000AA,1000.0,1.0,1000.0,BC,0.0
4,4,202102,SOUTH EAST,Y59,"BUCKS, OXFORDSHIRE & BERKSHIRE WEST STP",QU9,OXFORDSHIRE CCG,10Q00,MORLAND HOUSE SURGERY,K84014,...,,,,0.0,1303000AA,100.0,1.0,100.0,BC,0.0


In [74]:
monthly_borough_dosage_new['202002']['antiallergic'].keys()

[u'E01009300',
 u'E01014080',
 u'E01014081',
 u'E01014082',
 u'E01014083',
 u'E01014084',
 u'E01014085',
 u'E01014086',
 u'E01014087',
 u'E01014088',
 u'E01014089',
 u'E01015977',
 u'E01015976',
 u'E01015975',
 u'E01015974',
 u'E01015973',
 u'E01015972',
 u'E01015971',
 u'E01015970',
 u'E01015979',
 u'E01015978',
 u'E01010259',
 u'E01010258',
 u'E01010251',
 u'E01010250',
 u'E01010253',
 u'E01010252',
 u'E01010255',
 u'E01010254',
 u'E01010257',
 u'E01010256',
 u'E01024308',
 u'E01024309',
 u'E01024300',
 u'E01024301',
 u'E01024302',
 u'E01024303',
 u'E01024304',
 u'E01024305',
 u'E01024306',
 u'E01024307',
 u'E01014928',
 u'E01030039',
 u'E01015029',
 u'E01015028',
 u'E01004235',
 u'E01015021',
 u'E01015020',
 u'E01015023',
 u'E01015022',
 u'E01015025',
 u'E01015024',
 u'E01015027',
 u'E01015026',
 u'E01022229',
 u'E01022228',
 u'E01022225',
 u'E01022224',
 u'E01022227',
 u'E01022226',
 u'E01022221',
 u'E01022220',
 u'E01022223',
 u'E01022222',
 u'E01008962',
 u'E01006408',
 u'E010089

In [None]:
monthly_borough_dosage_new['202002']['antibiotics']['E01015028']

In [75]:
monthly_borough_dosage_total= monthly_borough_dosage_new.copy()
# for yyyymm in tqdm(monthly_borough_dosage_total):
#     for d in monthly_borough_dosage_total[yyyymm]:
#         for lsoa in  monthly_borough_dosage_total[yyyymm][d]:
#             monthly_borough_dosage_total[yyyymm][d][lsoa] = (monthly_borough_dosage_total[yyyymm][d][lsoa]/1000.0)*LSOA_patient_pop[lsoa]

In [76]:
for disease in tqdm(disease_drug_map.keys()):
    disease_dict = {'YYYYMM':[] , 'LSOA_CODE' : [] , 'Total_prescriptions' : [] ,'Dosage_ratio' :[] , 'Patient_count' : []}
    for yyyymm in monthly_borough_dosage_total:
        for LSOA_CODE in monthly_borough_dosage_total[yyyymm][disease]:
            if LSOA_CODE[0] == 'E':
                disease_dict['YYYYMM'].append(yyyymm)
                disease_dict['LSOA_CODE'].append(LSOA_CODE)
                disease_dict['Total_prescriptions'].append(monthly_borough_dosage_total[yyyymm][disease][LSOA_CODE])
                disease_dict['Dosage_ratio'].append(monthly_borough_costs_new[yyyymm][disease][LSOA_CODE])
                disease_dict['Patient_count'].append(LSOA_patient_pop[LSOA_CODE])
    disease_df = pd.DataFrame.from_dict(disease_dict)
    filename = 'data_prep/'+disease+'_V2.csv.gz'
    disease_df.to_csv(filename,index=False,compression='gzip')
            

100%|██████████| 2/2 [03:26<00:00, 102.95s/it]


In [None]:
disease_drug_map

In [None]:

for bnf in disease_drug_map['anxiety']:
    print(DiseaseDrugs[bnf])

In [None]:
# json.dump(monthly_borough_dosage_new,open('../mappings/pre_post_monthy_presc_pre072019.json','w'))

In [None]:
# json.dump(monthly_borough_costs_new,open('../mappings/pre_post_monthy_cost_pre072019.json','w'))

In [None]:
# json.dump(monthly_borough_dosage_new,open('../mappings/pre_post_monthy_presc_post072019.json','w'))

In [None]:
# json.dump(monthly_borough_costs_new,open('../mappings/pre_post_monthy_cost_post072019.json','w'))

In [None]:
monthly_dosage_BL = json.load(open('../mappings/pre_post_monthy_presc_pre072019.json','r'))
monthly_dosage_COVID = json.load(open('../mappings/pre_post_monthy_presc_post072019.json','r'))

In [None]:
monthly_dosage_COVID['202001'].keys()

In [None]:
BL_months = [
             #['201101', '201102', '201103', '201104', '201105', '201106', '201107', '201108', '201109', '201110', '201111', '201112'],
             #['201201', '201202', '201203', '201204', '201205', '201206', '201207', '201208', '201209', '201210', '201211', '201212'],
             #['201301', '201302', '201303', '201304', '201305', '201306', '201307', '201308', '201309', '201310', '201311', '201312'],
             #['201401', '201402', '201403', '201404', '201405', '201406', '201407', '201408', '201409', '201410', '201411', '201412'],
             ['201501', '201502', '201503', '201504', '201505', '201506', '201507', '201508', '201509', '201510', '201511', '201512'],
             ['201601', '201602', '201603', '201604', '201605', '201606', '201607', '201608', '201609', '201610', '201611', '201612'],
             ['201701', '201702', '201703', '201704', '201705', '201706', '201707', '201708', '201709', '201710', '201711', '201712'],
             ['201801', '201802', '201803', '201804', '201805', '201806', '201807', '201808', '201809', '201810', '201811', '201812']]
#             

COVID_months = ['202001', '202002', '202003', '202004', '202005', '202006', '202007', '202008', '202009', '202010', '202011', '202012']

In [None]:
print sorted(monthly_dosage_COVID['202007']['diabetes'].keys())

In [None]:
from collections import OrderedDict

d_l = ['anxiety',
 'heart failure',
 'rheumatoid',
 'epilepsy',
 'dementia',
 'stroke',
 'hypertension',
 'diabetes',
 'chronic obstructive pulmonary disease',
#  'obesity',
 'coronary artery disease',
#  'kidney disease',
 'depression',
 'osteoporosis']

# d_l = ['anxiety',
#  'depression']
month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# selected_disease = 'stroke'
city = 'London'

diseaseStats = {}
for d in d_l:
    selected_disease = d
    monthly_BL_numbers = OrderedDict()
    for month in range(len(BL_months[0])):
        monthly_BL_numbers[month_names[month]] = []
        for year in range(len(BL_months)):
            if city in monthly_dosage_BL[BL_months[year][month]][selected_disease]:
                monthly_BL_numbers[month_names[month]].append(monthly_dosage_BL[BL_months[year][month]][selected_disease][city])
            else:
                print("no prescriptions for disease " + d)
                monthly_BL_numbers[month_names[month]].append(0.0)
    
    diseaseStats[d] = monthly_BL_numbers  
        
    


In [None]:
# monthly_dosage_COVID[COVID_months[0]]['depression'][city]

In [None]:
diseaseStats.keys()

In [None]:
# diseaseStats['depression']

In [None]:
diseaseZ_scores = {}
for d in d_l:
    Z_scores = []
    for i in range(len(COVID_months)):
        mean = np.mean(diseaseStats[d][month_names[i]])
        sigma = np.std(diseaseStats[d][month_names[i]])
        if sigma > 0:
            if city in monthly_dosage_COVID[COVID_months[i]][d]:
                Z_scores.append(10 + (monthly_dosage_COVID[COVID_months[i]][d][city]-mean)/sigma)
            else:
                print("no prescriptions for disease " + d)
                Z_scores.append(5 + (-mean)/sigma)
    diseaseZ_scores[d] = Z_scores

In [None]:
# diseaseZ_scores

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(16, 12)
sns.set_style("white")
legend = []
for d in diseaseZ_scores:
    sns.lineplot(y=diseaseZ_scores[d],x=np.arange(0,len(diseaseZ_scores[d]),1))
    legend.append('Z for ' + d)
ax = plt.gca()

plt.xticks(np.arange(0,12,1),month_names,rotation=90)

plt.legend(legend, fontsize = 10)
plt.xlabel("Months of the year 2020",fontsize=20)
plt.ylabel("Z scores for prescriptions per 1000",fontsize=20)
plt.title("Prescription Z scores for city of " + city, fontsize=20)

In [None]:
monthly_dosage_COVID

In [None]:
city = 'London'

d_l = ['anxiety',
 'heart failure',
 'rheumatoid',
 'epilepsy',
 'dementia',
 'stroke',
 'hypertension',
 'diabetes',
 'chronic obstructive pulmonary disease',
#  'obesity',
 'coronary artery disease',
#  'kidney disease',
 'depression',
 'osteoporosis']

# d_l = ['anxiety' , 'depression']

all_covid_months = ['201907','201908','201909','201910','201911','201912','202001', '202002', '202003', '202004', '202005', 
                    '202006', '202007', '202008', '202009', '202010', '202011', '202012','202101','202102',]

disease_timelines = {}
months = monthly_dosage_COVID.keys
for disease in d_l:
    disease_timelines[disease] = []
    for month in monthly_dosage_COVID:
        if city in monthly_dosage_COVID[month][disease]:
            disease_timelines[disease].append(    monthly_dosage_COVID[month][disease][city] )

In [None]:
# disease_timelines

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(16, 12)
sns.set_style("white")
legend = []
for d in disease_timelines:
    sns.lineplot(y=disease_timelines[d],x=np.arange(0,len(disease_timelines[d]),1))
    legend.append('Prescriptions for ' + d)
ax = plt.gca()

plt.xticks(np.arange(0,20,1),all_covid_months,rotation=90)

plt.legend(legend, fontsize = 10)
plt.xlabel("Months of the year 2020",fontsize=20)
plt.ylabel("Z scores for prescriptions per 1000",fontsize=20)
plt.title("Prescriptions for city of " + city, fontsize=20)

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(16, 12)
sns.set_style("white")
legend = []
for d in disease_timelines:
    if len(disease_timelines[d])> 0:
        mean = (np.mean(disease_timelines[d]))
        sigma = (np.std(disease_timelines[d]))
        Z_scores = [float(k - mean)/sigma for k in disease_timelines[d]]
        sns.lineplot(y=Z_scores,x=np.arange(0,len(disease_timelines[d]),1))
        legend.append('Vanilla Z score for ' + d)
ax = plt.gca()

plt.xticks(np.arange(0,20,1),all_covid_months,rotation=90)

plt.legend(legend, fontsize = 10)
plt.xlabel("Months of the year 2020",fontsize=20)
plt.ylabel("vanilla Z scores for prescriptions per 1000",fontsize=20)
plt.title("Prescription Z scores for city of " + city, fontsize=20)