In [1]:
import pandas as pd
from tqdm.notebook import tqdm_notebook
import gzip 
import json

# Extract Data from UKB flat files
These are the fields needed to define prevalent diabetes (e.g. at recruitment)

In [2]:
fields2extract_Touchscreen = {
    'diabetes_onlyGestational' : 4041,
    'diabetes_AgeDiagnosed' : 2976,
    'medication_forCholesterolBloodPressureDiabetes' : 6177,
    'medication_forCholesterolBloodPressureDiabetesExogenousHormones' : 6153,
    'medication_InsulinWithin1YrDiabetesDiagnosis' : 2986,
    'sr_NoncancerIllness' : 20002,
    'sr_NoncancerIllness_AgeFirstDiagnosed' : 20009,
    'medication_code': 20003 
}
with open('ExtractFields_Touchscreen.txt', 'w') as outf:
    outf.write('\n'.join(map(str, list(set(fields2extract_Touchscreen.values())))))
    
fields2extract_Baseline = {
    'sex' : 31,
    'HbA1c' : 30750,
    'ethnicity' : 21000,
    'AgeAtRecruitment': 21003,
    'Assessment_Date': 53,
    'DOB' : 33,
}
with open('ExtractFields_Baseline.txt', 'w') as outf:
    outf.write('\n'.join(map(str, list(set(fields2extract_Baseline.values())))))

## Find UKB data extract with all the fields we need
Basline demographics & biomarkers (e.g. HbA1c) can be extracted from _20210211_45386 
_20191009_37902 extract contains everthing (previous full extract [20180501] didn't contatin n_20009

In [31]:
summary_field2file = pd.read_csv('/rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/Summary.csv', index_col=0)
summary_field2file.head()

Unnamed: 0_level_0,NAME,DESC,CLASS_UKB,LATESTSOURCE,DROPPED,_20180501_21905,_20190322_27598,_20190328_27940,_20190430_29575,_20190618_32806,_20191009_37902,_20191105_38375,_20210211_45386
FIELD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
n_100010,n_100010_0_0,Portion size,\ Additional exposure data \ Diet by 24-hour r...,20180501_21905,,1.0,,,,,,,
n_100010,n_100010_1_0,Portion size,\ Additional exposure data \ Diet by 24-hour r...,20180501_21905,,1.0,,,,,,,
n_100010,n_100010_2_0,Portion size,\ Additional exposure data \ Diet by 24-hour r...,20180501_21905,,1.0,,,,,,,
n_100010,n_100010_3_0,Portion size,\ Additional exposure data \ Diet by 24-hour r...,20180501_21905,,1.0,,,,,,,
n_100010,n_100010_4_0,Portion size,\ Additional exposure data \ Diet by 24-hour r...,20180501_21905,,1.0,,,,,,,


In [37]:
summary_field2file.loc[['n_31','n_21000', 'n_30750', 'n_21003']]

Unnamed: 0_level_0,NAME,DESC,CLASS_UKB,LATESTSOURCE,DROPPED,_20180501_21905,_20190322_27598,_20190328_27940,_20190430_29575,_20190618_32806,_20191009_37902,_20191105_38375,_20210211_45386
FIELD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
n_31,n_31_0_0,Sex,\ Population characteristics \ Baseline charac...,20210211_45386,,1.0,,,,,,,1.0
n_21000,n_21000_0_0,Ethnic background,\ UK Biobank Assessment Centre \ Touchscreen \...,20210211_45386,,1.0,,,,,1.0,,1.0
n_21000,n_21000_1_0,Ethnic background,\ UK Biobank Assessment Centre \ Touchscreen \...,20210211_45386,,1.0,,,,,1.0,,1.0
n_21000,n_21000_2_0,Ethnic background,\ UK Biobank Assessment Centre \ Touchscreen \...,20210211_45386,,1.0,,,,,1.0,,1.0
n_30750,n_30750_0_0,Glycated haemoglobin (HbA1c),\ Biological samples \ Assay results \ Blood a...,20210211_45386,,,1.0,,,,,1.0,1.0
n_30750,n_30750_1_0,Glycated haemoglobin (HbA1c),\ Biological samples \ Assay results \ Blood a...,20210211_45386,,,1.0,,,,,1.0,1.0
n_21003,n_21003_0_0,Age when attended assessment centre,\ UK Biobank Assessment Centre \ Recruitment \...,20210211_45386,,1.0,,,,,,1.0,1.0
n_21003,n_21003_1_0,Age when attended assessment centre,\ UK Biobank Assessment Centre \ Recruitment \...,20210211_45386,,1.0,,,,,,1.0,1.0
n_21003,n_21003_2_0,Age when attended assessment centre,\ UK Biobank Assessment Centre \ Recruitment \...,20210211_45386,,1.0,,,,,,1.0,1.0
n_21003,n_21003_3_0,Age when attended assessment centre,\ UK Biobank Assessment Centre \ Recruitment \...,20210211_45386,,,,,,,,1.0,1.0


In [57]:
for field in fields2extract.values():
    print(summary_field2file.loc['n_{}'.format(field), ['NAME','_20191009_37902']])

NAME               n_31_0_0
_20191009_37902         NaN
Name: n_31, dtype: object
              NAME  _20191009_37902
FIELD                              
n_4041  n_4041_0_0              1.0
n_4041  n_4041_1_0              1.0
n_4041  n_4041_2_0              1.0
n_4041  n_4041_3_0              1.0
              NAME  _20191009_37902
FIELD                              
n_2976  n_2976_0_0              1.0
n_2976  n_2976_1_0              1.0
n_2976  n_2976_2_0              1.0
n_2976  n_2976_3_0              1.0
              NAME  _20191009_37902
FIELD                              
n_6177  n_6177_0_0              1.0
n_6177  n_6177_0_1              1.0
n_6177  n_6177_0_2              1.0
n_6177  n_6177_1_0              1.0
n_6177  n_6177_1_1              1.0
n_6177  n_6177_1_2              1.0
n_6177  n_6177_2_0              1.0
n_6177  n_6177_2_1              1.0
n_6177  n_6177_2_2              1.0
n_6177  n_6177_3_0              1.0
n_6177  n_6177_3_1              1.0
n_6177  n_6177_3_2

## (TO SLOW) EXTRACT phenotypes needed to decide prevalent diabetes status from UKB files
```python
from sas7bdat import SAS7BDAT #easier than custom extracts
diabetes_variables = {}
maxlines = 20000 # used for testing 
locs_sas7bdat = [('Touchscreen', '/rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20191009_37902/p7439_20191009_37902.sas7bdat'), 
                 ('Demographics+Biomarkers', '/rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20210211_45386/p7439_20210211_45386.sas7bdat')]
for name_extract, loc_extract in locs_sas7bdat:
    print('Extracting:', name_extract, loc_extract)
    with SAS7BDAT(loc_extract, skip_header=True) as reader:
        count = 0
        # Find columns with relevant info
        columns = {}
        column_names = [x.decode("utf-8") for x in reader.column_names]
        for name_field, field in fields2extract.items():
            columns[name_field] = [x for x in column_names if (x.startswith('n_{}_'.format(field)) or x == 'n_{}'.format(field))]
        #print(columns)
        
        # Loop through rows to extract columns
        for row in tqdm_notebook(reader, total=500000):
            eid = int(row[column_names.index('n_eid')])
            eid_dict = {}
            for name_field, clist in columns.items():
                for cname in clist:
                    eid_dict[cname] = row[column_names.index(cname)]
            if eid in diabetes_variables:
                diabetes_variables[eid].update(eid_dict)
            else:
                diabetes_variables[eid] = eid_dict
            count += 1
            if (maxlines is not None) and (count > maxlines):
                break
```

In [53]:
with gzip.open('ExtractedFields.json.gz', 'w') as outfile:
    json.dump(diabetes_variables, outfile)

TypeError: memoryview: a bytes-like object is required, not 'str'

## Extract from .ukb_enc files

In [3]:
# Extract some fields from UKB (faster than below)
!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20191009_37902/additional/ukb37902.enc_ukb Touchscreen.enc_ukb
!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20191009_37902/additional/encoding.ukb encoding_Touchscreen.ukb
!../ukbconv Touchscreen.enc_ukb csv -eencoding_Touchscreen.ukb -iExtractFields_Touchscreen.txt -oTouchscreen
!rm Touchscreen.enc_ukb
!rm encoding_Touchscreen.ukb

!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20190322_27598/additional/ukb27598.enc_ukb HbA1c.enc_ukb
!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20190322_27598/additional/encoding.ukb HbA1c.ukb
!../ukbconv HbA1c.enc_ukb csv -eHbA1c.ukb -s30750 -oHbA1c
!rm HbA1c.enc_ukb
!rm HbA1c.ukb

!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20180501_21905/additional/ukb21905.enc_ukb Demos.enc_ukb
!cp /rds/project/asb38/rds-asb38-ceu-ukbiobank/phenotype/P7439/pre_qc_data/Data/data_20180501_21905/additional/encoding.ukb Demos.ukb
!../ukbconv Demos.enc_ukb csv -eDemos.ukb -iExtractFields_Baseline.txt -oDemos
!rm Demos.enc_ukb
!rm Demos.ukb


UKBiobank ukbconv_lx (c) CTSU. Compiled Mar 14 2018 14:21:31.

Loading encodings "encoding_Touchscreen.ukb" [timestamp:2019-09-08T15:49:06], 563 processed
Flagged 8 fields for inclusion
Opened logfile "Touchscreen.log"
5309 distinct data fields (505 columns) present in dataset
Field list output to "fields.ukb"
Headers loaded for 505 columns
Output data file: "Touchscreen.csv"
Starting data processing......
Done  10,000......
Done  20,000......
Done  30,000......
Done  40,000......
Done  50,000......
Done  60,000......
Done  70,000......
Done  80,000......
Done  90,000......
Done 100,000......
Done 110,000......
Done 120,000......
Done 130,000......
Done 140,000......
Done 150,000......
Done 160,000......
Done 170,000......
Done 180,000......
Done 190,000......
Done 200,000......
Done 210,000......
Done 220,000......
Done 230,000......
Done 240,000......
Done 250,000......
Done 260,000......
Done 270,000......
Done 280,000......
Done 290,000......
Done 300,000......
Done 310,000......


In [39]:
# Read UKB extracts into dictionarys
import csv, json

fields_floats = [30750, 2976, 20009, 21003]
fields_intcodes = [31, 4041, 6177, 6153, 2986, 20002, 20003, 21000]
fields_str = [53]

touchscreen_variables = {}
with open('Touchscreen.csv', 'r') as csvfile:
    ukbfile = csv.reader(csvfile)
    count = 0
    for line in tqdm_notebook(ukbfile, total=502000, desc='Touchscreen.csv'):
        if count == 0:
            h = line
            columns = line[1:]
        else:
            eid = line[0]
            eid_data = {}
            for f, v in zip(columns, line[1:]):
                if v == '':
                    eid_data[f] = None
                elif int(f.split('-')[0]) in fields_floats:
                    eid_data[f] = float(v)
                elif int(f.split('-')[0]) in fields_intcodes:
                    eid_data[f] = int(v)
                elif int(f.split('-')[0]) in fields_str:
                    eid_data[f] = v
                else:
                    print(f, v)
            touchscreen_variables[eid] = eid_data
        count += 1

demographic_variables = {}
with open('Demos.csv', 'r') as csvfile:
    ukbfile = csv.reader(csvfile)
    count = 0
    for line in tqdm_notebook(ukbfile, total=502000, desc='Demos.csv'):
        if count == 0:
            h = line
            columns = line[1:]
        else:
            eid = line[0]
            eid_data = {}
            for f, v in zip(columns, line[1:]):
                if v == '':
                    eid_data[f] = None
                elif int(f.split('-')[0]) in fields_floats:
                    eid_data[f] = float(v)
                elif int(f.split('-')[0]) in fields_intcodes:
                    eid_data[f] = int(v)
                elif int(f.split('-')[0]) in fields_str:
                    eid_data[f] = v
                else:
                    print(f, v)
            demographic_variables[eid] = eid_data
        count += 1
        
HbA1c_variables = {}     
with open('HbA1c.csv', 'r') as csvfile:
    ukbfile = csv.reader(csvfile)
    count = 0
    for line in tqdm_notebook(ukbfile, total=502000, desc='HbA1c.csv'):
        if count == 0:
            h = line
            columns = line[1:]
        else:
            eid = line[0]
            eid_data = {}
            for f, v in zip(columns, line[1:]):
                if v == '':
                    eid_data[f] = None
                elif int(f.split('-')[0]) in fields_floats:
                    eid_data[f] = float(v)
                elif int(f.split('-')[0]) in fields_intcodes:
                    eid_data[f] = int(v)
                elif int(f.split('-')[0]) in fields_str:
                    eid_data[f] = v
                else:
                    print(f, v)
            HbA1c_variables[eid] = eid_data
        count += 1

Touchscreen.csv:   0%|          | 0/502000 [00:00<?, ?it/s]

Demos.csv:   0%|          | 0/502000 [00:00<?, ?it/s]

HbA1c.csv:   0%|          | 0/502000 [00:00<?, ?it/s]

In [40]:
diabetes_variables = {}
for eid in set(touchscreen_variables.keys()).intersection(demographic_variables.keys()):
    d_touchscreen = touchscreen_variables[eid]
    d_touchscreen.update(demographic_variables[eid])
    diabetes_variables[eid] = d_touchscreen
print('Merged Dataset (Touchscreen + Demographic):', len(diabetes_variables))
print('Intersection with HbA1c = ', len(set(diabetes_variables.keys()).intersection(HbA1c_variables.keys())))

for eid in set(diabetes_variables.keys()).intersection(HbA1c_variables.keys()):
    diabetes_variables[eid].update(HbA1c_variables[eid])
print('Merged Dataset (Touchscreen + Demographic + HbA1c):', len(diabetes_variables))

# Possible to output, but slow (and lots of repeated strings/field-names)
# with open('MergedDataset_DiabetesVariables.json', 'w') as outf:
#     json.dump(diabetes_variables, outf)

Merged Dataset (Touchscreen + Demographic): 502520
Intersection with HbA1c =  502520
Merged Dataset (Touchscreen + Demographic + HbA1c): 502520


## DEFINE Variables (copying notation from Gad's script)

In [42]:
diabetes_defined = {}
for eid, extract in tqdm_notebook(diabetes_variables.items()):
    defined = {}
    # Sex
    sex_lookupfield = '31-0.0'
    sex_coding = {1 : 'Male', 0: 'Female'}
    defined['Sex'] = sex_coding[extract[sex_lookupfield]]
    #print(eid, extract['n_31_0_0'], defined['Sex'])
    
    # AgeAtAssessment/Date
    AgeAtAssessment_lookupfield = '21003-0.0'
    defined['AgeAtAssessment'] = extract[AgeAtAssessment_lookupfield]
    DateOfAssessment_lookupfield = '53-0.0'
    defined['DateOfAssessment'] = extract[DateOfAssessment_lookupfield]
    
    # Ethnicity (needed for 'age at diagnosis stratification')
    SRethnicity_lookupfield = '21000-0.0'
    SRethnicity_coding = {
        1 : 1, # White European (White)
        1001 : 1, # White European (British)
        1002 : 1, # White European (Irish)
        1003 : 1, # White European (Other white background)
        3 : 2, # South Asian (Asian/Asian-British)
        3001 : 2, # South Asian (Indian)
        3002 : 2, # South Asian (Pakistani)
        3003 : 2, # South Asian (Bangladeshi)
        4 : 3, # African/Caribbean (Black/Black-British)
        4001 : 3, # African Caribbean (Caribbean)
        4002 : 3, # African Caribbean (African)
        4003 : 3, # African Caribbean (Any other Black background)
        2 : 4, # Mixed/other 
        2001 : 4, # Mixed/other (White and Black Caribbean)
        2002 : 4, # Mixed/other (White and Black African)
        2003 : 4, # Mixed/other (White and Asian)
        2004 : 4, # Mixed/other (Any other mixed background)
        3004 : 4, # Mixed/other (Any other Asian background)
        5 : 4, # Mixed/other (Chinese)
        6 : 4, # Mixed/other (Other ethnic group)
        -1 : None, 
        -3 : None,
        None : None
    }
    defined['ethnicity_mapped'] = SRethnicity_coding[extract[SRethnicity_lookupfield]]
    #Sub-define is South Asian or African Ancestry
    if defined['ethnicity_mapped'] is None:
        defined['ethnicity_isSAorAfricanA'] = None
    elif defined['ethnicity_mapped'] in [2, 3]:
        defined['ethnicity_isSAorAfricanA'] = 1
    else:
        defined['ethnicity_isSAorAfricanA'] = 0
    
    # Gestational diabetes #
    gdm_lookupfield = '4041-0.0'
    gdm_coding = {
        1 : 1, #Yes
        0 : 0, #No
        -2 : 0, #Not applicable
        -1 : None, #Do not know
        -3 : None #Prefer not to answer
    }
    if extract[gdm_lookupfield] is None:
        defined['diabetes_TS_onlyGestational'] = None
    else:
        defined['diabetes_TS_onlyGestational'] = gdm_coding[extract[gdm_lookupfield]]

    # Age @ Diabetes Diagnosis (touchscreen)#
    age_lookup = '2976-0.0'
    age_extract = extract[age_lookup]
    defined['diabetes_TS_AgeDiagnosed'] = None
    if age_extract is not None:
        if age_extract < 0:
            defined['diabetes_TS_AgeDiagnosed'] = None
        else:
            defined['diabetes_TS_AgeDiagnosed'] = age_extract        
        
    # MEDICATIONS (touchscreen) - INSULIN #
    ## Current insulin
    ### Male field:6177
    ins_lookup_male = '6177-0'
    ins_value_male = 0 # [0, 1, None]
    ins_extract_male = [extract[x] for x in extract.keys() if x.startswith(ins_lookup_male)]
    if any([x in ins_extract_male for x in [-7, -1, -3]]):
        ins_value_male = None
    elif 3 in ins_extract_male:
        ins_value_male = 1
    ### Female field:6153
    ins_lookup_female = '6153-0'
    ins_value_female = 0 # [0, 1, None]
    ins_extract_female = [extract[x] for x in extract.keys() if x.startswith(ins_lookup_female)]
    if any([x in ins_extract_female for x in [-7, -1, -3]]):
        ins_value_female = None
    elif 3 in ins_extract_female:
        ins_value_female = 1
    ### Merged
    defined['medication_TS_OnInsulin'] = None
    if (ins_value_female == 0) or (ins_value_male == 0):
        defined['medication_TS_OnInsulin'] = 0
    if (ins_value_female == 1) or (ins_value_male == 1):
        defined['medication_TS_OnInsulin'] = 1
    #print(ins_value_male, ins_value_female, defined['medication_OnInsulin'])
    
    ## Insulin within 12 months of diagnosis
    insat1yr_lookup = '2986-0.0'
    insat1yr_extract = extract[insat1yr_lookup]
    insat1yr_coding = {
        1 : 1, #Yes
        0 : 0, #No
        -1 : None, #Do not know
        -3 : None #Prefer not to answer
    }
    if insat1yr_extract is not None:
        defined['medication_TS_InsulinWithin1YrDiabetesDiagnosis'] = insat1yr_coding[insat1yr_extract]
    else:
        defined['medication_TS_InsulinWithin1YrDiabetesDiagnosis'] = None
    
    # NURSES INTERVIEW
    ## Self-report non-cancer illness from interview
    sr_coding = {
        1220 : 'diabetes_NI_NonSpecific',
        1221 : 'diabetes_NI_Gestational',
        1222 : 'diabetes_NI_T1D',
        1223 : 'diabetes_NI_T2D'
    }
    # Set default to be non-diagnosed 
    for diabetes_phenotype in sr_coding.values():
        defined[diabetes_phenotype + '_01'] = 0
    ReportedDiabetes = []
    for x in range(0,29):
        phenotype_lookup = '20002-0.{}'.format(x)
        phenotype_code = extract[phenotype_lookup]
        if (phenotype_code is not None) and (phenotype_code in sr_coding):
            diabetes_phenotype = sr_coding[phenotype_code]
            defined[diabetes_phenotype + '_01'] = 1 # Set to case defined from nurses interview
            diabetes_phenotype_ageatdiagnosis = extract['20009-0.{}'.format(x)]
            #print(phenotype_extract, sr_coding[phenotype_extract], phenotype_extract_ageatdiagnosis)
            defined[diabetes_phenotype + '_age'] = diabetes_phenotype_ageatdiagnosis # Define age at diagnosis from related field
            ReportedDiabetes.append(diabetes_phenotype)
    
    # Find age of diabetes diagnosis (Nurses Interview self-report is preferred over touchsreen)
    if (len(ReportedDiabetes) > 0) or (defined['diabetes_TS_AgeDiagnosed'] != None) or (defined['diabetes_TS_onlyGestational'] == 1):
        defined['diabetes_NI_Any'] = 1
        
        # Gad: gen dm_agedm_ts_or_ni= dm_agediag_alldm_ni_bl
        SRDiabetes_Any_Age = defined.get('diabetes_NI_NonSpecific_age') # Set default from NI
        
        # Gad: replace dm_agedm_ts_or_ni=dm_agediag_sr_bl if dm_agediag_alldm_ni_bl==. & dm_agediag_t1dm_ni_bl==. & dm_agediag_t2dm_ni_bl==. & dm_gdmonly_sr_bl!=1
        if (SRDiabetes_Any_Age is None) and ('diabetes_NI_T1D' not in ReportedDiabetes) and ('diabetes_NI_T2D' not in ReportedDiabetes) and (defined['diabetes_TS_onlyGestational'] != 1):
            SRDiabetes_Any_Age = defined['diabetes_TS_AgeDiagnosed']
        
        # Gad: replace dm_agedm_ts_or_ni=dm_agediag_t1dm_ni_bl if dm_t1dm_ni_bl==1
        if 'diabetes_NI_T1D' in ReportedDiabetes:
            SRDiabetes_Any_Age = defined['diabetes_NI_T1D_age']
        
        # Gad: replace dm_agedm_ts_or_ni=dm_agediag_t2dm_ni_bl if dm_t2dm_ni_bl==1
        if 'diabetes_NI_T2D' in ReportedDiabetes:
            SRDiabetes_Any_Age = defined['diabetes_NI_T2D_age']
            
        # Gad : replace dm_agedm_ts_or_ni=. if dm_agediag_sr_bl==. &  dm_agediag_alldm_ni_bl==. & dm_agediag_t1dm_ni_bl==. & dm_agediag_t2dm_ni_bl==. 
        if (defined['diabetes_TS_AgeDiagnosed'] is None) and (defined.get('diabetes_NI_NonSpecific_01') == 0) and (defined.get('diabetes_NI_T1D_01') == 0) and (defined.get('diabetes_NI_T2D_01') == 0):
            SRDiabetes_Any_Age = None
        
        # Gad: replace dm_agedm_ts_or_ni=. if dm_agedm_ts_or_ni<0
        if (SRDiabetes_Any_Age is not None) and (SRDiabetes_Any_Age < 0):
            SRDiabetes_Any_Age = None
        
        defined['diabetes_TSNI_Any_AgeAtDiagnosis'] = SRDiabetes_Any_Age
        
        #if SRDiabetes_Any_Age is None:
            #print(SRDiabetes_Any_Age, ReportedDiabetes, defined)
    else:
        defined['diabetes_NI_Any'] = 0
        defined['diabetes_TSNI_Any_AgeAtDiagnosis'] = None
        
    # MEDICATIONS (Nurses Interview) #
    medication_coding = {
        'Insulin' : [1140883066], # Taking insulin, baseline nurse interview
        'Metformin' : [1140884600, 1140874686, 1141189090], # Taking metformin, baseline nurse interview
        'OAD_Sulfonylureas' : [1140874718, 1140874744, 1140874746, 1141152590 , 
                               1141156984, 1140874646, 1141157284 , 1140874652, 
                               1140874674, 1140874728 ], # Current non-metformin oral anti-diabetic receipt (Sulfonylureas)
        'OAD_Other' : [1140868902, 1140868908, 1140857508], # Taking other oral anti-diabetic (acarbose, guar gum), baseline nurse interview
        'Meglitinides' : [1141173882, 1141173786, 1141168660], # Taking meglitinide, baseline nurse interview
        'Glitazones' : [1141171646, 1141171652, 1141153254, 1141177600, 1141177606]
    }
    RecievedMedications = []
    for medicine, current_codes in medication_coding.items():
        recievedThisMedicine = 0
        for x in range(0,48):
            medicine_extract = extract['20003-0.{}'.format(x)]
            if (medicine_extract is not None) and (medicine_extract in current_codes):
                recievedThisMedicine = 1
        defined['medication_NI_{}'.format(medicine)] = recievedThisMedicine
        if recievedThisMedicine is 1:
            RecievedMedications.append(medicine)
    
    nonMetforminOADs = ['OAD_Sulfonylureas', 'OAD_Other', 'Meglitinides', 'Glitazones']
    if any([x in RecievedMedications for x in nonMetforminOADs]):
        defined['medication_NI_OnNonmetforminOADs'] = 1
    else:
        defined['medication_NI_OnNonmetforminOADs'] = 0
        
    if (len(RecievedMedications) > 0) or (defined['medication_TS_OnInsulin'] == 1):
        defined['medication_TSNI_AnyDiabetesMeds'] = 1
    else:
        defined['medication_TSNI_AnyDiabetesMeds'] = 0
                
    # HbA1C @ Baseline #
    HbA1C_lookupfield = '30750-0.0'
    if extract[HbA1C_lookupfield] is None:
        defined['baseline_HbA1C'] = None
    else:
        defined['baseline_HbA1C'] = extract[HbA1C_lookupfield]
        
    # Return Dictionary #
    diabetes_defined[eid] = defined

# Change to DF
diabetes_defined = pd.DataFrame.from_dict(diabetes_defined, orient='index')

  0%|          | 0/502520 [00:00<?, ?it/s]

In [43]:
diabetes_defined.to_csv('EastwoodDiabetes_DefinedVariables.csv')
#diabetes_defined = pd.read_csv('EastwoodDiabetes_DefinedVariables.csv', index_col=0)
diabetes_defined.head()

Unnamed: 0,Sex,AgeAtAssessment,DateOfAssessment,ethnicity_mapped,ethnicity_isSAorAfricanA,diabetes_TS_onlyGestational,diabetes_TS_AgeDiagnosed,medication_TS_OnInsulin,medication_TS_InsulinWithin1YrDiabetesDiagnosis,diabetes_NI_NonSpecific_01,...,medication_NI_OAD_Other,medication_NI_Meglitinides,medication_NI_Glitazones,medication_NI_OnNonmetforminOADs,medication_TSNI_AnyDiabetesMeds,baseline_HbA1C,diabetes_NI_NonSpecific_age,diabetes_NI_T2D_age,diabetes_NI_T1D_age,diabetes_NI_Gestational_age
2998608,Female,58.0,2010-01-29,1.0,0.0,,,0,,0,...,0,0,0,0,0,,,,,
3528657,Male,49.0,2009-01-05,1.0,0.0,,,0,,0,...,0,0,0,0,0,31.7,,,,
2536121,Female,63.0,2008-10-23,1.0,0.0,,,0,,0,...,0,0,0,0,0,34.8,,,,
2326999,Female,64.0,2010-04-01,1.0,0.0,,,0,,0,...,0,0,0,0,0,34.0,,,,
3566821,Male,62.0,2009-04-24,1.0,0.0,,58.0,0,0.0,1,...,0,0,0,1,1,54.6,58.5,,,


# Flowcharts to define prevalent diabetes
<img src="https://journals.plos.org/plosone/article/figure/image?size=large&id=info:doi/10.1371/journal.pone.0162388.g002"
     alt="Eastwood Figure 2"
     style="float: left; margin-right: 10px;" />

## Flowchart 1 - Possible Diabetes

In [44]:
diabetes_defined['diabetes_EastwoodAdjudicated'] = ''
diabetes_defined['Flowchart_1_Exited'] = ''
print('Total population:', diabetes_defined.shape[0])
# Rule 1.1
#replace rule_1_1_continue=1 if |  | dm_gdm_ni_bl==1| dm_t1dm_ni_bl==1 |dm_t2dm_ni_bl==1 | dm_drug_ins_ni_bl==1 |dm_drug_ins_bl_sr==1 | dm_drug_metf_ni_bl==1|dm_drug_nonmetf_oad_ni_bl==1

continue_1_1 = diabetes_defined[(diabetes_defined['diabetes_TS_onlyGestational'] == 1) |
                                (diabetes_defined['diabetes_NI_Gestational_01'] == 1) |
                                (diabetes_defined['diabetes_NI_NonSpecific_01'] == 1) |
                                (diabetes_defined['diabetes_NI_T1D_01'] == 1) |
                                (diabetes_defined['diabetes_NI_T2D_01'] == 1) |
                                (diabetes_defined['medication_TSNI_AnyDiabetesMeds'] == 1)]
diabetes_defined.loc[~diabetes_defined.index.isin(continue_1_1.index),['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Diabetes unlikely', '1.1']
print('Passed Rule 1.1 :', continue_1_1.shape[0], '({} remaining)'.format(diabetes_defined.shape[0]-continue_1_1.shape[0]))

Total population: 502520
Passed Rule 1.1 : 26689 (475831 remaining)


In [45]:
# Rule 1.2 
remove_1_2 = continue_1_1[(continue_1_1['Sex'] == 'Male') &
                          (continue_1_1['diabetes_NI_Gestational_01'] == 1) &
                          (continue_1_1['medication_TSNI_AnyDiabetesMeds'] == 0) &
                          (continue_1_1['diabetes_NI_T1D_01'] == 0) &
                          (continue_1_1['diabetes_NI_T2D_01'] == 0)]
diabetes_defined.loc[remove_1_2.index,['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Uncertain diabetes status', '1.2']
print('Removed by Rule 1.2 :', remove_1_2.shape[0], '({} remaining)'.format(sum(diabetes_defined['Flowchart_1_Exited'] == '')))

Removed by Rule 1.2 : 51 (26638 remaining)


In [46]:
# Rule 1.3
remove_1_3_a = (diabetes_defined['Flowchart_1_Exited'] == '') & \
               (diabetes_defined['diabetes_TS_onlyGestational'] == 1) & \
               (diabetes_defined['medication_TSNI_AnyDiabetesMeds'] == 0) & \
               (diabetes_defined['diabetes_NI_T1D_01'] == 0) & \
               (diabetes_defined['diabetes_NI_T2D_01'] == 0)

remove_1_3_b = (diabetes_defined['Flowchart_1_Exited'] == '') & \
               (diabetes_defined['diabetes_NI_Gestational_01'] == 1) & \
               (diabetes_defined['diabetes_NI_Gestational_age'] < 50) & \
               (diabetes_defined['medication_TSNI_AnyDiabetesMeds'] == 0) & \
               (diabetes_defined['diabetes_NI_T1D_01'] == 0) & \
               (diabetes_defined['diabetes_NI_T2D_01'] == 0)
remove_1_3 = diabetes_defined[remove_1_3_a | remove_1_3_b]
diabetes_defined.loc[remove_1_3.index,['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Possible gestational diabetes', '1.3']
print('Removed by Rule 1.3 :', remove_1_3.shape[0], '({} remaining)'.format(sum(diabetes_defined['Flowchart_1_Exited'] == '')))

Removed by Rule 1.3 : 793 (25845 remaining)


In [47]:
# Rule 1.4
remove_1_4 = diabetes_defined[(diabetes_defined['Flowchart_1_Exited'] == '') & (diabetes_defined['medication_NI_OnNonmetforminOADs'] == 1)]
diabetes_defined.loc[remove_1_4.index,['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Possible Type 2 diabetes', '1.4']
print('Removed by Rule 1.4 :', remove_1_4.shape[0], '({} remaining)'.format(sum(diabetes_defined['Flowchart_1_Exited'] == '')))

Removed by Rule 1.4 : 6808 (19037 remaining)


In [48]:
# Rule 1.5
start_1_5 = diabetes_defined[(diabetes_defined['diabetes_EastwoodAdjudicated'] == '')]
#replace rule_1_5_continue=1 if dm_agedm_ts_or_ni!=. & dm_agedm_ts_or_ni>0 & dm_agedm_ts_or_ni<31 & ethnic_sa_afc==1
continue_1_5_SAorAA = (start_1_5['ethnicity_isSAorAfricanA'] == 1) & \
                      (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'].isnull() == False) & \
                      (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'] > 0) & \
                      (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'] < 31)
#replace rule_1_5_continue=1 if dm_agedm_ts_or_ni!=. & dm_agedm_ts_or_ni>0 & dm_agedm_ts_or_ni<37 & ethnic_sa_afc==0
continue_1_5_EurAndOthers = (start_1_5['ethnicity_isSAorAfricanA'] == 0) & \
                            (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'].isnull() == False) & \
                            (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'] > 0) & \
                            (start_1_5['diabetes_TSNI_Any_AgeAtDiagnosis'] < 37)

start_1_6 = start_1_5.loc[continue_1_5_SAorAA | continue_1_5_EurAndOthers,]
diabetes_defined.loc[(~diabetes_defined.index.isin(start_1_6.index)) & (diabetes_defined['Flowchart_1_Exited'] == ''), ['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Possible Type 2 diabetes', '1.5']
print('Removed by Rule 1.5 :', sum(diabetes_defined['Flowchart_1_Exited'] == '1.5'), '({} remaining)'.format(sum(diabetes_defined['Flowchart_1_Exited'] == '')))

Removed by Rule 1.5 : 17020 (2017 remaining)


In [49]:
# Rule 1.6 
continue_1_6 = (start_1_6['medication_TS_OnInsulin'] == 1) | \
               (start_1_6['medication_NI_Insulin'] == 1) | \
               (start_1_6['medication_TS_InsulinWithin1YrDiabetesDiagnosis'] == 1) | \
               (start_1_6['diabetes_NI_T1D_01'] == 1)

diabetes_defined.loc[continue_1_6[continue_1_6 == True].index, ['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Possible Type 1 diabetes', '1.6']
diabetes_defined.loc[continue_1_6[continue_1_6 == False].index, ['diabetes_EastwoodAdjudicated', 'Flowchart_1_Exited']] = ['Possible Type 2 diabetes', '1.6']

In [50]:
print('Results of Flowchart 1')
diabetes_defined['diabetes_EastwoodAdjudicated_F1'] = diabetes_defined['diabetes_EastwoodAdjudicated'] 
diabetes_defined['diabetes_EastwoodAdjudicated'].value_counts()

Results of Flowchart 1


Diabetes unlikely                475831
Possible Type 2 diabetes          24136
Possible Type 1 diabetes           1709
Possible gestational diabetes       793
Uncertain diabetes status            51
Name: diabetes_EastwoodAdjudicated, dtype: int64

# Flowchart 2 - T1D Possible vs. Probable

In [51]:
start_F2 = diabetes_defined.loc[diabetes_defined['diabetes_EastwoodAdjudicated_F1'] == 'Possible Type 1 diabetes'].copy()
start_F2['diabetes_EastwoodAdjudicated_F2'] = ''
start_F2['Flowchart_2_Exited'] = ''
start_F2.head()

Unnamed: 0,Sex,AgeAtAssessment,DateOfAssessment,ethnicity_mapped,ethnicity_isSAorAfricanA,diabetes_TS_onlyGestational,diabetes_TS_AgeDiagnosed,medication_TS_OnInsulin,medication_TS_InsulinWithin1YrDiabetesDiagnosis,diabetes_NI_NonSpecific_01,...,baseline_HbA1C,diabetes_NI_NonSpecific_age,diabetes_NI_T2D_age,diabetes_NI_T1D_age,diabetes_NI_Gestational_age,diabetes_EastwoodAdjudicated,Flowchart_1_Exited,diabetes_EastwoodAdjudicated_F1,diabetes_EastwoodAdjudicated_F2,Flowchart_2_Exited
1548423,Female,61.0,2009-11-20,1.0,0.0,0.0,8.0,1,1.0,1,...,62.0,8.5,,,,Possible Type 1 diabetes,1.6,Possible Type 1 diabetes,,
4099168,Female,49.0,2010-02-06,1.0,0.0,0.0,35.0,1,0.0,1,...,67.4,35.5,,,,Possible Type 1 diabetes,1.6,Possible Type 1 diabetes,,
4728373,Female,40.0,2008-09-05,1.0,0.0,0.0,21.0,1,1.0,1,...,55.4,21.5,,,,Possible Type 1 diabetes,1.6,Possible Type 1 diabetes,,
5494873,Female,45.0,2007-11-08,1.0,0.0,0.0,17.0,1,1.0,1,...,77.0,17.5,,,,Possible Type 1 diabetes,1.6,Possible Type 1 diabetes,,
4125163,Female,62.0,2008-02-07,4.0,0.0,1.0,,1,,0,...,129.3,,,35.5,,Possible Type 1 diabetes,1.6,Possible Type 1 diabetes,,


In [52]:
# Rule 2.1
start_F2.loc[start_F2['diabetes_NI_T1D_01'] == 1, ['diabetes_EastwoodAdjudicated_F2', 'Flowchart_2_Exited']] = ['Probable Type 1 diabetes', '2.1']

In [53]:
# Rule 2.2 - different than Gad's (must have converted to insulin and have at least 1 medicine)
start_F2['Rule2.2_Count'] = 0
start_F2.loc[start_F2['medication_TS_OnInsulin'] == 1, 'Rule2.2_Count'] += 1
start_F2.loc[start_F2['medication_NI_Insulin'] == 1, 'Rule2.2_Count'] += 1

# Gad's Way
#start_F2.loc[start_F2['medication_TS_InsulinWithin1YrDiabetesDiagnosis'] == 1, 'Rule2.2_Count'] += 1
#probable_2_2 = start_F2['Rule2.2_Count'] >= 2

#Eastwood way
probable_2_2 = (start_F2['Rule2.2_Count'] >= 1) & (start_F2['medication_TS_InsulinWithin1YrDiabetesDiagnosis'] == 1)

start_F2.loc[probable_2_2[probable_2_2 == True].index, ['diabetes_EastwoodAdjudicated_F2', 'Flowchart_2_Exited']] = ['Probable Type 1 diabetes', '2.2']
start_F2.loc[probable_2_2[probable_2_2 == False].index, ['diabetes_EastwoodAdjudicated_F2', 'Flowchart_2_Exited']] = ['Possible Type 1 diabetes', '2.2']

print('Results of Flowchart 2 (Only Probably T1D)')
start_F2['diabetes_EastwoodAdjudicated_F2'].value_counts()

Results of Flowchart 2 (Only Probably T1D)


Probable Type 1 diabetes    1276
Possible Type 1 diabetes     433
Name: diabetes_EastwoodAdjudicated_F2, dtype: int64

In [54]:
diabetes_defined.loc[start_F2.index, ['diabetes_EastwoodAdjudicated_F2', 'Flowchart_2_Exited']] = start_F2[['diabetes_EastwoodAdjudicated_F2', 'Flowchart_2_Exited']]
diabetes_defined.loc[start_F2.index, 'diabetes_EastwoodAdjudicated'] = start_F2['diabetes_EastwoodAdjudicated_F2']

print('Results of Flowchart 2 (All UKB)')
diabetes_defined['diabetes_EastwoodAdjudicated'].value_counts()

Results of Flowchart 2 (All UKB)


Diabetes unlikely                475831
Possible Type 2 diabetes          24136
Probable Type 1 diabetes           1276
Possible gestational diabetes       793
Possible Type 1 diabetes            433
Uncertain diabetes status            51
Name: diabetes_EastwoodAdjudicated, dtype: int64

# Flowchart 3 - Refine T2D phenotyping

In [55]:
start_F3 = diabetes_defined.loc[diabetes_defined['diabetes_EastwoodAdjudicated_F1'] == 'Possible Type 2 diabetes'].copy()
start_F3['diabetes_EastwoodAdjudicated_F3'] = 'Possible Type 2 diabetes'
start_F3['Flowchart_3_Exited'] = ''
start_F3

Unnamed: 0,Sex,AgeAtAssessment,DateOfAssessment,ethnicity_mapped,ethnicity_isSAorAfricanA,diabetes_TS_onlyGestational,diabetes_TS_AgeDiagnosed,medication_TS_OnInsulin,medication_TS_InsulinWithin1YrDiabetesDiagnosis,diabetes_NI_NonSpecific_01,...,diabetes_NI_T2D_age,diabetes_NI_T1D_age,diabetes_NI_Gestational_age,diabetes_EastwoodAdjudicated,Flowchart_1_Exited,diabetes_EastwoodAdjudicated_F1,diabetes_EastwoodAdjudicated_F2,Flowchart_2_Exited,diabetes_EastwoodAdjudicated_F3,Flowchart_3_Exited
3566821,Male,62.0,2009-04-24,1.0,0.0,,58.0,0,0.0,1,...,,,,Possible Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
1515062,Male,65.0,2009-03-26,1.0,0.0,,45.0,1,0.0,1,...,,,,Possible Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
2616974,Male,55.0,2009-11-19,1.0,0.0,,46.0,0,0.0,1,...,,,,Possible Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
2609133,Male,65.0,2008-05-01,1.0,0.0,,45.0,1,0.0,1,...,,,,Possible Type 2 diabetes,1.5,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
2783362,Male,44.0,2008-04-22,1.0,0.0,,38.0,0,0.0,1,...,,,,Possible Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1457025,Female,52.0,2008-03-15,1.0,0.0,0.0,47.0,0,0.0,1,...,,,,Possible Type 2 diabetes,1.5,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
3907220,Female,65.0,2007-10-29,1.0,0.0,0.0,,0,0.0,1,...,,,,Possible Type 2 diabetes,1.5,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
2403088,Female,67.0,2008-07-23,1.0,0.0,0.0,45.0,1,0.0,1,...,,,,Possible Type 2 diabetes,1.5,Possible Type 2 diabetes,,,Possible Type 2 diabetes,
2516089,Male,53.0,2008-09-23,1.0,0.0,,48.0,0,0.0,1,...,,,,Possible Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Possible Type 2 diabetes,


In [56]:
# Rule 3.1 (only using metformin)
F3_metformin_users = start_F3[start_F3['medication_NI_Metformin'] == 1]
F3_metformin_only = F3_metformin_users[(F3_metformin_users['medication_TS_OnInsulin'] == 0) & \
                                       (F3_metformin_users['medication_NI_Insulin'] == 0) & \
                                       (F3_metformin_users['medication_NI_OnNonmetforminOADs'] == 0)]

# Rule 3.2 (applies to metformin only users)
continue_3_2 = F3_metformin_only[(F3_metformin_only['diabetes_NI_NonSpecific_01'] == 1) | \
                                 (F3_metformin_only['diabetes_NI_T1D_01'] == 1) | \
                                 (F3_metformin_only['diabetes_NI_T2D_01'] == 1) |
                                 (F3_metformin_only['diabetes_NI_Gestational_01'] == 1)]

i_exit_3_2 = F3_metformin_only[~F3_metformin_only.index.isin(continue_3_2.index)].index
start_F3.loc[i_exit_3_2,['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = ['Diabetes unlikely', '3.2']

In [57]:
# Rule 3.3
remove_3_3 = (start_F3['Flowchart_3_Exited'] == '') & \
             (start_F3['medication_NI_OnNonmetforminOADs'] == 1)
start_F3.loc[remove_3_3[remove_3_3 == True].index, ['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = ['Probable Type 2 diabetes', '3.3']

In [58]:
# Rule 3.4
remove_3_4 = (start_F3['Flowchart_3_Exited'] == '') & \
             (start_F3['medication_NI_Insulin'] != 1) & \
             (start_F3['medication_TS_OnInsulin'] != 1)
start_F3.loc[remove_3_4[remove_3_4 == True].index, ['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = ['Probable Type 2 diabetes', '3.4']

In [59]:
# Rule 3.5
t2d_3_5 = (start_F3['Flowchart_3_Exited'] == '') & \
          (start_F3['diabetes_NI_T1D_01'] == 0)

t1d_3_5 = (start_F3['Flowchart_3_Exited'] == '') & \
          (start_F3['diabetes_NI_T1D_01'] == 1) 
start_F3.loc[t2d_3_5[t2d_3_5 == True].index, ['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = ['Possible Type 2 diabetes', '3.5']
start_F3.loc[t1d_3_5[t1d_3_5 == True].index, ['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = ['Probable Type 1 diabetes', '3.5']

In [60]:
diabetes_defined.loc[start_F3.index, ['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']] = start_F3[['diabetes_EastwoodAdjudicated_F3', 'Flowchart_3_Exited']]
diabetes_defined.loc[start_F3.index, 'diabetes_EastwoodAdjudicated'] = start_F3['diabetes_EastwoodAdjudicated_F3']

print('Results of Flowchart 3 (All UKB)')
print(diabetes_defined['diabetes_EastwoodAdjudicated'].value_counts())
print(diabetes_defined['diabetes_EastwoodAdjudicated'].value_counts().sum())

Results of Flowchart 3 (All UKB)
Diabetes unlikely                476000
Probable Type 2 diabetes          20567
Possible Type 2 diabetes           3278
Probable Type 1 diabetes           1398
Possible gestational diabetes       793
Possible Type 1 diabetes            433
Uncertain diabetes status            51
Name: diabetes_EastwoodAdjudicated, dtype: int64
502520


In [61]:
diabetes_defined.to_csv('EastwoodDiabetes_PrevalentDiabetes.csv')

In [62]:
diabetes_defined

Unnamed: 0,Sex,AgeAtAssessment,DateOfAssessment,ethnicity_mapped,ethnicity_isSAorAfricanA,diabetes_TS_onlyGestational,diabetes_TS_AgeDiagnosed,medication_TS_OnInsulin,medication_TS_InsulinWithin1YrDiabetesDiagnosis,diabetes_NI_NonSpecific_01,...,diabetes_NI_T2D_age,diabetes_NI_T1D_age,diabetes_NI_Gestational_age,diabetes_EastwoodAdjudicated,Flowchart_1_Exited,diabetes_EastwoodAdjudicated_F1,diabetes_EastwoodAdjudicated_F2,Flowchart_2_Exited,diabetes_EastwoodAdjudicated_F3,Flowchart_3_Exited
2998608,Female,58.0,2010-01-29,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
3528657,Male,49.0,2009-01-05,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
2536121,Female,63.0,2008-10-23,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
2326999,Female,64.0,2010-04-01,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
3566821,Male,62.0,2009-04-24,1.0,0.0,,58.0,0,0.0,1,...,,,,Probable Type 2 diabetes,1.4,Possible Type 2 diabetes,,,Probable Type 2 diabetes,3.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218054,Male,48.0,2009-05-23,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
5962373,Female,48.0,2009-10-09,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
3577173,Female,61.0,2008-01-30,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
3014568,Male,65.0,2009-06-26,1.0,0.0,,,0,,0,...,,,,Diabetes unlikely,1.1,Diabetes unlikely,,,,
