This Python notebook is meant to take in the entire CNS dataset and export a dataset with meaningful coding for classification and management of missing data such that every individual in the set can be classified on.

In [20]:
%reset -f
%matplotlib inline

import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
dirToInclude=parentdir +'/features/'
sys.path.insert(0,dirToInclude)

import pandas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 1000)

In [21]:
data_raw = pd.read_csv("../Data/cns_data.csv")

In [22]:
#These are from LS RFClassifier data cleaning
data_df = data_raw.drop(['critage','critfev', 'critcns','critinf','hivtest','incmin','admhr','admmin',\
                         'surnamenam','patinit','patientreg','mothinit','previnc','prevdate','prevident'
                         ],axis=1)
data_df['month'] = pd.DatetimeIndex(data_df['incdate']).month
data_df['day'] = pd.DatetimeIndex(data_df['incdate']).day
data_df['monthday'] = data_df['month']*30+data_df['day']

In [23]:
clin_df = data_df.loc[:,['clinjaund','clinhepato','clinspleno','clinconv','clindehyd',\
                          'clinoedem','clinlymph','clinresp','clinablung','clincyan','clincapref','clincoldext',\
                         'clinearinf','clinanemia','clinorcand','clinhemmor','clinaids',\
                         
                         'abdpain','vom','diar','head','muscle','conv','oth1sp','oth2sp',\
                         'sex','ageyrs','agemth','incdate','glasgtot','blanttot',\
                         'temp','card','resp','sbp','dbp','weight','height','muac',\
                         
                          'mhhosp', 'mhdev', 'mhsickle', 'mhchron','mhother1sp',\
                         'vacbcg','vacmeasles','vachib1','vachib2','vachib3','vacmen','vacpneumo',\
                         
                         'monthday','dimain','dimainsp', 'reason2','reason3','digast','diaids','dilrti', \
                         'disepsis','dimalnut','diother','diothersp','damain','damainsp','daaids','dalrti',\
                         'dasepsis','damalnut','daother','daothersp',\
                         
                         'rdtresult',\
                         
                         'incirrit', 'inchead', 'incphoto',\
                         'incfont', 'incneuro' , 'incseiza', 'incseizh', 'incpurp','incneck',\
                         
                         'bfmed','bfatb','bfvir','bfvir1','bfmal','bfinf','bfinf1','bfpyr','bfpyr1',\
                         
                         'diag']] #This is the final diagnosis - will be the 'label'

Bacterial meningitis elevated in children under 1 and HIV exposed children

The outcomes which I will classify as "other" will include "No lab diagnosis" (90%) and "Cryptococcal Mening", "TB Mening, and "Viral mening" (10%)

Cerebral Malaria includes cerebral and severe-but-not-cerebral

Mening: neck stiffness, bulging fontanel, Kernig or Brudzinski signs, all associated

Best individual marker for mening is leukocyte count, CSF lactate elevated



In [24]:
clin_df['incneck'].unique()

array(['No', 'Yes', 'Not applicable'], dtype=object)

In [25]:
#Mapping values, modified from LS RFClassifier
#Convert the date into a useful value. This just counts the days from the beginning of the dataset 
clin_df['incdate'] = pd.to_datetime(clin_df['incdate'])
clin_df['incyear'] = pd.DatetimeIndex(clin_df['incdate']).year
clin_df['incmonth'] = pd.DatetimeIndex(clin_df['incdate']).month

#Mapping yes/no to 1/0
clin_df['clinjaund'] = clin_df.clinjaund.map({'Yes': 1, 'No': 0})
clin_df['clinhepato'] = clin_df.clinhepato.map({'Yes': 1, 'No': 0})
clin_df['clinspleno'] = clin_df.clinspleno.map({'Yes': 1, 'No': 0})
clin_df['clinconv'] = clin_df.clinconv.map({'Yes': 1, 'No': 0})
clin_df['clindehyd'] = clin_df.clindehyd.map({'Yes': 1, 'No': 0})
clin_df['clinoedem'] = clin_df.clinoedem.map({'Yes': 1, 'No': 0})
clin_df['clinlymph'] = clin_df.clinlymph.map({'Yes': 1, 'No': 0})
clin_df['clinresp'] = clin_df.clinresp.map({'Yes': 1, 'No': 0})
clin_df['clinablung'] = clin_df.clinablung.map({'Yes': 1, 'No': 0})
clin_df['clincyan'] = clin_df.clincyan.map({'Yes': 1, 'No': 0})
clin_df['clincapref'] = clin_df.clincapref.map({'Yes': 1, 'No': 0})
clin_df['clincoldext'] = clin_df.clincoldext.map({'Yes': 1, 'No': 0})
clin_df['clinearinf'] = clin_df.clinearinf.map({'Yes': 1, 'No': 0})
clin_df['clinorcand'] = clin_df.clinorcand.map({'Yes': 1, 'No': 0})
clin_df['clinhemmor'] = clin_df.clinhemmor.map({'Yes': 1, 'No': 0})
clin_df['clinaids'] = clin_df.clinaids.map({'Yes': 1, 'No': 0})
clin_df['vom'] = clin_df.vom.map({'Yes': 1, 'No': 0})
clin_df['diar'] = clin_df.diar.map({'Yes': 1, 'No': 0})
clin_df['conv'] = clin_df.conv.map({'Yes': 1, 'No': 0})
clin_df['ageym'] = clin_df.ageyrs.astype(float)*12.
clin_df.agemth = clin_df.agemth.astype(float)
clin_df['age'] = clin_df.ageym + clin_df.agemth
clin_df['headache'] = clin_df['head'] #Since 'head' is a function
clin_df['digast'] = clin_df.digast.map({'Yes': 1, 'No': 0})
clin_df['diaids'] = clin_df.diaids.map({'Yes': 1, 'No': 0})
clin_df['dilrti'] = clin_df.dilrti.map({'Yes': 1, 'No': 0})
clin_df['disepsis'] = clin_df.disepsis.map({'Yes': 1, 'No': 0})
clin_df['dimalnut'] = clin_df.dimalnut.map({'Yes': 1, 'No': 0})
clin_df['diother'] = clin_df.diother.map({'Yes': 1, 'No': 0})
clin_df['rdtresult'] = clin_df.rdtresult.map({'Positive': 1 ,'Negative': 0, "Not done": np.nan})

#vaccinations
clin_df['vacbcg'] = clin_df.vacbcg.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vacmeasles'] = clin_df.vacmeasles.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vachib1'] = clin_df.vachib1.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vachib2'] = clin_df.vachib2.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vachib3'] = clin_df.vachib3.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vacmen'] = clin_df.vacmen.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})
clin_df['vacpneumo'] = clin_df.vacpneumo.map({'Yes-verbal': 1,'Yes-card': 1 ,'No': 0, "Don't know": np.nan})

#Meds received prior to enrollment in study
clin_df['bfmed'] = clin_df.bfmed.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['bfatb'] = clin_df.bfatb.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['bfvir'] = clin_df.bfvir.map({"Don't know":np.nan, 'No':0, 'Yes':1})
clin_df['bfmal'] = clin_df.bfmal.map({"Don't know":np.nan, 'No':0, 'Yes':1})
clin_df['bfinf'] = clin_df.bfinf.map({"Don't know":np.nan, 'No':0, 'Yes':1})
clin_df['bfpyr'] = clin_df.bfpyr.map({"Don't know":np.nan, 'No':0, 'Yes':1})

#Medical history variables
clin_df['mhhosp'] = clin_df.mhhosp.map({'Yes': 1, 'No':0})
clin_df['mhdev'] = clin_df.mhdev.map({'Yes': 1, 'No':0, 3:np.nan})
clin_df['mhsickle'] = clin_df.mhsickle.map({'Yes': 1, 'No':0, 3:np.nan})
clin_df['mhchron'] = clin_df.mhchron.map({'Yes': 1, 'No':0, 3:np.nan})

#some slightly more compliated mappings
clin_df['abdpain'] = clin_df.abdpain.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['headache'] = clin_df.headache.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['muscle'] = clin_df.muscle.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})


#Make a more robust initial diagnosis with the diother variable
clin_df['dimain'] = clin_df.dimain.map({"Cerebral Malaria":0, "Meningoencephalitis":2, "Meningitis":1.,\
                                       "Other":2})
clin_df['dimainsp'] = clin_df.dimainsp.map({np.nan: 0, 'SEVERE MALARIA': 2, 'HEPATIC ENCEPHALOPATHY': 0,
       'BRONCHOPNEUMONIA': 0, 'EPILEPSY': 0, 'SEPTICAEMIA': 0,
       'MALARIA COMPLICATED SEVERE ANAEMIA': 2,
       'SIMPLE MALARIA WITH HYPERPEREXIA': 2, 'ACUTE FLACCID PARALYSIS': 0,
       'SEVERE MALARIA WITH SEVERE ANAEMIA': 2,
       'MALARIA WITH SEVERE ANAEMIA': 2, 'MALARIA WITH MULTIPLE SEIZURES': 2,
       'MALARIA, MULTIPLE CONVULSIONS': 2,
       'MALARIA WITH FABRILE CONVULSIONS': 2,
       'MALARIA WITH FEBRILE CONVULSIONS': 2, ' ': 0,
       'MALARIA WITH CONVULSIONS': 2, 'SEVERE MALARIA WITH ANAEMIA': 2,
       'SEVERE MALARIA WITH CONVULSIONS' : 2})
clin_df['dimainrobust'] = clin_df['dimain']-clin_df['dimainsp']

#This is if the Drs at the hospital decide to offer an initial diagnosis different from the one listed,  will likely not use
clin_df['damain'] = clin_df.damain.map({"Cerebral Malaria":0, "Meningoencephalitis":2, "Meningitis":1.,\
                                       "Other":2})
clin_df['damainsp'] = clin_df.damainsp.map({np.nan: 0, 'SEVERE PNEUMONIA': 0,
       'SEVERE MALNUTRITION, PERSISTENT DIARRHOEA': 0,
       'SEVERE DEHYDRATION, SECONDARY TO DIARRHOEA': 0, 'SEVERE MALARIA': 2,
       'MALARIA WITH FEBRILE CONVULSIONS': 2,
       'CEREBRAL VASCULAR ACCIDENT, VIRAL ENCEPHALITIS': 0,
       'MALARIA WITH HYPERPYREXIA': 2, 'MALARIA': 2, 'PNEUMONIA': 0,
       'MULTIPLE PYOGENIC ABCESSES': 0,
       'RENAL FAILURE/URAEMIC ENCEPHALOPATHY': 0,
       'SEVERE ANAEMIA, RENAL DISEASE': 0, 'HEMOLYTIC ANAEMIA': 0,
       'HYDROCEPHALUS': 0, 'BRONCHIOLITIS': 0, 'DYSENTRY': 0,
       'MALARIA WITH CONVULSIONS': 2, 'SEPSIS': 0, "HOGKIN'S DISEASE": 0,
       'SIMPLE MALARIA, SEPSIS': 2, 'PYOPERITONIUM': 0, 'SEPTICAEMIA': 0,
       'SIMPLE MALARIA': 2, 'BACTERIA PNEUMONIA': 0, 'GASTROENTERITIS': 0,
       'MALARIA AND SEVERE ANAEMIA': 2, 'BRONCHOPNEUMONIA': 0,
       'SEVERE MALNUTRITION': 0, 'ACUTE GASTRO ENTERITIS': 0, 'RABIES': 0,
       'EPILEPSY TRIGERRED BY MALARIA AND/OR MENINGITIS': 0,
       'ACUTE GASTROENTERITIS': 0, 'INFECTION ABSCESS': 0,
       'SEVERE MALARIA AND ANAEMIA': 2, 'BACTEREMIA': 0, 'GASTROENTRITIS': 0,
       'LABOUR PNEUMONIA': 0, 'SEVERE DEHYDRATION': 0})
clin_df['damainrobust'] = clin_df['damain']-clin_df['damainsp']

#Mapping of inclusion variables 6-8 MLW
clin_df['incirrit'] = clin_df.incirrit.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['inchead'] = clin_df.inchead.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incphoto'] = clin_df.incphoto.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incfont'] = clin_df.incfont.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incneuro'] = clin_df.incneuro.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incseiza'] = clin_df.incseiza.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incseizh'] = clin_df.incseizh.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incpurp'] = clin_df.incpurp.map({"Don't know":np.nan, 'No':0, 'Yes':1, 'Not applicable':np.nan})
clin_df['incneck'] = clin_df.incneck.map({'No':0, 'Yes':1, 'Not applicable':np.nan})




#Final diagnosis, 
clin_df['diag'] = clin_df.diag.map({'malaria':0, 'cereb malaria':0,'virus-malaria':0,\
                                    'virus-bacteria':1, 'bacteremia':1,'bact meningitis':1,\
                                    'virus-other':2,'virus':2,'malaria-bacterial':1,\
                                    'tb':2,'crypto':2, '0.0':2})

In [26]:
clin_df.isnull().sum()[:]

clinjaund         0
clinhepato        0
clinspleno        0
clinconv          0
clindehyd         0
clinoedem         0
clinlymph         0
clinresp          0
clinablung        0
clincyan          0
clincapref        0
clincoldext       0
clinearinf        0
clinanemia        0
clinorcand        0
clinhemmor        0
clinaids          0
abdpain         209
vom               0
diar              0
head              0
muscle          280
conv              3
oth1sp          406
oth2sp          460
sex               0
ageyrs            0
agemth            0
incdate           0
glasgtot         53
               ... 
damalnut        345
daother         345
daothersp       462
rdtresult        10
incirrit         17
inchead         284
incphoto        204
incfont         205
incneuro         14
incseiza          1
incseizh          3
incpurp           7
incneck           7
bfmed            17
bfatb            42
bfvir            29
bfvir1          477
bfmal            30
bfinf            30


In [27]:
# Consider items as "too much missing" if they are continuous variable with more than 50% missing, remove 
# da. variables as they do not appear to provide much information different from the di. variables.
# The .sp variables will go as well as it isn't clear how they are too specific and missing to
# work into a regression
nonmiss_df = clin_df.drop(['damainrobust','bfpyr1', 'bfinf1','bfvir1','daothersp','daother','oth1sp','oth2sp',\
                         'damain', 'damainsp', 'daaids', 'dalrti', 'dasepsis', 'damalnut','reason3','diothersp',\
                         'reason2', 'blanttot', 'mhother1sp','head'],axis=1)

In [37]:
nonmiss_df.to_csv('../Data/CleanedForRegression.csv')

In [29]:
# Here is the dataframe with only columns with missing data displayed, these are the columns that need 
# something done before regression can begin
nonmiss_df[nonmiss_df.columns[nonmiss_df.isnull().any()]].head(10)

Unnamed: 0,abdpain,muscle,conv,glasgtot,card,sbp,dbp,height,muac,mhdev,mhsickle,mhchron,vacbcg,vacmeasles,vachib1,vachib2,vachib3,vacmen,vacpneumo,digast,diaids,dilrti,disepsis,dimalnut,diother,rdtresult,incirrit,inchead,incphoto,incfont,incneuro,incseiza,incseizh,incpurp,incneck,bfmed,bfatb,bfvir,bfmal,bfinf,bfpyr,headache
0,,,0.0,15.0,180.0,137.0,72.0,93.0,16.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,,,1.0,,178.0,99.0,52.0,81.0,13.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,,,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,,,1.0,0.0,0.0,0.0,0.0,,,,,,
2,0.0,0.0,1.0,11.0,116.0,81.0,45.0,93.0,16.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,,,1.0,,111.0,106.0,57.0,80.2,16.0,0.0,,0.0,1.0,1.0,1.0,1.0,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,
4,,,1.0,8.0,106.0,,,,15.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,,0.0,0.0,1.0,0.0,0.0,1.0,,,1.0,0.0,1.0,1.0
5,0.0,,1.0,,104.0,80.0,50.0,76.0,16.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,
6,,,1.0,,112.0,,,60.0,13.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,
7,,,1.0,,115.0,100.0,60.0,76.0,14.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,,,1.0,,1.0,
8,,,1.0,,120.0,70.0,26.0,63.4,13.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,1.0,1.0,0.0,0.0,,,,,,,
9,1.0,0.0,1.0,12.0,122.0,72.0,29.0,86.0,15.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,1.0,0.0,0.0,0.0,1.0,0.0,,1.0,0.0,1.0,


In [30]:
# For the yes/no (0/1) variables, I wiil put all NAs to a 2, indicating a third category yes/no/unknown.  I will
# do a non-ordinal regression strategy for these variables
yesno_df = nonmiss_df.drop(["glasgtot","card","sbp","dbp","height","muac"], axis=1)
yesno_df = yesno_df.replace(np.nan, 2)

In [31]:
#For the others I will mean impute
continuous_df = nonmiss_df[["glasgtot","card","sbp","dbp","height","muac"]]

In [32]:
#The extent of missingness is listed below, glasgtot, sbp, and dbp are the only variables where significant
#and potentially misleading imputation is occurring.
continuous_df.isnull().sum()[:]

glasgtot    53
card         2
sbp         79
dbp         79
height       8
muac         6
dtype: int64

In [33]:
continuous_df.mean(axis=0)

glasgtot     10.819672
card        132.778243
sbp          97.950125
dbp          52.518703
height       86.551483
muac         14.838186
dtype: float64

In [34]:
continuous_df['glasgtot'] = continuous_df['glasgtot'].fillna(continuous_df.mean(axis=0)[0])
continuous_df['card'] = continuous_df['card'].fillna(continuous_df.mean(axis=0)[1])
continuous_df['sbp'] = continuous_df['sbp'].fillna(continuous_df.mean(axis=0)[2])
continuous_df['dbp'] = continuous_df['dbp'].fillna(continuous_df.mean(axis=0)[3])
continuous_df['height'] = continuous_df['height'].fillna(continuous_df.mean(axis=0)[4])
continuous_df['muac'] = continuous_df['muac'].fillna(continuous_df.mean(axis=0)[5])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See

In [35]:
imputed_df = pd.concat([yesno_df, continuous_df], axis=1)

In [36]:
imputed_df.head(20)

Unnamed: 0,clinjaund,clinhepato,clinspleno,clinconv,clindehyd,clinoedem,clinlymph,clinresp,clinablung,clincyan,clincapref,clincoldext,clinearinf,clinanemia,clinorcand,clinhemmor,clinaids,abdpain,vom,diar,muscle,conv,sex,ageyrs,agemth,incdate,temp,resp,weight,mhhosp,mhdev,mhsickle,mhchron,vacbcg,vacmeasles,vachib1,vachib2,vachib3,vacmen,vacpneumo,monthday,dimain,dimainsp,digast,diaids,dilrti,disepsis,dimalnut,diother,rdtresult,incirrit,inchead,incphoto,incfont,incneuro,incseiza,incseizh,incpurp,incneck,bfmed,bfatb,bfvir,bfmal,bfinf,bfpyr,diag,incyear,incmonth,ageym,age,headache,dimainrobust,glasgtot,card,sbp,dbp,height,muac
0,1,1,1,0,1,0,1,1,1,0,0,0,0,1,0,0,0,2.0,0,0,2.0,0.0,1,3,0.0,2009-08-25,38.6,56,12.0,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0,265,0.0,0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2009,8,36.0,36.0,2.0,0.0,15.0,180.0,137.0,72.0,93.0,16.0
1,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,2.0,0,1,2.0,1.0,1,2,6.0,2009-08-27,38.4,48,9.1,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,267,2.0,0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2,2009,8,24.0,30.0,2.0,2.0,10.819672,178.0,99.0,52.0,81.0,13.0
2,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0.0,0,0,0.0,1.0,2,3,5.0,2009-08-31,35.9,34,11.0,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,271,0.0,0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2009,8,36.0,41.0,0.0,0.0,11.0,116.0,81.0,45.0,93.0,16.0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,0,0,2.0,1.0,1,1,11.0,2009-09-01,37.0,42,11.22,1,0.0,2.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0,271,1.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,2,2009,9,12.0,23.0,2.0,1.0,10.819672,111.0,106.0,57.0,80.2,16.0
4,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,2.0,1,0,2.0,1.0,1,4,3.0,2009-09-07,38.1,52,15.0,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,277,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,2.0,1.0,0.0,1.0,0,2009,9,48.0,51.0,1.0,2.0,8.0,106.0,97.950125,52.518703,86.551483,15.0
5,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0.0,0,0,2.0,1.0,2,0,9.0,2009-09-08,38.0,68,10.0,0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,2.0,2.0,278,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,2,2009,9,0.0,9.0,2.0,0.0,10.819672,104.0,80.0,50.0,76.0,16.0
6,0,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,2.0,0,0,2.0,1.0,2,0,4.0,2009-09-09,36.8,80,5.05,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,279,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,2,2009,9,0.0,4.0,2.0,2.0,10.819672,112.0,97.950125,52.518703,60.0,13.0
7,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2.0,1,0,2.0,1.0,1,2,0.0,2009-09-11,37.8,30,10.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,281,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,1.0,2.0,1.0,0,2009,9,24.0,24.0,2.0,0.0,10.819672,115.0,100.0,60.0,76.0,14.5
8,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2.0,0,0,2.0,1.0,2,0,9.0,2009-09-17,36.8,40,5.0,0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,287,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2,2009,9,0.0,9.0,2.0,1.0,10.819672,120.0,70.0,26.0,63.4,13.0
9,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,0,0,0.0,1.0,1,2,6.0,2009-09-18,38.0,28,10.0,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,288,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,1.0,0,2009,9,24.0,30.0,2.0,1.0,12.0,122.0,72.0,29.0,86.0,15.0


In [118]:
imputed_df.to_csv('../Data/ImputedForRegression.csv')