<h1>Capstone Feature Creation Notebook</h1>

Import Libraries

In [560]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.decomposition import PCA
%matplotlib inline

Import Data

In [561]:
# Load the four input tables.
# Some of the CSVs may carry a stray "Unnamed: 0" column (typically a row index
# that was written out when the file was saved). errors="ignore" drops it when
# present and is a no-op otherwise, so no blanket try/except is needed -- a
# bare `except: pass` would also hide unrelated failures.
df = pd.read_csv("../capstone/clean_data.csv").drop(
    columns="Unnamed: 0", errors="ignore"
)
ppq = pd.read_csv("../capstone/ProteinAndPathologyQuantifications.csv").drop(
    columns="Unnamed: 0", errors="ignore"
)
grey = pd.read_csv("../capstone/greymatter.csv").drop(
    columns="Unnamed: 0", errors="ignore"
)
white = pd.read_csv("../capstone/whitematter.csv").drop(
    columns="Unnamed: 0", errors="ignore"
)

<h2>Grey Matter</h2>

Create Gender Binary (No data on Trans* or Gender Non-Binary people made available)

In [562]:
# Encode sex as 0/1 ("F" -> 0, "M" -> 1); values outside the mapping become NaN.
grey["gender"] = grey["sex"].map({"F": 0, "M": 1})

Create Binary for Patient's History of Ever Sustaining a Traumatic Brain Injury (with Loss of Consciousness)

In [563]:
# Encode the ever_tbi_w_loc flag as 0/1 ("N" -> 0, "Y" -> 1); other values become NaN.
grey["tbi_w_loc"] = grey["ever_tbi_w_loc"].map({"N": 0, "Y": 1})

Create Binary for Race = White 

In [564]:
# Encode race as 0/1 ("Non-white" -> 0, "White" -> 1); other values become NaN.
grey["white_binary"] = grey["race"].map({"Non-white": 0, "White": 1})

Create Binary for Race = Hispanic

In [565]:
# Encode the hispanic column as 0/1 ("Not Hispanic" -> 0, "Hispanic" -> 1); other values become NaN.
grey["hispanic_binary"] = grey["hispanic"].map({"Not Hispanic": 0, "Hispanic": 1})

Create Binary for apo_e4_allele Genetic Trait

In [566]:
# Encode the apo_e4_allele flag as 0/1 ("N" -> 0, "Y" -> 1); other values become NaN.
grey["apo_e4_allele_binary"] = grey["apo_e4_allele"].map({"N": 0, "Y": 1})

Change Age Categories into Integers

In [567]:
# Convert the age labels to numbers.
# Exact ages such as "87" are parsed directly; the binned labels are
# represented by the lower bound of their range, as in the original mapping.
# Differences from the previous hand-written dictionary:
#   * any exact age parses, not only the ones that happened to be listed
#     (unlisted ages used to turn into NaN silently);
#   * ages that are already numeric pass through unchanged, so the cell can be
#     re-run safely;
#   * missing ages stay NaN. The old 'NaN' -> 0 entry is dropped: read_csv
#     (called with default NA handling) yields a float NaN, not the string
#     'NaN', so the entry never matched -- and an age of 0 would have corrupted
#     years_since_first_tbi if it had.
# Labels that are neither numeric nor a known bin are coerced to NaN.
age_bin_floors = {"90-94": 90, "95-99": 95, "100+": 100}
grey["age"] = pd.to_numeric(
    grey["age"].map(lambda label: age_bin_floors.get(label, label)),
    errors="coerce",
)

Create Ordinal Categories for the Patient's Longest Loss of Consciousness, Ranked by Duration

In [568]:
# Rank the longest loss-of-consciousness duration labels from 0 to 7.
# The position of each label in the list is its code, so the list order
# (shortest to longest, with "Unknown or N/A" first) defines the encoding.
# Labels not in the list become NaN.
grey["loc_cats"] = grey["longest_loc_duration"].map(
    {
        label: code
        for code, label in enumerate(
            [
                "Unknown or N/A",
                "< 10 sec",
                "10 sec - 1 min",
                "1-2 min",
                "3-5 min",
                "6-9 min",
                "10 min - 1 hr",
                "> 1 hr",
            ]
        )
    }
)

Create Feature for Number of Years Since Patient's First Traumatic Brain Injury

In [569]:
# Row-wise difference between age and age_at_first_tbi.
# NOTE(review): presumably both columns are in years -- confirm against the data dictionary.
grey["years_since_first_tbi"] = grey["age"].sub(grey["age_at_first_tbi"])

Create Binary for Dementia Diagnosis

In [570]:
# Encode the act_demented label as 0/1 ("No Dementia" -> 0, "Dementia" -> 1); other values become NaN.
grey["dementia_binary"] = grey["act_demented"].map({"No Dementia": 0, "Dementia": 1})

Verify No Null Values

In [571]:
# Verify that the engineered grey-matter frame has no NaNs before it is saved.
#
# Fixes relative to the previous version of this check:
#   * it inspected `df`, which this notebook never modifies, instead of the
#     `grey` frame whose features were just created;
#   * the inner loop iterated over the characters of each column *name* and
#     recomputed the same null count every time;
#   * it stored the literal strings "column"/"idx" rather than the offending
#     column, so the report never said where the NaNs were;
#   * `a < 1 & b` parses as `a < (1 & b)`, so the "all clear" branch only
#     behaved correctly by accident.
#
# NaN_dict maps column name -> number of missing values.
# something_is_wrong_here maps column name -> error raised while counting
# (e.g. a duplicated column label, where grey[column] is a frame, not a Series).
NaN_dict = {}
something_is_wrong_here = {}
for column in grey.columns:
    try:
        missing = int(grey[column].isnull().sum())
    except Exception as err:
        something_is_wrong_here[column] = repr(err)
        continue
    if missing > 0:
        NaN_dict[column] = missing

if NaN_dict:
    print("There's a pesky NaN -> {}".format(NaN_dict))
elif something_is_wrong_here:
    print("Something is wrong here -> {}".format(something_is_wrong_here))
else:
    print("Nary a NaN in sight!")

Nary a NaN in sight!


In [572]:
# Write the grey-matter frame to greymatter.csv in the working directory,
# leaving the row index out of the file.
grey.to_csv(path_or_buf="greymatter.csv", index=False)

<h2>White Matter</h2>

Create Gender Binary (No data on Trans* or Gender Non-Binary people made available)

In [573]:
# Encode sex as 0/1 ("F" -> 0, "M" -> 1); values outside the mapping become NaN.
white["gender"] = white["sex"].map({"F": 0, "M": 1})

Create Binary for Patient's History of Ever Sustaining a Traumatic Brain Injury (with Loss of Consciousness)

In [574]:
# Encode the ever_tbi_w_loc flag as 0/1 ("N" -> 0, "Y" -> 1); other values become NaN.
white["tbi_w_loc"] = white["ever_tbi_w_loc"].map({"N": 0, "Y": 1})

Create Binary for Race = White

In [575]:
# Encode race as 0/1 ("Non-white" -> 0, "White" -> 1); other values become NaN.
white["white_binary"] = white["race"].map({"Non-white": 0, "White": 1})

Create Binary for Race = Hispanic

In [576]:
# Encode the hispanic column as 0/1 ("Not Hispanic" -> 0, "Hispanic" -> 1); other values become NaN.
white["hispanic_binary"] = white["hispanic"].map({"Not Hispanic": 0, "Hispanic": 1})

Create Binary for apo_e4_allele Genetic Trait

In [577]:
# Encode the apo_e4_allele flag as 0/1 ("N" -> 0, "Y" -> 1); other values become NaN.
white["apo_e4_allele_binary"] = white["apo_e4_allele"].map({"N": 0, "Y": 1})

Change Age Categories into Integers

In [578]:
# Convert the age labels to numbers.
# Exact ages such as "87" are parsed directly; the binned labels are
# represented by the lower bound of their range, as in the original mapping.
# Differences from the previous hand-written dictionary:
#   * any exact age parses, not only the ones that happened to be listed
#     (unlisted ages used to turn into NaN silently);
#   * ages that are already numeric pass through unchanged, so the cell can be
#     re-run safely.
# Missing ages stay NaN, and labels that are neither numeric nor a known bin
# are coerced to NaN.
age_bin_floors = {"90-94": 90, "95-99": 95, "100+": 100}
white["age"] = pd.to_numeric(
    white["age"].map(lambda label: age_bin_floors.get(label, label)),
    errors="coerce",
)

Create Ordinal Categories for the Patient's Longest Loss of Consciousness, Ranked by Duration

In [579]:
# Rank the longest loss-of-consciousness duration labels from 0 to 7.
# The position of each label in the list is its code, so the list order
# (shortest to longest, with "Unknown or N/A" first) defines the encoding.
# Labels not in the list become NaN.
white["loc_cats"] = white["longest_loc_duration"].map(
    {
        label: code
        for code, label in enumerate(
            [
                "Unknown or N/A",
                "< 10 sec",
                "10 sec - 1 min",
                "1-2 min",
                "3-5 min",
                "6-9 min",
                "10 min - 1 hr",
                "> 1 hr",
            ]
        )
    }
)

Create Feature for Number of Years Since Patient's First Traumatic Brain Injury

In [580]:
# Row-wise difference between age and age_at_first_tbi.
# NOTE(review): presumably both columns are in years -- confirm against the data dictionary.
white["years_since_first_tbi"] = white["age"].sub(white["age_at_first_tbi"])

Create Binary for Dementia Diagnosis

In [581]:
# Encode the act_demented label as 0/1 ("No Dementia" -> 0, "Dementia" -> 1); other values become NaN.
white["dementia_binary"] = white["act_demented"].map({"No Dementia": 0, "Dementia": 1})

Verify No Null Values

In [582]:
# Verify that the engineered white-matter frame has no NaNs before it is saved.
#
# Fixes relative to the previous version of this check:
#   * it inspected `df`, which this notebook never modifies, instead of the
#     `white` frame whose features were just created;
#   * the inner loop iterated over the characters of each column *name* and
#     recomputed the same null count every time;
#   * it stored the literal strings "column"/"idx" rather than the offending
#     column, so the report never said where the NaNs were;
#   * `a < 1 & b` parses as `a < (1 & b)`, so the "all clear" branch only
#     behaved correctly by accident.
#
# NaN_dict maps column name -> number of missing values.
# something_is_wrong_here maps column name -> error raised while counting
# (e.g. a duplicated column label, where white[column] is a frame, not a Series).
NaN_dict = {}
something_is_wrong_here = {}
for column in white.columns:
    try:
        missing = int(white[column].isnull().sum())
    except Exception as err:
        something_is_wrong_here[column] = repr(err)
        continue
    if missing > 0:
        NaN_dict[column] = missing

if NaN_dict:
    print("There's a pesky NaN -> {}".format(NaN_dict))
elif something_is_wrong_here:
    print("Something is wrong here -> {}".format(something_is_wrong_here))
else:
    print("Nary a NaN in sight!")

Nary a NaN in sight!


In [583]:
# Write the white-matter frame to whitematter.csv in the working directory,
# leaving the row index out of the file.
white.to_csv(path_or_buf="whitematter.csv", index=False)