# Consolide data with the feature extraction data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Build the train dataset 

In [2]:
df_train_dementia = pd.read_excel(
    '../hokuto_profile.xlsx', sheet_name='dementia')
df_train_dementia['label'] = 'dementia'

df_train_mci = pd.read_excel('../hokuto_profile.xlsx', sheet_name='mci')
df_train_mci['label'] = 'mci'

df_train_control = pd.read_excel(
    '../hokuto_profile.xlsx', sheet_name='control')
df_train_control['label'] = 'control'

# union of the three dataframes
df_train = pd.concat([df_train_dementia, df_train_mci, df_train_control])


df_train['label_id'] = df_train['label'].map(
    {'dementia': 2, 'mci': 1, 'control': 0})
df_train['site_id'] = df_train['Site'].map(
    {'A': 0, 'B': 1})


df_train.head()

Unnamed: 0,ID,Age,Gender,MMSE,Site,label,label_id,site_id
0,hokuto_dementia1,60,M,13,A,dementia,2,0
1,hokuto_dementia2,64,M,17,B,dementia,2,1
2,hokuto_dementia3,69,F,9,A,dementia,2,0
3,hokuto_dementia4,70,M,22,B,dementia,2,1
4,hokuto_dementia5,73,M,18,A,dementia,2,0


In [3]:
df_features_test = pd.read_csv('extracted_features.csv')
# remove file extension pour filename
df_features_test['FileName'] = df_features_test['FileName'].str.replace(
    '.mat', '')
df_features_test.head()

Unnamed: 0,FileName,Delta-1:4,Theta-4:8,Alpha-8:13,Beta-13:30,Gamma-30:40
0,hokuto_dementia1,1527.738,1400.7176,1223.7844,3281.0699,1531.1306
1,hokuto_dementia10,4465.7692,3885.3643,3085.435,5508.2384,1505.5075
2,hokuto_dementia11,0.0,11281.5938,0.0,20694.7154,17422.9477
3,hokuto_dementia12,9232.7663,8138.7951,6629.8026,13220.6634,3823.2126
4,hokuto_dementia13,6805.6846,5951.8738,4787.976,9408.0489,2752.0021


In [4]:
# merge the two dataframes with FileName and ID
df_train = pd.merge(df_train, df_features_test, left_on='ID',
                    right_on='FileName').drop('FileName', axis=1)
df_train['gender_id'] = df_train['Gender'].map({'M': 1, 'F': 0})
# save csv
df_train.to_csv('df_train.csv', index=False)
df_train.head()

Unnamed: 0,ID,Age,Gender,MMSE,Site,label,label_id,site_id,Delta-1:4,Theta-4:8,Alpha-8:13,Beta-13:30,Gamma-30:40,gender_id
0,hokuto_dementia1,60,M,13,A,dementia,2,0,1527.738,1400.718,1223.7844,3281.07,1531.131,1
1,hokuto_dementia2,64,M,17,B,dementia,2,1,0.0,1470078.0,0.0,2326050.0,1262857.0,1
2,hokuto_dementia3,69,F,9,A,dementia,2,0,1278.396,1214.818,1133.075,3814.643,1928.315,0
3,hokuto_dementia4,70,M,22,B,dementia,2,1,0.0,397573.7,0.0,629541.3,344165.4,1
4,hokuto_dementia5,73,M,18,A,dementia,2,0,9715.4041,8743.269,7358.8373,15553.37,3254.478,1


# Build the test dataset

In [5]:
df_test = pd.read_csv('test_extracted_features.csv')
df_test['FileName'] = df_test['FileName'].str.replace('.mat', '')
df_test.head()

Unnamed: 0,FileName,Delta-1:4,Theta-4:8,Alpha-8:13,Beta-13:30,Gamma-30:40
0,hokuto_test1,25648300.0,25225210.0,24518520.0,85506270.0,44289850.0
1,hokuto_test10,3879.899,3625.605,3244.233,8351.222,2515.782
2,hokuto_test11,0.0,465263.2,0.0,736133.6,400966.2
3,hokuto_test12,1761.727,1575.399,1328.131,3414.823,1726.068
4,hokuto_test13,2806.736,2639.766,2407.759,7234.731,3125.113


In [6]:

df_profile = pd.read_excel('../hokuto_profile.xlsx', sheet_name='test_data')
df_profile['gender_id'] = df_profile['Gender'].map({'M': 1, 'F': 0})
df_profile['site_id'] = df_train['Site'].map(
    {'A': 0, 'B': 1})
df_profile['label'] = ''
df_profile['label_id'] = ''
df_profile.head()

Unnamed: 0,ID,Age,Gender,Site,gender_id,site_id,label,label_id
0,hokuto_test1,64,M,A,1,0,,
1,hokuto_test2,70,F,A,0,1,,
2,hokuto_test3,61,F,B,0,0,,
3,hokuto_test4,83,F,B,0,1,,
4,hokuto_test5,71,M,A,1,0,,


In [7]:
# merge the two dataframes with FileName and ID
df_test = pd.merge(df_profile, df_test, left_on='ID',
                      right_on='FileName').drop('FileName', axis=1)
df_test.to_csv('df_test.csv', index=False)

df_test.head()

Unnamed: 0,ID,Age,Gender,Site,gender_id,site_id,label,label_id,Delta-1:4,Theta-4:8,Alpha-8:13,Beta-13:30,Gamma-30:40
0,hokuto_test1,64,M,A,1,0,,,25648300.0,25225210.0,24518520.0,85506270.0,44289850.0
1,hokuto_test2,70,F,A,0,1,,,2838.066,2804.487,2718.521,8439.152,2834.072
2,hokuto_test3,61,F,B,0,0,,,0.0,15253.85,0.0,26201.35,18670.67
3,hokuto_test4,83,F,B,0,1,,,0.0,20930.9,0.0,36859.6,26635.69
4,hokuto_test5,71,M,A,1,0,,,336.8586,346.8851,362.9394,1620.425,1151.548
