In [1]:
import pandas as pd
from scipy.io import arff
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from sklearn import preprocessing
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, 
                              GradientBoostingClassifier, ExtraTreesClassifier)
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

from Algorithms import knn,dt,lr,rf,svm,nb
from Algorithms import Ant_Colony,Bacterial_Foraging,Bee_Colony,Particle_Swarm

# Fold count constant. It is not referenced in the visible cells —
# presumably consumed by cross-validation code further down (TODO confirm).
FOLDS =10
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Location of the OASIS longitudinal CSV, relative to the notebook's working directory.
data = 'Dataset/oasis_longitudinal.csv'

# Read the raw table and preview the first rows.
df = pd.read_csv(filepath_or_buffer=data)
df.head(n=5)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [3]:
# Boolean flag per row: True where Group is exactly 'Nondemented', False for any other label.
is_nondemented = df['Group'].eq('Nondemented')
nu = is_nondemented.to_frame()

# Class balance: Nondemented (True) versus everything else (False).
nu['Group'].value_counts()

True     190
False    183
Name: Group, dtype: int64

In [4]:
# Relabel 'Converted' rows as 'Demented'; all other labels are left as they are.
df['Group'] = df['Group'].replace({'Converted': 'Demented'})
df.head(3)

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046


In [5]:
# Remove identifier / bookkeeping columns in a single call.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
id_columns = ['Subject ID', 'MRI ID', 'Visit']
df.drop(columns=id_columns, inplace=True, errors='ignore')

# CDR is NOT removed: the drop below is commented out, so CDR remains in the
# frame and is used as a feature by the later cells.
# NOTE(review): CDR is a clinical dementia score, so keeping it as a predictor
# may leak the target — confirm this is intended.
#df.drop(['CDR'], axis = 1, inplace = True, errors = 'ignore')
df.head(3)

Unnamed: 0,Group,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,Demented,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046


In [6]:
# Encode the target column: 1 = Demented (including 'Converted'), 0 = Nondemented.
#
# An explicit lookup is used instead of Series.replace(list, list): replace()
# relies on implicit object -> int downcasting, which is deprecated in
# pandas 2.2, so the column (and therefore y) could stay object dtype.
group_codes = {'Nondemented': 0, 'Demented': 1, 'Converted': 1}

# .get(label, label) leaves already-encoded values untouched, so re-running the
# cell is harmless; pd.to_numeric raises if an unexpected text label remains.
df['Group'] = pd.to_numeric(df['Group'].map(lambda label: group_codes.get(label, label)))
df.head(3)

Unnamed: 0,Group,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046


In [7]:
# Encode sex as a number: 1 = M, 0 = F.
#
# An explicit lookup is used instead of Series.replace(list, list), which
# relies on implicit object -> int downcasting (deprecated in pandas 2.2).
sex_codes = {'M': 1, 'F': 0}

# Values that are not 'M'/'F' (e.g. already-encoded numbers on a re-run) pass
# through unchanged; pd.to_numeric raises if an unexpected text label remains.
df['M/F'] = pd.to_numeric(df['M/F'].map(lambda sex: sex_codes.get(sex, sex)))
df.head(3)

Unnamed: 0,Group,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,0,1,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,457,1,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,1,0,1,R,75,12,,23.0,0.5,1678,0.736,1.046


In [8]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the 'Hand' column (fit and transform in one step).
encoder = LabelEncoder()
df['Hand'] = encoder.fit_transform(df['Hand'].values)

# Sanity check: fit a second encoder on the encoded column and list the
# distinct codes in use. A single code means the column is constant.
encoder2 = LabelEncoder()
encoder2.fit(df['Hand'].values)
list(encoder2.classes_)

[0]

In [9]:
# Percentage of missing values per column.
missing_pct = df.isnull().sum() / len(df) * 100

# Keep only columns that actually have gaps, worst first (at most 30 of them).
data_na = missing_pct[missing_pct != 0].sort_values(ascending=False).head(30)

# One-column summary table, rounded to two decimals.
missing_data = pd.DataFrame({'Lost proportion (%)': data_na.round(2)})
missing_data.head(20)

Unnamed: 0,Lost proportion (%)
SES,5.09
MMSE,0.54


In [10]:
from sklearn.impute import SimpleImputer

# NOTE(review): both imputers are fitted on the full dataset, i.e. before the
# train/test split performed later — confirm this is acceptable.

# SES: fill gaps with the most frequent value.
# fit_transform() already fits, so no separate fit() call is needed.
ses_imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
df[['SES']] = ses_imputer.fit_transform(df[['SES']])

# MMSE: fill gaps with the median.
# The name `imputer` is kept for this one, matching what the cell left bound before.
imputer = SimpleImputer(missing_values=np.nan, strategy='median')
df[['MMSE']] = imputer.fit_transform(df[['MMSE']])

In [11]:
# This cell previously repeated the MMSE median imputation of the preceding
# cell verbatim, which is a no-op once MMSE has been filled. It now only
# verifies that the imputation step left no missing values behind.
remaining_na = df[['SES', 'MMSE']].isnull().sum()
assert (remaining_na == 0).all(), f"missing values remain after imputation:\n{remaining_na}"

In [12]:
from sklearn.preprocessing import StandardScaler

# Columns to standardise: everything except the target 'Group'.
feature_cols = ['Age', 'MR Delay', 'M/F', 'Hand', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF']

# NOTE: df_norm is only another name for df (no copy is made), so the scaled
# values overwrite df itself; the following cells read them through df.
df_norm = df

# NOTE(review): the scaler is fitted on the full dataset, before the train/test split.
scaler = StandardScaler()
df_norm[feature_cols] = scaler.fit_transform(df[feature_cols])

In [13]:
# Alias (not a copy) of the preprocessed frame. Despite the name, this is the
# full dataset; the train/test split happens in a later cell.
data_test = df

In [14]:
# Feature matrix: every column except the target, as a plain NumPy array.
X = data_test.drop(columns=["Group"]).values

# Target vector taken from the 'Group' column.
y = data_test["Group"].values

In [15]:
# We divide our data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state = 0)
X_test.shape

(94, 11)

## Evolutionary Algorithms

#### Ant Colony

In [16]:
# A list of ones with one entry per column of X
# (presumably the initial "all features selected" mask — confirm in Algorithms).
k = [1] * len(X[0])

max_ant, colo_ant = Ant_Colony.Ant_Colony(k, X_train, X_test, y_train, y_test)

11111111111
0.9361702127659575
1  01111111001  8  0.940426
2  11111011111  10  0.934043
3  11111111111  11  0.938298
4  00111100001  5  0.768085
5  01001001000  3  0.946809
6  00111110100  6  0.827660
7  00001100001  3  0.853191
8  11011001111  8  0.925532
9  10000000000  1  0.531915
10  11111111111  11  0.934043
11  10110011101  7  0.938298
12  10001001000  3  0.934043
13  11111011011  9  0.927660
14  00001000001  2  0.776596
15  11111111111  11  0.925532
16  11100111111  9  0.936170
17  00001000000  1  0.653191
18  00000000110  2  0.614894
19  10100110000  4  0.782979
20  10111010111  8  0.738298
[12, 9, 12, 11, 15, 9, 11, 11, 10, 9, 13] 6
[ 4 10  2  0  7  6]
Final:   10101011001   6   0.841383    2753.0199


In [19]:
# Run the project's Bee_Colony routine on the same all-ones list `k` and the same train/test split.
# NOTE(review): the meaning of the two returned values is defined in Algorithms.Bee_Colony, not visible here.
max_bee , colo_bee = Bee_Colony.Bee_Colony(k, X_train, X_test, y_train, y_test)

0.9340425531914894
1  00110111111  8  0.940426
2  11011111111  10  0.940426
3  01011101011  7  0.927660
4  11110011101  8  0.936170
5  01110111111  9  0.936170
6  01110111111  9  0.940426
7  01111101111  9  0.936170
8  11001101111  8  0.921277
9  10011101111  8  0.929787
10  01111001111  8  0.921277
11  11111111111  11  0.931915
12  01111101111  9  0.942553
13  10010111111  8  0.938298
14  00110011111  7  0.936170
15  00010111101  6  0.944681
16  00110011111  7  0.938298
17  10011011111  8  0.927660
18  11010101011  7  0.927660
19  00010111101  6  0.946809
20  00111011111  8  0.931915
[8, 11, 11, 19, 10, 14, 13, 20, 18, 17, 20] 8
[10  7  3  8  9  5  6  2]
Final:   00110111111   8   0.934787    802.4099


In [20]:
# Run the project's Particle_Swarm routine on the same all-ones list `k` and the same train/test split.
# NOTE(review): the meaning of the two returned values is defined in Algorithms.Particle_Swarm, not visible here.
max_pswarm , colo_pswarm = Particle_Swarm.Particle_Swarm(k, X_train, X_test, y_train, y_test)

0.9361702127659575
1  11010011111  8  0.931915
2  00011111101  7  0.940426
3  00110111111  8  0.938298
4  00011111111  8  0.934043
5  00110101110  6  0.936170
6  00010011101  5  0.936170
7  00011101101  6  0.931915
8  01010101101  6  0.940426
9  00010111111  7  0.931915
10  00111001111  7  0.921277
11  00110011111  7  0.927660
12  00010101101  5  0.938298
13  01111001111  8  0.929787
14  00011101111  7  0.940426
15  00010011011  5  0.934043
16  00110101101  6  0.944681
17  00110101111  7  0.938298
18  00111001111  7  0.936170
19  01010101111  7  0.934043
20  00010101111  6  0.929787
[1, 4, 8, 20, 7, 13, 8, 20, 19, 14, 19] 6
[ 7  3 10  8  9  5]
Final:   00010101111   6   0.934787    2861.9842


In [21]:
# Run the project's Bacterial_Foraging routine on the same all-ones list `k` and the same train/test split.
# NOTE(review): the meaning of the two returned values is defined in Algorithms.Bacterial_Foraging, not visible here.
max_bf , colo_bf = Bacterial_Foraging.Bacterial_Foraging(k, X_train, X_test, y_train, y_test)

0.9361702127659575
1  00010001100  3  0.936170
2  10010011101  6  0.938298
3  00010001100  3  0.936170
4  00111011111  8  0.938298
5  01110001111  7  0.934043
6  01010001001  4  0.936170
7  01110111111  9  0.940426
8  10010001100  4  0.936170
9  00010001111  5  0.931915
10  01001001110  5  0.921277
11  01010001100  4  0.936170
12  00010001110  4  0.936170
13  00010001001  3  0.936170
14  11011101111  9  0.931915
15  01010001100  4  0.936170
16  01010001001  4  0.936170
17  01111111111  10  0.942553
18  01111111110  9  0.936170
19  11110111111  10  0.938298
20  11010011011  7  0.927660
[5, 12, 6, 19, 5, 5, 7, 20, 16, 11, 12] 5
[ 7  3  8 10  1]
Final:   01010001101   5   0.935319    1158.3128
