In [2]:
%%bash
pip install seaborn --user




In [11]:
#General Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import sklearn

from sklearn.ensemble import ExtraTreesClassifier 
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold

#Autoenconder Functions
from keras.layers import Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from sklearn.model_selection import train_test_split

#Nueral Network Functions
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Data Preprocessing

In [5]:
#List of Missing Values
missing_values = ["?", "NaN",]
#Import Data
BCdata = pd.read_csv("dataR2.csv", delimiter=",",na_values = missing_values)
BCdata.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1


In [6]:
#Rename Data to fit algorithm
XData = BCdata
XData.dtypes

Age                 int64
BMI               float64
Glucose             int64
Insulin           float64
HOMA              float64
Leptin            float64
Adiponectin       float64
Resistin          float64
MCP.1             float64
Classification      int64
dtype: object

In [7]:
#Data Visualization
col = XData.select_dtypes("number").columns
col
XData[col]

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.500000,70,2.707,0.467409,8.8071,9.702400,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.124670,91,4.498,1.009651,17.9393,22.432040,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.169560,12.76600,928.220,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.819240,10.57635,773.920,1
5,49,22.854458,92,3.226,0.732087,6.8317,13.679750,10.31760,530.410,1
6,89,22.700000,77,4.690,0.890787,6.9640,5.589865,12.93610,1256.083,1
7,76,23.800000,118,6.470,1.883201,4.3110,13.251320,5.10420,280.694,1
8,73,22.000000,97,3.350,0.801543,4.4700,10.358725,6.28445,136.855,1
9,75,23.000000,83,4.952,1.013839,17.1270,11.578990,7.09130,318.302,1


In [9]:
#Check for Missing Values
XData.isnull().sum()

Age               0
BMI               0
Glucose           0
Insulin           0
HOMA              0
Leptin            0
Adiponectin       0
Resistin          0
MCP.1             0
Classification    0
dtype: int64

In [10]:
#Data Info
XData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 10 columns):
Age               116 non-null int64
BMI               116 non-null float64
Glucose           116 non-null int64
Insulin           116 non-null float64
HOMA              116 non-null float64
Leptin            116 non-null float64
Adiponectin       116 non-null float64
Resistin          116 non-null float64
MCP.1             116 non-null float64
Classification    116 non-null int64
dtypes: float64(7), int64(3)
memory usage: 9.1 KB


In [13]:
#Separate into X and Y components for data processing
Y = XData['Classification']
X = XData.drop(columns=['Classification'])


# AUTOENCODER

In [18]:

#Split Data into Training and Testing (Change for experiments)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size= 0.02, random_state=0)


def AutoEncoder(X,Y,encodersize, activationH, activationout):
    
    #create input layer
    input_layer = Input(shape=(X.shape[1],))
    #create encoder layer
    encoder_layer = Dense(encodersize, activation=activationH)(input_layer)
    #create decoder layer 
    decoder_layer = Dense(X.shape[1],activation = activationout)(encoder_layer)
    
    #create autoencoder and encoder models
    Autoencoder = Model(input_layer, decoder_layer)
    Encoder = Model(input_layer, encoder_layer)
    
    #Autoenconder Compile & Fit
    Autoencoder.compile(optimizer='adam', loss = 'mean_squared_error')
    Autoencoder.fit(X_train, X_train,
                   epochs = 50,
                   batch_size= 30,
                   shuffle=True,
                   verbose=0,
                   validation_data=(X_test, X_test))
    return Autoencoder, Encoder

In [19]:
#Call Autoencoder Function
(AE, E) = AutoEncoder(X,Y,5,'relu','relu')

In [20]:
#Save the Learned features to use in the NN
learnedfeatures = E.predict(X_train)
ValidationFeatures = E.predict(X_test)
print(learnedfeatures)

[[269.02493  224.31673    0.       287.12146    0.      ]
 [186.11697  128.0147     0.       195.38113    0.      ]
 [817.39526  697.59985    0.       884.10547    0.      ]
 [377.18228  338.7201     0.       403.12103    0.      ]
 [323.0942   271.67145    0.       343.4069     0.      ]
 [137.94467  110.10261    0.       142.82164    0.      ]
 [362.32797  321.51477    0.       400.6889     0.      ]
 [448.6025   399.19287    0.       451.48807    0.      ]
 [309.77304  258.13235    0.       320.27438    0.      ]
 [234.53758  178.9232     0.       234.96092    0.      ]
 [439.23145  406.41638    0.       465.0551     0.      ]
 [175.1407   139.477      0.       182.87129    0.      ]
 [510.48032  468.63815    0.       562.05853    0.      ]
 [569.3628   520.2518     0.       632.34186    0.      ]
 [155.11287  104.87553    0.       146.02608    0.      ]
 [132.62178   96.49109    0.       140.16995    0.      ]
 [193.74974  154.01161    0.       198.15614    0.      ]
 [151.57768  1

# Neural Network

In [21]:

NN = MLPClassifier(solver='sgd',alpha=1e-6, hidden_layer_sizes=(28,1),learning_rate ='adaptive', random_state=1)
NN.fit(learnedfeatures,Y_train)
NNPredicted = NN.predict(ValidationFeatures)
NNAccuracy = accuracy_score(Y_test,NNPredicted)*100
print(NNAccuracy)

66.66666666666666
