In [1]:
import sys
import pandas as pd
import numpy as np
import sklearn
import keras

from sklearn import model_selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from keras.utils.np_utils import to_categorical



Using TensorFlow backend.


In [2]:
names = ['diagnosis', 'fvc', 'fev1', 'performance_status', 'pain_before_surgery',
        'haemoptysis_before_surgery', 'dyspnoea_before_surgery', 'cough_before_surgery', 'weakness_before_surgery',
        'tumour_size', 'diabetes_mellitus', 'mi_6_months', 'peripheral_arterial_disease', 'smoking', 'asthma', 
         'age', 'survival']

#From https://archive.ics.uci.edu/ml/datasets/Thoracic+Surgery+Data
df = pd.read_csv("/Users/tommykwan/Documents/MachineLearning/DeepLearning/thoracic-surgery-survival/thoracic-surgery-data.csv", names = names)

# Unsure of how to interpret diagnosis, may implement later 
df = df.drop(columns=['diagnosis'])

# Drop age for this version
df = df.drop(columns=['age'])


print(df)


      fvc   fev1 performance_status pain_before_surgery  \
0    2.88   2.16               PRZ1                   F   
1    3.40   1.88               PRZ0                   F   
2    2.76   2.08               PRZ1                   F   
3    3.68   3.04               PRZ0                   F   
4    2.44   0.96               PRZ2                   F   
5    2.48   1.88               PRZ1                   F   
6    4.36   3.28               PRZ1                   F   
7    3.19   2.50               PRZ1                   F   
8    3.16   2.64               PRZ2                   F   
9    2.32   2.16               PRZ1                   F   
10   2.56   2.32               PRZ0                   F   
11   4.28   4.44               PRZ1                   F   
12   3.00   2.36               PRZ1                   F   
13   3.98   3.06               PRZ2                   F   
14   1.96   1.40               PRZ1                   F   
15   4.68   4.16               PRZ1                   F 

In [3]:
# Need to preprocess the data 

# Rename survival as class for ease of reading
df.rename(columns = {'survival': 'class'}, inplace = True) 

print(df)

      fvc   fev1 performance_status pain_before_surgery  \
0    2.88   2.16               PRZ1                   F   
1    3.40   1.88               PRZ0                   F   
2    2.76   2.08               PRZ1                   F   
3    3.68   3.04               PRZ0                   F   
4    2.44   0.96               PRZ2                   F   
5    2.48   1.88               PRZ1                   F   
6    4.36   3.28               PRZ1                   F   
7    3.19   2.50               PRZ1                   F   
8    3.16   2.64               PRZ2                   F   
9    2.32   2.16               PRZ1                   F   
10   2.56   2.32               PRZ0                   F   
11   4.28   4.44               PRZ1                   F   
12   3.00   2.36               PRZ1                   F   
13   3.98   3.06               PRZ2                   F   
14   1.96   1.40               PRZ1                   F   
15   4.68   4.16               PRZ1                   F 

In [4]:
df['performance_status'] = pd.get_dummies(df['performance_status'])
df['pain_before_surgery'] = pd.get_dummies(df['pain_before_surgery'])
df['haemoptysis_before_surgery'] = pd.get_dummies(df['haemoptysis_before_surgery'])
df['dyspnoea_before_surgery'] = pd.get_dummies(df['dyspnoea_before_surgery'])
df['cough_before_surgery'] = pd.get_dummies(df['cough_before_surgery'])
df['weakness_before_surgery'] = pd.get_dummies(df['weakness_before_surgery'])
df['tumour_size'] = pd.get_dummies(df['tumour_size'])
df['diabetes_mellitus'] = pd.get_dummies(df['diabetes_mellitus'])
df['mi_6_months'] = pd.get_dummies(df['mi_6_months'])
df['peripheral_arterial_disease'] = pd.get_dummies(df['peripheral_arterial_disease'])
df['smoking'] = pd.get_dummies(df['smoking'])
df['asthma'] = pd.get_dummies(df['asthma'])
df['class'] = pd.get_dummies(df['class'])



In [5]:
print(df)

      fvc   fev1  performance_status  pain_before_surgery  \
0    2.88   2.16                   0                    1   
1    3.40   1.88                   1                    1   
2    2.76   2.08                   0                    1   
3    3.68   3.04                   1                    1   
4    2.44   0.96                   0                    1   
5    2.48   1.88                   0                    1   
6    4.36   3.28                   0                    1   
7    3.19   2.50                   0                    1   
8    3.16   2.64                   0                    1   
9    2.32   2.16                   0                    1   
10   2.56   2.32                   1                    1   
11   4.28   4.44                   0                    1   
12   3.00   2.36                   0                    1   
13   3.98   3.06                   0                    1   
14   1.96   1.40                   0                    1   
15   4.68   4.16        

In [6]:
y = np.array(df['class'])
X = np.array(df.drop(['class'], 1))
seed = 1
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=seed)

Y_train = to_categorical(y_train, num_classes=None)
Y_test = to_categorical(y_test, num_classes=None)


In [7]:
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(10, input_dim=14, kernel_initializer='normal', activation='relu'))
    model.add(Dense(10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(2, activation='softmax'))
    
    # compile model
    adam = Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

model = create_model()

print(model.summary())



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                150       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 22        
Total params: 282
Trainable params: 282
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
model.fit(X_train, Y_train, epochs=5, batch_size=20, verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1a24cbb7b8>