# Domaći zadatak 3 - Neuronske mreže

<b>Studenti: Vladan Milojević - 1248, Vladimir Nešić - 1235</b>

Cilj projekta je predviđanje smrtnog ishoda kod pacijenata koji su doživeli zastoj srca. Dataset koji se koristi: https://www.kaggle.com/andrewmvd/heart-failure-clinical-data

Da bi dataset bio balansiran, generisani su dodatni podaci uz pomoć SMOTE filtera u Weka programu.

### Korišćene biblioteke, učitavanje i analiza podataka

In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
import tensorflow as tf
from tensorflow import keras
from keras.layers import Bidirectional, Dense, Dropout
from keras.models import Sequential
from keras.utils.vis_utils import plot_model
from tensorflow.python.keras.callbacks import TensorBoard
from keras_visualizer import visualizer 
from time import time
import keras.backend as K

In [84]:
# Load the balanced heart-failure CSV into a DataFrame.
data = pd.read_csv("heart_failure_clinical_records_dataset_balanced.csv")
# Show the first rows as a quick sanity check of the columns and values.
data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,53.0,1,1808,0,60,1,249000.0,0.7,138,1,1,106,0
1,70.0,0,1202,0,50,1,358000.0,0.9,141,0,0,196,0
2,50.0,1,249,1,35,1,319000.0,1.0,128,0,0,28,1
3,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
4,45.0,0,145,0,25,0,219000.0,1.1,137,1,0,6,1


In [85]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
data.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0
mean,60.676569,0.408416,491.779703,0.391089,35.690594,0.319307,238623.032525,1.366312,135.980198,0.665842,0.240099,101.34901,0.497525
std,12.083863,0.49215,855.137273,0.488599,11.640854,0.466786,100308.923793,0.923587,4.268251,0.47228,0.427673,83.131839,0.500614
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,50.0,0.0,104.0,0.0,25.0,0.0,173750.0,1.0,134.0,0.0,0.0,23.75,0.0
50%,60.0,0.0,201.5,0.0,35.0,0.0,235000.0,1.1,136.0,1.0,0.0,85.5,0.0
75%,70.0,1.0,582.0,1.0,40.0,1.0,279000.0,1.4,139.0,1.0,0.0,186.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [86]:
# Report, per column, whether it contains any missing value.
data.isnull().any()

age                         False
anaemia                     False
creatinine_phosphokinase    False
diabetes                    False
ejection_fraction           False
high_blood_pressure         False
platelets                   False
serum_creatinine            False
serum_sodium                False
sex                         False
smoking                     False
time                        False
DEATH_EVENT                 False
dtype: bool

In [87]:
# Rank the columns by the absolute value of their correlation with DEATH_EVENT.
# Asking for the 20 largest returns every column, since the frame has fewer than 20.
data.corr()["DEATH_EVENT"].abs().nlargest(20)

DEATH_EVENT                 1.000000
time                        0.689805
ejection_fraction           0.395489
serum_sodium                0.291508
platelets                   0.281216
smoking                     0.200041
serum_creatinine            0.197666
age                         0.159432
diabetes                    0.056901
creatinine_phosphokinase    0.056803
sex                         0.033226
high_blood_pressure         0.012538
anaemia                     0.000922
Name: DEATH_EVENT, dtype: float64

### Priprema podataka

In [88]:
# Separate the feature columns from the binary target.
X = data.drop(columns=["DEATH_EVENT"])
y = data["DEATH_EVENT"]

# Split BEFORE scaling so that the test rows never influence the mean/std
# used to standardize the training features (avoids train/test leakage).
# NOTE(review): per the notebook intro the CSV was SMOTE-balanced before this
# split, so synthetic rows derived from test samples may still end up in the
# training set -- consider balancing only the training portion.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the test rows.
col_names = list(X.columns)
s_scaler = preprocessing.StandardScaler()
X_train = pd.DataFrame(
    s_scaler.fit_transform(X_train), columns=col_names, index=X_train.index
)
X_test = pd.DataFrame(
    s_scaler.transform(X_test), columns=col_names, index=X_test.index
)

# Standardized copy of the full feature table (using the training statistics),
# kept under its original name for any later cell that refers to it.
X_df = pd.DataFrame(s_scaler.transform(X), columns=col_names, index=X.index)

def custom_mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    The mean is taken over all elements, so the result is a single scalar.

    NOTE: an identical function is defined again in the "Loss funkcije" cell;
    once that cell runs, its definition replaces this one.
    """
    squared_error = tf.square(y_true - y_pred)
    return tf.math.reduce_mean(squared_error)

### Loss funkcije

In [89]:
def custom_mean_squared_error(y_true, y_pred):
    """Mean squared error: average of ``(y_true - y_pred) ** 2`` over all elements.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        A scalar tensor holding the mean squared difference.
    """
    squared_error = tf.square(y_true - y_pred)
    return tf.math.reduce_mean(squared_error)

def euclidean_distance_loss(y_true, y_pred):
    """Euclidean (L2) distance between predictions and targets along the last axis.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Tensor with the last axis reduced, holding the distance for each
        remaining position.
    """
    squared_distance = K.sum(K.square(y_pred - y_true), axis=-1)
    # The derivative of sqrt is infinite at 0, so a prediction that exactly
    # matches its target would produce NaN gradients. Clamping to a tiny
    # positive value keeps the gradient finite without visibly changing the loss.
    return K.sqrt(K.maximum(squared_distance, K.epsilon()))


def custom_mse(y_true, y_pred):
    """Weighted squared error with fixed weights 0.3 and 0.7.

    The element-wise squared differences are multiplied by ``[0.3, 0.7]``
    (broadcast against the last axis) and then summed over axis 1.

    NOTE(review): the two weights suggest this loss expects two output
    columns -- confirm against the model it is used with.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Tensor of weighted squared errors with axis 1 summed out.
    """
    output_weights = [0.3, 0.7]

    # Element-wise squared difference between predictions and targets.
    squared_error = K.square(y_pred - y_true)

    # Scale each position on the last axis by its weight.
    weighted_error = squared_error * output_weights

    # Collapse axis 1 into a single value.
    return K.sum(weighted_error, axis=1)


### Kreiranje i treniranje modela

In [93]:
# Small fully connected binary classifier: inputs -> 9 -> 7 -> 5 -> 1 (sigmoid).
model = Sequential()

# Take the input width from the training data instead of hard-coding it, so
# the model keeps working if feature columns are added or removed.
model.add(Dense(units=9, activation='relu', input_dim=X_train.shape[1]))
# model.add(Dropout(0.1))
model.add(Dense(units=7, activation='relu'))
model.add(Dense(units=5, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(units=1, activation='sigmoid'))

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()
# visualizer(model, format='png', view=True)

model.compile(optimizer='adam', loss=custom_mean_squared_error, metrics=['accuracy'])

# Hold out 20% of the training rows for validation during fitting.
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.2)

# Final evaluation on the test split, which is not used during training.
test_loss, test_acc = model.evaluate(X_test, y_test)

print('Test loss: ', test_loss)
print('Test accuracy: ', test_acc)


Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_169 (Dense)            (None, 9)                 117       
_________________________________________________________________
dense_170 (Dense)            (None, 7)                 70        
_________________________________________________________________
dense_171 (Dense)            (None, 5)                 40        
_________________________________________________________________
dropout_43 (Dropout)         (None, 5)                 0         
_________________________________________________________________
dense_172 (Dense)            (None, 1)                 6         
Total params: 233
Trainable params: 233
Non-trainable params: 0
_________________________________________________________________
None
Train on 225 samples, validate on 57 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100