# *Imports*

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.linear_model import  LogisticRegression
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import random as python_random
from matplotlib import pyplot



def reset_seeds(seed=2001):
   """Seed the three random number generators used by this notebook.

   Applies the same seed to NumPy's global generator, Python's ``random``
   module and TensorFlow's global generator.

   Args:
      seed: Integer seed passed to all three generators. Defaults to 2001,
         the value this function previously hard-coded, so existing
         ``reset_seeds()`` calls behave exactly as before.
   """
   np.random.seed(seed)
   python_random.seed(seed)
   tf.random.set_seed(seed)

# *Import Heart Failure Dataset*

In [None]:
# Read the clinical-records CSV (resolved relative to the working directory)
# and preview its first rows.
DATASET_CSV = 'heart_failure_clinical_records_dataset.csv'
heart_data = pd.read_csv(DATASET_CSV)
heart_data.head()

# Exploratory Analysis

## *Correlation HeatMap*

In [None]:
# Pairwise correlation between every pair of columns, drawn as an annotated
# heat map on an explicitly created 10x10 inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(heart_data.corr(), vmin=-1, cmap='Spectral', annot=True, ax=ax);

# *Logistic Regression*

In [None]:
# Predictors are every column except the label; the label is DEATH_EVENT.
X = heart_data.drop(columns=['DEATH_EVENT'])
y = heart_data['DEATH_EVENT']

In [None]:

# Logistic regression: 10-fold cross-validated accuracy for several values of C
scoring = 'accuracy'
# The splitter carries a fixed integer seed, so every C value is scored on
# the same folds; it therefore only needs to be built once.
kfold = model_selection.KFold(n_splits=10, random_state=2001, shuffle=True)
for c in (0.01, 0.05, 0.15, 0.25, 0.5, 1):
  lr = LogisticRegression(C=c, solver='liblinear', random_state=2001)
  results = model_selection.cross_val_score(lr, X, y, cv=kfold, scoring=scoring)
  print("Accuracy for C=%s: %.3f (%.3f)" % (c, results.mean(), results.std()))

# Clustering

## KMeans

In [None]:
from sklearn.cluster import KMeans

# Partition the samples in X into two clusters.
# * `algorithm` is left at the library default: the explicit 'auto' value is
#   deprecated in newer scikit-learn releases and only ever selected the
#   default algorithm.
# * `random_state` pins the centroid initialisation so the clustering is the
#   same on every run (2001 matches the seed used by reset_seeds()).
kmeans = KMeans(n_clusters=2, max_iter=600, random_state=2001)
kmeans.fit(X)

## Neural Network


In [None]:
reset_seeds()
# Baseline binary classifier (Keras) scored with stratified 10-fold cross-validation
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# NOTE(review): this wrapper module may be missing from recent TensorFlow
# releases - confirm against the installed version.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
# split the raw value matrix into inputs (first 12 columns) and labels (column 12)
dataset = heart_data.values
X = dataset[:,0:12].astype(float)
Y = dataset[:,12]
# encode class values as integers
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
# baseline model
def create_baseline():
	"""Build and compile the baseline network.

	Architecture: 12 inputs -> Dense(20, relu) -> Dense(1, sigmoid), compiled
	with binary cross-entropy loss, the Adam optimizer and an accuracy metric.

	Returns:
		The compiled Sequential model.
	"""
	model = Sequential()
	model.add(Dense(20, input_dim=12, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# evaluate the model on the raw (unscaled) features;
# verbose=0 keeps 10 folds x 100 epochs of progress output out of the notebook
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Ergasia sgourou

## Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import  LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm

## Dataset Import

In [None]:
# Load the clinical-records CSV into `heart_data` and show the first five rows.
heart_data = pd.read_csv(filepath_or_buffer='heart_failure_clinical_records_dataset.csv')
heart_data.head(n=5)

In [None]:
# Summary statistics of the dataset's columns (displayed as the cell output).
heart_data.describe()

In [None]:
# Annotated correlation heat map of all columns on a 13x13 inch figure.
fig, ax = plt.subplots(figsize=(13, 13))
sns.heatmap(heart_data.corr(), vmin=-1, cmap='coolwarm', annot=True, ax=ax);

## Without PCA

In [None]:
# The first 12 columns are used as predictors; DEATH_EVENT is the label.
feature = heart_data.iloc[:, :12]
target = heart_data['DEATH_EVENT']
for part in (feature, target):
    print(part.shape)

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    feature, target, test_size=0.2, random_state=2
)

### Logistic Regression

In [None]:
# Fit a logistic regression on the training split, predict the held-out
# split once, and print the resulting accuracy.
lr = LogisticRegression(random_state = 0)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(accuracy_score(y_test, y_pred))

### Random Forest

In [None]:
# Random forest of 100 trees. `random_state` fixes the forest's internal
# randomness so the printed accuracy is the same on every run (2001 matches
# the seed used by reset_seeds()).
randForest = RandomForestClassifier(n_estimators=100, random_state=2001)

# Train
randForest.fit(X_train, y_train)

# Test
y_pred = randForest.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

### KNN

In [None]:
# k-nearest-neighbours classifier voting over the 7 closest training samples,
# fitted on the training split in the same statement.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# Label the held-out rows, then report the fraction labelled correctly.
y_pred = knn.predict(X_test)
knn_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", knn_accuracy)

### SVM

In [None]:
# Support vector classifier with a linear kernel, fitted on the training split.
SVM = svm.SVC(kernel='linear').fit(X_train, y_train)

# Predict the held-out split and print its accuracy.
y_pred = SVM.predict(X_test)
svm_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", svm_accuracy)

## With PCA

### PCA implementation

In [None]:
# Raw predictors (first 12 columns) and the DEATH_EVENT label.
feature_raw = heart_data.iloc[:, 0:12]
target = heart_data['DEATH_EVENT']
print(feature_raw.shape)
print(target.shape)

# Split BEFORE scaling so that the scaler's mean/variance are learned from the
# training rows only; fitting it on every row would leak test-set statistics
# into the models evaluated on X_test.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    feature_raw, target, test_size = 0.2, random_state = 2)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Fully standardized copy of all rows, kept under the name `feature` for the
# cells that plot or cross-validate on the whole dataset.
feature = StandardScaler().fit_transform(feature_raw)

In [None]:
# Reduce the features to 8 principal components.
# The projection is fitted on the training split and then applied unchanged to
# the test split.
# NOTE: X_train / X_test are overwritten in place, so running this cell a
# second time would project the already-projected data again.
pca = PCA(n_components=8)
X_train = pca.fit_transform(X_train)
X_test= pca.transform(X_test)

### Visualization

In [None]:
def applyPCA(normalizedData, dimensions=2, dataframeLabels=None):
    """Project data onto its leading principal components.

    Fits a PCA with ``dimensions`` components on ``normalizedData``, prints
    the explained-variance ratio of each component and their sum, and returns
    the projected data as a DataFrame.

    Args:
        normalizedData: Feature matrix to fit and transform.
        dimensions: Number of principal components to keep.
        dataframeLabels: Column names for the returned DataFrame, one per
            component. When omitted, the names 'principal component 1' ..
            'principal component <dimensions>' are generated, so the default
            stays valid for any ``dimensions`` value.

    Returns:
        pandas.DataFrame with one column per principal component.
    """
    if dataframeLabels is None:
        dataframeLabels = ['principal component %d' % (i + 1) for i in range(dimensions)]

    pca = PCA(n_components=dimensions)
    pca_data = pca.fit_transform(normalizedData)

    print("Explained variation per principal component:", format(pca.explained_variance_ratio_))
    print("Total variation: ", sum(pca.explained_variance_ratio_))

    pca_data_dataFrame = pd.DataFrame(data=pca_data, columns=dataframeLabels)
    return pca_data_dataFrame

pca_data_dataFrame = applyPCA(feature)

# Label column used to colour the points in both scatter plots.
death_event = heart_data['DEATH_EVENT']

# ---- 2D scatter of the first two principal components ----
plt.figure(figsize=(10, 7))
plt.xticks(fontsize=12)
plt.yticks(fontsize=14)
plt.xlabel('Principal Component - 1', fontsize=20)
plt.ylabel('Principal Component - 2', fontsize=20)
plt.title("Principal Component Analysis of Heart Failure Dataset", fontsize=20)
targets = death_event.unique()
colors = ['r', 'g']
# The loop variable must not be called `target`: a loop variable at notebook
# level outlives the loop and would replace the label Series of that name
# with a single class value.
for label, color in zip(targets, colors):
    indicesToKeep = death_event == label
    plt.scatter(pca_data_dataFrame.loc[indicesToKeep, 'principal component 1'],
                pca_data_dataFrame.loc[indicesToKeep, 'principal component 2'], c=color, s=50)

plt.legend(targets, prop={'size': 15})
plt.show()

# ---- 3D scatter of the first three principal components ----
plt.figure(figsize=(10, 7))
ax = plt.axes(projection="3d")
ax.set_xlabel('principal component 1')
ax.set_ylabel('principal component 2')
ax.set_zlabel('principal component 3')
targets = death_event.unique()
colors = ['r', 'g']

pca_data_3D_dataFrame = applyPCA(feature, dimensions=3, dataframeLabels=['principal component 1', 'principal component 2',
                                                                       'principal component 3'])

# One scatter call per class so each class gets its own colour
for label, color in zip(targets, colors):
    indicesToKeep = death_event == label

    ax.scatter3D(pca_data_3D_dataFrame.loc[indicesToKeep, 'principal component 1'],
                    pca_data_3D_dataFrame.loc[indicesToKeep, 'principal component 2'],
                    pca_data_3D_dataFrame.loc[indicesToKeep, 'principal component 3'], color=color)

plt.title("simple 3D PCA results")

# show plot
plt.show()

### Logistic Regression

In [None]:
# Fit a logistic regression on the training split, predict the held-out
# split once, and print the resulting accuracy.
lr = LogisticRegression(random_state = 0)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(accuracy_score(y_test, y_pred))

### Random Forest

In [None]:
# Random forest of 100 trees. `random_state` fixes the forest's internal
# randomness so the printed accuracy is the same on every run (2001 matches
# the seed used by reset_seeds()).
randForest = RandomForestClassifier(n_estimators=100, random_state=2001)

# Train
randForest.fit(X_train, y_train)

# Test
y_pred = randForest.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

### KNN

In [None]:
# 7-nearest-neighbours classifier: build, then fit on the training split.
knn = KNeighborsClassifier(n_neighbors=7)
knn = knn.fit(X_train, y_train)

# Score the held-out split.
y_pred = knn.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

### SVM

In [None]:
# Linear-kernel support vector classifier: build, then fit on the training split.
SVM = svm.SVC(kernel='linear')
SVM = SVM.fit(X_train, y_train)

# Score the held-out split.
y_pred = SVM.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

### Neural Network

In [None]:
# Project the whole `feature` matrix onto 8 principal components; the result
# feeds the cross-validated network in the next cell.
# NOTE(review): the projection is fitted on every row before cross-validation
# splits them, so each validation fold has influenced the components - confirm
# this is acceptable for the reported score.
pca = PCA(n_components=8)
feature_pca = pca.fit_transform(feature)

In [None]:
reset_seeds()
# NOTE: this replaces the 12-input create_baseline defined earlier in the
# notebook with a variant sized for the 8 PCA components.
def create_baseline():
	"""Build and compile the network used on the 8 PCA components.

	Architecture: 8 inputs -> Dense(12, relu) -> Dense(8, relu) ->
	Dense(1, sigmoid), compiled with binary cross-entropy loss, the Adam
	optimizer and an accuracy metric.

	Returns:
		The compiled Sequential model.
	"""
	model = Sequential()
	model.add(Dense(12, input_dim=8, activation='relu'))
	model.add(Dense(8, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
# Read the labels straight from the frame rather than through the
# notebook-level `target` name, which other cells may rebind.
labels = heart_data['DEATH_EVENT']
# evaluate the model on the PCA-projected features;
# verbose=0 keeps 10 folds x 500 epochs of progress output out of the notebook
estimator = KerasClassifier(build_fn=create_baseline, epochs=500, batch_size=10, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, feature_pca, labels, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

In [None]:
reset_seeds()
# 8 -> 12 -> 8 -> 1 network, described as data and assembled in order.
# Each layer is constructed and added before the next one is created.
layer_specs = (
    dict(units=12, input_dim=8, activation='relu'),
    dict(units=8, activation='relu'),
    dict(units=1, activation='sigmoid'),
)
model = Sequential()
for spec in layer_specs:
    model.add(Dense(**spec))
# Binary cross-entropy with Adam, tracking accuracy
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train silently on the training split
model.fit(X_train, y_train, batch_size=10, epochs=500, verbose=0)
# Score on the held-out split; evaluate() yields (loss, accuracy)
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))