In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Importing Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

### Reading the data set

In [None]:
# Locations of the two CSV files that make up the dataset.
heart = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
o2 = '../input/heart-attack-analysis-prediction-dataset/o2Saturation.csv'

# Read both files with pandas' default parsing options.
# NOTE(review): a later cell renames a column called '98.6', which suggests
# o2Saturation.csv has no header row and its first value is being read as the
# column name -- confirm against the raw file.
df_heart, df_o2 = (pd.read_csv(csv_path) for csv_path in (heart, o2))

In [None]:
# Put the two tables side by side; join='inner' keeps only the row labels
# that are present in both frames.
df = pd.concat(objs=[df_heart, df_o2], axis='columns', join='inner')
df.head()

In [None]:
# DataFrame.rename returns a new frame and leaves the original untouched, so the
# result has to be assigned back for the new column name to reach later cells.
# Re-running this cell is safe: renaming a label that no longer exists is a no-op.
df = df.rename(columns={'98.6': 'o2'})
df.head()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Print a concise summary of the combined frame.
df.info()

### Count plot

In [None]:
# Categorical columns to visualise, one count plot per column.
catFeatures = ['sex', 'cp', 'fbs', 'restecg', 'exng', 'slp', 'caa', 'thall']

fig = plt.figure(figsize=(10,8))
fig.subplots_adjust(hspace=0.5, wspace=1)

# Subplot positions are 1-based, hence start=1; the panels fill a 2 x 4 grid.
for i, catFeature in enumerate(catFeatures, start=1):
    ax = fig.add_subplot(2, 4, i)
    sns.countplot(x=df[catFeature], ax=ax)

### Checking for skewness

In [None]:
# (column, title) pairs for the continuous features whose distribution shape
# (skewness) we want to inspect. Adding a feature only needs a new entry here
# instead of another copy of the plotting code.
density_plots = [
    ('trtbps', "Resting blood pressure "),
    ('chol', "Cholesterol"),
    ('thalachh', "Maximum heart rate achieved"),
    ('age', "Age"),
]

# One separate 7 x 5 figure per feature, each showing its density estimate.
for column, title in density_plots:
    fig, ax = plt.subplots(figsize=(7, 5))
    df[column].plot(kind='density', ax=ax)
    ax.set_title(title)
    plt.show()

In [None]:
# Pairwise correlation between all columns, drawn as an annotated heatmap.
cor_mat = df.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(data=cor_mat, annot=True, ax=ax)
plt.show()

### Feature and Label selection

In [None]:
# Features are every column except the target; the target is the 'output' column.
y = df['output']
X = df.drop(columns='output')

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Preview the first values of the target.
y.head()

In [None]:
# Class balance of the target column.
sns.countplot(data=df, x="output")

### Splitting the data into train and test sets

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Normalizing our data

In [None]:
# Min-max scaling. The scaler learns its ranges from the training split only
# and the same fitted ranges are then applied to both splits, so no statistics
# are taken from the test data.
from sklearn.preprocessing import MinMaxScaler

scalar = MinMaxScaler().fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)

### Creating our neural network

Creating a model with a learning-rate scheduler callback, so that the learning rate is increased a little at every epoch.

In [None]:
# Learning-rate sweep: train a model while the learning rate is raised every
# epoch, so that the loss can afterwards be plotted against the learning rate.
tf.random.set_seed(42)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

# The callback sets the learning rate to 1e-4 * 10**(epoch/20) at each epoch,
# i.e. it grows by a factor of 10 every 20 epochs.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
    schedule=lambda epoch: 1e-4 * 10**(epoch/20)
)

history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    verbose=0,
    callbacks=[lr_scheduler],
)

### Plotting loss and learning rate curve to find the optimal learning rate

In [None]:
# Plot the learning rate versus the loss.
# The per-epoch learning rates are rebuilt with the same formula the scheduler
# callback used (1e-4 * 10**(epoch/20)). The number of epochs is read from the
# recorded history rather than hard-coded, so the x and y series always have the
# same length even if the `epochs` argument of the sweep is changed.
n_epochs = len(history.history['loss'])
lrs = 1e-4 * (10 ** (tf.range(n_epochs)/20))

plt.figure(figsize=(10,8))
plt.semilogx(lrs, history.history['loss'])
plt.xlabel('learning rate')
plt.ylabel('loss')
plt.title('Learning rate vs Loss')
plt.show()

We can see that the loss is still decreasing for learning rates between 0.001 and 0.01. From the curve above, a learning rate of roughly 0.002–0.003 is a good choice; the final model below uses 0.002.

### Final model creation

In [None]:
# Final model: two 64-unit ReLU hidden layers and a single sigmoid output unit,
# trained with Adam at a fixed learning rate of 0.002.
tf.random.set_seed(42)

model_F = tf.keras.Sequential()
model_F.add(tf.keras.layers.Dense(64, activation='relu'))
model_F.add(tf.keras.layers.Dense(64, activation='relu'))
model_F.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model_F.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.002),
    loss=tf.keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

history_F = model_F.fit(x=X_train, y=y_train, epochs=50, verbose=0)

In [None]:
# Loss and accuracy of the final model on the held-out test split.
model_F.evaluate(x=X_test, y=y_test)

Loss and accuracy curve

In [None]:
# One curve per quantity recorded during training of the final model.
pd.DataFrame(data=history_F.history).plot(figsize=(6, 4), xlabel='epochs')

### **So, the accuracy of our model on the test data is 90.16%**

In [None]:
# Check model summary
# Summarise the final trained model (model_F), not `model`, which is the
# model that was only used for the learning-rate sweep.
model_F.summary()

### Prediction by our model

In [None]:
# The model outputs sigmoid values; rounding turns them into 0/1 class labels.
y_pred = tf.math.round(model_F.predict(x=X_test))
y_pred[:10]

### Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Visualize the confusion matrix (rows: true class, columns: predicted class).
# y_pred already holds rounded 0/1 predictions, so it is not rounded again; it
# is flattened to a 1-D integer array so it has the same form as y_test.
y_pred_labels = np.asarray(y_pred).ravel().astype(int)

plt.figure(figsize = (8, 8))
# The dataset's target is heart attack, so the tick labels say so.
sns.heatmap(confusion_matrix(y_test, y_pred_labels), cmap = 'Blues', annot = True, fmt = 'd', linewidths = 5, cbar = False, annot_kws = {'fontsize': 15},
           yticklabels = ['No heart attack', 'Heart attack'], xticklabels = ['Predicted no heart attack', 'Predicted heart attack'])
plt.yticks(rotation = 0)
plt.show()