In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb


In [None]:
# Load the hypertension dataset from the project's resources folder
htn_read = pd.read_csv(filepath_or_buffer='resources/hypertension_data.csv')

# Preview the first five rows to sanity-check the columns
htn_read.head(5)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,57.0,1.0,3,145,233,1,0,150,0,2.3,0,0,1,1
1,64.0,0.0,2,130,250,0,1,187,0,3.5,0,0,2,1
2,52.0,1.0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56.0,0.0,1,120,236,0,1,178,0,0.8,2,0,2,1
4,66.0,0.0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [None]:
# Non-null entries per column; a column whose count is lower than the others contains missing values.
htn_read.count()

age         26083
sex         26058
cp          26083
trestbps    26083
chol        26083
fbs         26083
restecg     26083
thalach     26083
exang       26083
oldpeak     26083
slope       26083
ca          26083
thal        26083
target      26083
dtype: int64

In [None]:
# Number of distinct values per column (nunique() leaves NaN out of the count by default).
htn_read.nunique()

age          88
sex           2
cp            4
trestbps     49
chol        152
fbs           2
restecg       3
thalach      91
exang         2
oldpeak      40
slope         3
ca            5
thal          4
target        2
dtype: int64

In [None]:
# Count the distinct values in every column a second time, this time via
# Series.unique(), which keeps NaN as its own value. A column that reports one
# more value here than in nunique() therefore contains missing data.
for column_name, column_values in htn_read.items():
    print(column_name, len(column_values.unique()))


age 88
sex 3
cp 4
trestbps 49
chol 152
fbs 2
restecg 3
thalach 91
exang 2
oldpeak 40
slope 3
ca 5
thal 4
target 2


In [None]:
# Frequency of each age, ordered by age (the index) rather than by frequency.
htn_read['age'].value_counts().sort_index()

age
11.0    1
12.0    2
13.0    2
14.0    2
15.0    3
       ..
94.0    6
95.0    6
96.0    3
97.0    4
98.0    2
Name: count, Length: 88, dtype: int64

In [None]:
# Bucket age into four bands. The conditions are evaluated in order, so each
# row receives the first band whose upper bound it does not exceed; anything
# left over (including a missing age, for which every comparison is False)
# falls through to "70+".
# NOTE(review): the first band is labelled "18-44" but it also captures the
# under-18 ages that appear in the value counts of the age column.
htn_read["age_category"] = np.select(
    [htn_read["age"] <= 44, htn_read["age"] <= 59, htn_read["age"] <= 69],
    ["18-44", "45-59", "60-69"],
    default="70+",
)

# The raw age column is replaced by its band, so drop it.
# NOTE(review): this makes the cell non-idempotent - re-running it without
# reloading the CSV fails because 'age' no longer exists.
htn_read = htn_read.drop(columns="age")
htn_read

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,age_category
0,1.0,3,145,233,1,0,150,0,2.3,0,0,1,1,45-59
1,0.0,2,130,250,0,1,187,0,3.5,0,0,2,1,60-69
2,1.0,1,130,204,0,0,172,0,1.4,2,0,2,1,45-59
3,0.0,1,120,236,0,1,178,0,0.8,2,0,2,1,45-59
4,0.0,0,120,354,0,1,163,1,0.6,2,0,2,1,60-69
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26078,0.0,0,138,294,1,1,106,0,1.9,1,3,2,0,70+
26079,1.0,0,144,200,0,0,126,1,0.9,1,0,3,0,60-69
26080,1.0,0,100,234,0,1,156,0,0.1,2,1,3,0,60-69
26081,1.0,1,154,232,0,0,164,0,0.0,2,1,2,0,60-69


In [None]:
# Summarise column dtypes and non-null counts after the age column was replaced by age_category.
htn_read.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26083 entries, 0 to 26082
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sex           26058 non-null  float64
 1   cp            26083 non-null  int64  
 2   trestbps      26083 non-null  int64  
 3   chol          26083 non-null  int64  
 4   fbs           26083 non-null  int64  
 5   restecg       26083 non-null  int64  
 6   thalach       26083 non-null  int64  
 7   exang         26083 non-null  int64  
 8   oldpeak       26083 non-null  float64
 9   slope         26083 non-null  int64  
 10  ca            26083 non-null  int64  
 11  thal          26083 non-null  int64  
 12  target        26083 non-null  int64  
 13  age_category  26083 non-null  object 
dtypes: float64(2), int64(11), object(1)
memory usage: 2.8+ MB


In [None]:
# Discard every row that has at least one missing value.
# The original index labels are kept (no reset), so gaps in the index are expected.
htn_read = htn_read.dropna(axis=0, how="any")

In [None]:
# One-hot encode the non-numeric columns as 0/1 integers; numeric columns pass
# through unchanged.
dummies_htn_df = pd.get_dummies(data=htn_read, dtype=int)
dummies_htn_df

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,age_category_18-44,age_category_45-59,age_category_60-69,age_category_70+
0,1.0,3,145,233,1,0,150,0,2.3,0,0,1,1,0,1,0,0
1,0.0,2,130,250,0,1,187,0,3.5,0,0,2,1,0,0,1,0
2,1.0,1,130,204,0,0,172,0,1.4,2,0,2,1,0,1,0,0
3,0.0,1,120,236,0,1,178,0,0.8,2,0,2,1,0,1,0,0
4,0.0,0,120,354,0,1,163,1,0.6,2,0,2,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26078,0.0,0,138,294,1,1,106,0,1.9,1,3,2,0,0,0,0,1
26079,1.0,0,144,200,0,0,126,1,0.9,1,0,3,0,0,0,1,0
26080,1.0,0,100,234,0,1,156,0,0.1,2,1,3,0,0,0,1,0
26081,1.0,1,154,232,0,0,164,0,0.0,2,1,2,0,0,0,1,0


In [None]:
# Separate the feature matrix (every column except the label) from the label
X = dummies_htn_df.drop(columns=["target"])
y = dummies_htn_df["target"]

In [None]:
# Class balance of the target label.
y.value_counts()

target
1    14274
0    11784
Name: count, dtype: int64

In [None]:
# Hold out 25% of the rows for testing (scikit-learn's default split size,
# spelled out here) with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

**Logistic Regression Model 86% accuracy**

In [None]:
# Fit a logistic-regression baseline on the (unscaled) training features
clf = LogisticRegression(max_iter=1000, random_state=1).fit(X_train, y_train)

# Predict labels for the held-out test split
predictions = clf.predict(X_test)

# Report, in order: overall accuracy, the per-class precision/recall table,
# and the confusion matrix
for metric in (accuracy_score, classification_report, confusion_matrix):
    print(metric(y_test, predictions))

0.8615502686108979
              precision    recall  f1-score   support

           0       0.90      0.78      0.84      2984
           1       0.84      0.93      0.88      3531

    accuracy                           0.86      6515
   macro avg       0.87      0.86      0.86      6515
weighted avg       0.87      0.86      0.86      6515

[[2339  645]
 [ 257 3274]]


In [None]:
# Only the Keras-specific names are imported here; tf, StandardScaler and the
# sklearn metric functions are already imported in the notebook's first cell.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable on
# "Restart Kernel -> Run All". The value matches the random_state used for the
# train/test split and the logistic-regression baseline.
tf.keras.utils.set_random_seed(1)

# Normalize the features: the scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the neural network: three ReLU hidden layers (60 -> 120 -> 256 units)
# with batch normalisation after the first and dropout before the output.
model = Sequential()
model.add(Dense(60, input_shape=(X_train.shape[1],), activation="relu"))
model.add(BatchNormalization())
model.add(Dense(120, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.2))
# Single sigmoid unit: the output is the predicted probability of target == 1
model.add(Dense(1, activation="sigmoid"))

# Compile the model for binary classification
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model, holding back 20% of the training rows for validation
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Make predictions: predict() yields one probability per row, which is turned
# into a hard 0/1 label with a 0.5 threshold.
predictions = model.predict(X_test_scaled)
predictions = (predictions > 0.5).astype(int)

# Evaluate the model on the held-out test split
print("Accuracy:", accuracy_score(y_test, predictions))
print("Classification Report:")
print(classification_report(y_test, predictions))
print("Confusion Matrix:")
print(confusion_matrix(y_test, predictions))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78