In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from imblearn.over_sampling import SMOTE

In [2]:
# Read the raw dataset; the path is relative to the current working directory.
csv_path = 'Hypertension-risk-model-main.csv'
data = pd.read_csv(csv_path)

In [3]:
# Print a summary of the frame: column names, dtypes, and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4240 entries, 0 to 4239
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             4240 non-null   int64  
 1   age                4240 non-null   int64  
 2   currentSmoker      4240 non-null   int64  
 3   CIGARATES PER DAY  4240 non-null   int64  
 4   diabetes           4240 non-null   int64  
 5   TOTAL CHOLESTROL   4240 non-null   int64  
 6   BP                 4240 non-null   float64
 7   BMI                4240 non-null   float64
 8   heartRate          4240 non-null   int64  
 9   glucose            4240 non-null   int64  
 10  Target             4240 non-null   int64  
dtypes: float64(2), int64(9)
memory usage: 364.5 KB


In [4]:
# Separate the label column from the remaining (predictor) columns.
target_column = 'Target'
y = data[target_column]
X = data.drop(columns=[target_column])

In [5]:
# Hold out 20% of the rows for testing. The split is seeded and stratified
# on the label so both partitions keep the same class proportions.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [6]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Oversample the scaled training partition with a seeded SMOTE sampler.
# Only training data is passed in; the test partition is left as-is.
smote = SMOTE(random_state=42)
resampled_pair = smote.fit_resample(X_train_scaled, y_train)
X_resampled, y_resampled = resampled_pair



In [12]:
# Hyper-parameters for the logistic-regression classifier, gathered in one
# place so they are easy to read and adjust.
logreg_settings = dict(
    penalty='l2',
    solver='liblinear',
    C=1.0,
    class_weight='balanced',
    random_state=42,
)
model = LogisticRegression(**logreg_settings)
# Fit on the oversampled training data. Left as the cell's final expression
# so the cell output is the same as before.
model.fit(X_resampled, y_resampled)

In [13]:
# Serialize the fitted model and the fitted scaler together in one pickle file.
bundle = {'model': model, 'scaler': scaler}
with open('hypertension_model_with_scaler.sav', 'wb') as file:
    pickle.dump(bundle, file)

In [14]:
# Hard class predictions for the scaled test partition.
y_pred = model.predict(X_test_scaled)
# predict_proba yields one column per class; keep the column at index 1.
class_probabilities = model.predict_proba(X_test_scaled)
y_prob = class_probabilities[:, 1]

In [15]:
# Compute the test-set metrics first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print(" Accuracy:", test_accuracy)
print("\n Classification Report:\n", report_text)

 Accuracy: 0.8231132075471698

 Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.82      0.86       585
           1       0.67      0.84      0.75       263

    accuracy                           0.82       848
   macro avg       0.80      0.83      0.81       848
weighted avg       0.84      0.82      0.83       848

