In [None]:
import pickle

import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Load the heart-disease records from disk and preview the last five rows.
db_data = pd.read_csv(filepath_or_buffer='heart.csv')
db_data.tail(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
302,57,0,1,130,236,0,0,174,0,0.0,1,1,2,0


In [None]:
# Summarise column dtypes and non-null counts to check for missing values.
db_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each numeric column.
db_data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [None]:
# Count the rows in each target class to gauge how balanced the labels are.
db_data['target'].value_counts()

1    165
0    138
Name: target, dtype: int64

In [None]:
# Separate the label column from the predictor columns.
Y = db_data["target"]
X = db_data.drop(columns=["target"])

In [None]:
# Build a leak-free training pipeline: split first, then oversample and scale
# using the training portion only.
#
# Oversampling before the split lets duplicated minority rows land in both the
# train and the test set, and fitting the scaler on every row exposes test-set
# statistics to training. Both effects inflate the reported accuracy.

# 1. Hold out the test set from the original (non-resampled, unscaled) data.
X_train_raw, X_test_raw, Y_train_raw, Y_test = train_test_split(
    X, Y, test_size=0.225, random_state=4
)

# 2. Balance the classes by random oversampling -- training rows only.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train_raw, Y_train_raw)

# 3. Fit the scaler on the training rows; the test rows are only transformed.
scaler = StandardScaler()
X_resampled_scaled = scaler.fit_transform(X_resampled)

# Keep the established names: X_train / X_test hold scaled features.
X_train, Y_train = X_resampled_scaled, y_resampled
X_test = scaler.transform(X_test_raw)

# 4. Train the support-vector classifier on the balanced, scaled training set.
clf = SVC()
clf.fit(X_train, Y_train)

# 5. Evaluate on the untouched hold-out set.
# NOTE: an accuracy recorded by an earlier run (oversampling and scaling done
# before the split) is optimistic and must be regenerated by re-running this cell.
accuracy = clf.score(X_test, Y_test)
print("Accuracy:", accuracy)

Accuracy: 0.84


In [None]:
# Define a function to get user input for heart disease detection
def _prompt_float(prompt):
    """Keep asking with ``prompt`` until the user types a finite number; return it as a float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        # float() happily parses "nan" and "inf"; reject them here rather than
        # letting them reach the scaler and classifier.
        if not np.isfinite(value):
            print(f"'{raw}' is not a finite number, please try again.")
            continue
        return value


def get_heart_input():
    """Interactively collect the 13 feature values for one patient.

    The prompts are asked in the same order as the feature columns of the
    training data (age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang,
    oldpeak, slope, ca, thal). An invalid entry is re-prompted instead of
    aborting, so earlier answers are not lost.

    Returns:
        numpy.ndarray of shape (1, 13) holding the answers as floats.
    """
    print("Enter the following information:")
    # NOTE(review): the training data's `ca` column reaches 4 (see the describe()
    # output), while the prompt below advertises 0 to 3 -- confirm the intended range.
    prompts = (
        "Age: ",
        "Sex (0 for female, 1 for male): ",
        "Chest pain type (0 to 3): ",
        "Resting blood pressure: ",
        "Cholesterol level: ",
        "Fasting blood sugar (0 or 1): ",
        "Resting ECG (0 to 2): ",
        "Maximum heart rate: ",
        "Exercise induced angina (0 or 1): ",
        "ST depression induced by exercise: ",
        "ST slope (0 to 2): ",
        "Number of major vessels (0 to 3): ",
        "Thalassemia type (0 to 3): ",
    )
    # Return user input as a numpy array with a single row
    return np.array([[_prompt_float(prompt) for prompt in prompts]])

In [None]:
# Define a function to predict heart disease based on user input
def predict_heart_disease():
    """Prompt for one patient's features and print the model's prediction.

    Uses the notebook-level ``scaler`` and ``clf`` objects, which must already
    be fitted. The result is printed; nothing is returned.
    """
    # Get user input
    user_input = get_heart_input()

    # If the scaler was fitted on a DataFrame it remembers the column names;
    # hand it a frame with the same names so scikit-learn does not warn about
    # a feature-name mismatch.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        user_input = pd.DataFrame(user_input, columns=feature_names)

    # Scale the user input using the trained scaler
    user_input_scaled = scaler.transform(user_input)

    # Make predictions
    prediction = clf.predict(user_input_scaled)

    # predict() returns a one-element array for the single input row;
    # compare its scalar rather than relying on array truthiness.
    if prediction[0] == 0:
        print("The model predicts that the person does not have heart disease.")
    else:
        print("The model predicts that the person has heart disease.")

In [None]:
# Run one interactive prediction; the user is prompted for each feature value.
predict_heart_disease()

Enter the following information:
Age: 57
Sex (0 for female, 1 for male): 0
Chest pain type (0 to 3): 0
Resting blood pressure: 140
Cholesterol level: 241
Fasting blood sugar (0 or 1): 0
Resting ECG (0 to 2): 1
Maximum heart rate: 123
Exercise induced angina (0 or 1): 1
ST depression induced by exercise: 0.2
ST slope (0 to 2): 1
Number of major vessels (0 to 3): 0
Thalassemia type (0 to 3): 3
The model predicts that the person does not have heart disease.




In [None]:
# Persist the fitted artefacts so predictions can be served without retraining.
# The scaler is saved next to the model because inputs must be scaled with the
# same fitted scaler before they are passed to the classifier.
# Only unpickle these files from a trusted location: loading a pickle can
# execute arbitrary code.

# Save the model
with open('heart_disease_model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

# Save the scaler
with open('heart_disease_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)