In [5]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv("/Users/s_lokesh/Downloads/heart_disease_uci.csv")

# Display first few rows
print(df.head())

# Check data types and missing values
print(df.info())
print(df.isnull().sum())

# Drop non-numeric columns
non_numeric_cols = df.select_dtypes(include=['object']).columns
df = df.drop(columns=non_numeric_cols)  # Remove text-based columns

# Handle missing values (only for numeric columns)
df.fillna(df.mean(numeric_only=True), inplace=True)

# Select 'age' and 'chol' as features, 'num' as output (converted to binary)
X1 = df['age'].values
X2 = df['chol'].values
y = (df['num'] > 0).astype(int).values  # Convert 'num' > 0 to 1 (heart disease)

# Normalize features using Min-Max Scaling
X1_min, X1_max = min(X1), max(X1)
X2_min, X2_max = min(X2), max(X2)

X1 = (X1 - X1_min) / (X1_max - X1_min)
X2 = (X2 - X2_min) / (X2_max - X2_min)

print(f"Processed Data:\nX1 (Age): {X1[:5]}\nX2 (Cholesterol): {X2[:5]}\ny (Target): {y[:5]}")


   id  age     sex    dataset               cp  trestbps   chol    fbs  \
0   1   63    Male  Cleveland   typical angina     145.0  233.0   True   
1   2   67    Male  Cleveland     asymptomatic     160.0  286.0  False   
2   3   67    Male  Cleveland     asymptomatic     120.0  229.0  False   
3   4   37    Male  Cleveland      non-anginal     130.0  250.0  False   
4   5   41  Female  Cleveland  atypical angina     130.0  204.0  False   

          restecg  thalch  exang  oldpeak        slope   ca  \
0  lv hypertrophy   150.0  False      2.3  downsloping  0.0   
1  lv hypertrophy   108.0   True      1.5         flat  3.0   
2  lv hypertrophy   129.0   True      2.6         flat  2.0   
3          normal   187.0  False      3.5  downsloping  0.0   
4  lv hypertrophy   172.0  False      1.4    upsloping  0.0   

                thal  num  
0       fixed defect    0  
1             normal    2  
2  reversable defect    1  
3             normal    0  
4             normal    0  
<class '

In [20]:
# Manually split into 80% training and 20% testing
split_ratio = 0.8
split_index = int(len(X1) * split_ratio)

X1_train, X1_test = X1[:split_index], X1[split_index:]
X2_train, X2_test = X2[:split_index], X2[split_index:]
y_train, y_test = y[:split_index], y[split_index:]


In [21]:
# Sigmoid Function
def sigmoid(z):
    return 1 / (1 + math.exp(-z))

# Logistic Regression Training
def logistic_regression(X1, X2, y, learning_rate=0.01, epochs=1000):
    w1, w2, b = 0, 0, 0  # Initialize weights and bias
    N = len(X1)  # Number of samples

    for epoch in range(epochs):
        dw1, dw2, db = 0, 0, 0

        for i in range(N):
            z = w1 * X1[i] + w2 * X2[i] + b
            y_pred = sigmoid(z)

            error = y_pred - y[i]

            # Compute gradients
            dw1 += (1/N) * error * X1[i]
            dw2 += (1/N) * error * X2[i]
            db += (1/N) * error

        # Update weights
        w1 -= learning_rate * dw1
        w2 -= learning_rate * dw2
        b -= learning_rate * db

        if epoch % 100 == 0:
            loss = -sum(y[i] * math.log(sigmoid(w1 * X1[i] + w2 * X2[i] + b)) +
                        (1 - y[i]) * math.log(1 - sigmoid(w1 * X1[i] + w2 * X2[i] + b))
                        for i in range(N)) / N
            print(f"Epoch {epoch}: Loss = {loss:.4f}")

    return w1, w2, b

# Train the Model
w1_final, w2_final, b_final = logistic_regression(X1_train, X2_train, y_train, learning_rate=0.01, epochs=1000)
print(f"Final Parameters: w1 = {w1_final:.4f}, w2 = {w2_final:.4f}, b = {b_final:.4f}")


Epoch 0: Loss = 0.6931
Epoch 100: Loss = 0.6920
Epoch 200: Loss = 0.6909
Epoch 300: Loss = 0.6899
Epoch 400: Loss = 0.6889
Epoch 500: Loss = 0.6878
Epoch 600: Loss = 0.6869
Epoch 700: Loss = 0.6859
Epoch 800: Loss = 0.6849
Epoch 900: Loss = 0.6840
Final Parameters: w1 = 0.2139, w2 = -0.2338, b = -0.0078


In [22]:
# Prediction Function
def predict(age, chol):
    age_scaled = (age - X1_min) / (X1_max - X1_min)
    chol_scaled = (chol - X2_min) / (X2_max - X2_min)
    prob = sigmoid(w1_final * age_scaled + w2_final * chol_scaled + b_final)
    return 1 if prob > 0.5 else 0

# Example Prediction
new_age, new_chol = 55, 240
print(f"Predicted Heart Disease (1=Yes, 0=No): {predict(new_age, new_chol)}")


Predicted Heart Disease (1=Yes, 0=No): 1


In [23]:
# Evaluate on test set
y_pred_test = [predict(X1_test[i] * (X1_max - X1_min) + X1_min, 
                       X2_test[i] * (X2_max - X2_min) + X2_min) for i in range(len(X1_test))]

# Calculate Accuracy
accuracy = sum(y_pred_test[i] == y_test[i] for i in range(len(y_test))) / len(y_test)
print(f"Accuracy on Test Data: {accuracy:.4f}")


Accuracy on Test Data: 0.7554
