Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# We're not going to use sklearn's LogisticRegression
# from sklearn.linear_model import LogisticRegression

ImportError: DLL load failed while importing _c_internal_utils: The specified module could not be found.

Data collection and processing

In [None]:
# Load the cardiovascular-disease dataset from a CSV file (path is relative to the working directory)
heart_data = pd.read_csv(filepath_or_buffer='Cardiovascular_Disease_Dataset.csv')

In [116]:
# Preview the first five rows
heart_data.head(n=5)

In [117]:
# Preview the last five rows
heart_data.tail(n=5)

In [118]:
# Dimensions of the dataset as (rows, columns)
heart_data.shape

In [119]:
# Per-column dtype and non-null count summary
heart_data.info()



In [120]:
# Number of missing values in each column
heart_data.isna().sum()

In [121]:
# Summary statistics of the dataset's columns
heart_data.describe()

In [122]:
# Class balance: how many rows carry each value of the 'target' label
heart_data['target'].value_counts()

1 --> defective heart
0 --> healthy heart

In [123]:
# Features: every column except the label and the patient identifier
X = heart_data.drop(columns=['target', 'patientid'])
# Label column
Y = heart_data['target']

In [124]:
# Show a bounded, richly rendered preview of the feature matrix instead of
# printing the whole DataFrame as plain text.
X.head()



In [125]:
# Show a bounded preview of the label column instead of printing the whole Series.
Y.head()



Splitting the data into training and testing data

In [126]:
# Hold out 30% of the rows for testing; stratifying on Y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    stratify=Y,
    random_state=2,
)

In [127]:
# Shapes of the full feature matrix, the training split and the test split
print(*(frame.shape for frame in (X, X_train, X_test)))



## Custom LogisticRegression Implementation

In [None]:
class LogisticRegression:
    """Binary logistic regression trained by batch gradient descent with L2 regularization.

    Scaling contract (easy to get wrong):
        * ``fit`` trains on ``X`` exactly as passed; it never uses ``scaler``.
        * ``predict_proba`` / ``predict`` run ``scaler.transform`` on their input
          when a scaler was supplied.
      With a scaler attached, train on features that were already transformed by
      that scaler and predict on RAW features. Passing pre-scaled data to
      ``predict`` would scale it a second time.

    Parameters
    ----------
    learning_rate : float
        Step size of each gradient-descent update.
    max_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    lambda_ : float
        L2 regularization strength (applied to the weights, not the bias).
    verbose : bool
        If True, ``fit`` prints the cost every 100 iterations.
    scaler : object or None
        Optional object exposing ``transform(X)``; applied at prediction time only.
    random_state : int or None
        Seed for the weight initialization. ``None`` (default) draws from NumPy's
        global RNG, so a caller-side ``np.random.seed(...)`` is still honoured.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Learned coefficients, one per feature; ``None`` until ``fit`` is called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` is called.
    """

    def __init__(self, learning_rate=0.01, max_iter=1000, lambda_=0.01, verbose=False, scaler=None,
                 random_state=None):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.lambda_ = lambda_  # Regularization parameter
        self.verbose = verbose
        self.weights = None
        self.bias = None
        self.scaler = scaler
        self.random_state = random_state

    def sigmoid(self, z):
        """Element-wise logistic function; ``z`` is clipped to [-500, 500] so ``exp`` cannot overflow."""
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def initialize_weights(self, n_features):
        """Set ``weights`` to small random values (normal draws times 0.01) and ``bias`` to zero."""
        # getattr keeps instances unpickled from before ``random_state`` existed working.
        seed = getattr(self, "random_state", None)
        if seed is None:
            # Global RNG: respects np.random.seed(...) set by the caller.
            draws = np.random.randn(n_features)
        else:
            # Private generator: reproducible without touching global RNG state.
            draws = np.random.RandomState(seed).randn(n_features)
        self.weights = draws * 0.01
        self.bias = 0.0

    def compute_cost(self, X, y, predicted):
        """Return the mean cross-entropy of ``predicted`` against ``y`` plus the L2 penalty.

        The penalty is ``lambda_ / (2 * m) * sum(weights ** 2)`` using the current
        ``self.weights``; the bias is not penalized.
        """
        m = X.shape[0]
        # Keep probabilities strictly inside (0, 1) so log() never sees 0.
        predicted = np.clip(predicted, 1e-10, 1 - 1e-10)
        cost = (-1/m) * (np.dot(y, np.log(predicted)) + np.dot((1-y), np.log(1-predicted)))
        reg_cost = (self.lambda_ / (2*m)) * np.sum(np.square(self.weights))
        return cost + reg_cost

    def fit(self, X, y):
        """Train on ``X`` (samples x features) and binary labels ``y``; returns ``self``.

        ``X`` is used as given -- the attached ``scaler`` is NOT applied here.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not have one label per row.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so (m, 1) label columns behave exactly like 1-D label vectors.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (samples x features), got {X.ndim}-D input")
        m, n_features = X.shape
        if m == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != m:
            raise ValueError(f"X has {m} rows but y has {y.shape[0]} labels")

        self.initialize_weights(n_features)

        for i in range(self.max_iter):
            # Forward pass
            z = np.dot(X, self.weights) + self.bias
            predictions = self.sigmoid(z)

            # Report the cost before updating, so the data term and the penalty
            # term both refer to the same weights.
            if self.verbose and i % 100 == 0:
                cost = self.compute_cost(X, y, predictions)
                print(f"Iteration {i}, Cost: {cost}")

            # Gradients of the regularized cost (bias is not regularized)
            error = predictions - y
            dw = (1/m) * np.dot(X.T, error) + (self.lambda_ / m) * self.weights
            db = (1/m) * np.sum(error)

            # Gradient-descent step
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array of ``[P(class 0), P(class 1)]`` per row.

        If a scaler is attached, ``X`` is passed through ``scaler.transform`` first,
        so ``X`` must then be raw (unscaled) features.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("This LogisticRegression instance is not fitted yet; call fit() first")

        X = np.asarray(X)

        if self.scaler is not None:
            X = self.scaler.transform(X)

        z = np.dot(X, self.weights) + self.bias
        probabilities = self.sigmoid(z)

        # Two columns, mirroring the sklearn predict_proba layout
        return np.column_stack([1-probabilities, probabilities])

    def predict(self, X):
        """Return class labels (0 or 1) using a 0.5 threshold on P(class 1)."""
        probabilities = self.predict_proba(X)[:, 1]  # Probability of class 1
        return (probabilities >= 0.5).astype(int)

## Create and Apply Feature Scaler

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Model Training

In [None]:
# Create and train our custom model with feature scaling
model = LogisticRegression(learning_rate=0.01, max_iter=1000, lambda_=0.01, verbose=True, scaler=scaler)
model.fit(X_train_scaled, Y_train)



Model Evaluation

In [None]:
# The model applies its attached scaler inside predict(), so pass the raw
# (unscaled) features; feeding X_train_scaled would standardise them twice.
X_train_prediction = model.predict(X_train)
# accuracy_score expects (y_true, y_pred)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Fraction of training rows whose predicted label matches the true label
print('Accuracy on training data:', training_data_accuracy)



In [None]:
# The model applies its attached scaler inside predict(), so pass the raw
# (unscaled) features; feeding X_test_scaled would standardise them twice.
X_test_prediction = model.predict(X_test)
# accuracy_score expects (y_true, y_pred)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Fraction of held-out test rows whose predicted label matches the true label
print('Accuracy on test data:', test_data_accuracy)



Building a Predictive System

In [None]:
# Raw (unscaled) feature values for one patient -- assumed to follow the column order of X
input_data = (58, 1, 2, 140, 300, 0, 0, 140, 0, 1.5, 2, 0)
input_data_as_numpy_array = np.asarray(input_data)
# Reshape to a single-row 2-D array: one sample, all features
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Standardised view of the input, kept for inspection. Do NOT pass it to
# model.predict(): the model already applies its scaler internally, so doing
# so would scale the features twice.
input_data_scaled = scaler.transform(input_data_reshaped)

# Make prediction from the raw features (scaling happens inside the model)
prediction = model.predict(input_data_reshaped)
print(prediction)

if prediction[0] == 0:
    print('The person does not have a heart disease')
else:
    print('The person has heart disease')





## Save the custom model to a pickle file

In [None]:
import pickle

# Save the trained model to a pickle file
# Save the trained model; the context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
with open('heart_disease_model4.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
print('Model saved successfully as heart_disease_model4.pkl')

In [None]:
# Test loading the model back in to make sure it works
# Round-trip check: load the model back and confirm it still predicts.
# SECURITY: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('heart_disease_model4.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Pass the raw (unscaled) input: the model applies its own scaler inside
# predict(), so pre-scaled input would be scaled twice.
test_prediction = loaded_model.predict(input_data_reshaped)
print(f"Test prediction from loaded model: {test_prediction}")

if test_prediction[0] == 0:
    print('The person does not have a heart disease')
else:
    print('The person has heart disease')