In [41]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import Counter
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Loading and preparing the dataset
dataset = pd.read_csv("/content/heart.csv")
X = dataset.drop("target", axis=1).values
y = dataset["target"].values

# Splitting the data into train and test sets BEFORE scaling, so the held-out
# rows never influence the statistics the scaler learns (avoids data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardizing the feature set for Logistic Regression: the mean/std are
# fitted on the training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any code reading X_scaled still works: the full feature matrix,
# scaled with the training-set statistics.
X_scaled = scaler.transform(X)

### Logistic Regression Implementation from Scratch
class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    ``weights`` (one coefficient per feature) and ``bias`` (the intercept)
    are ``None`` until ``fit`` has been called.

    Args:
        learning_rate: Step size applied to each gradient update.
        n_iterations: Number of gradient-descent passes over the data.
    """

    def __init__(self, learning_rate=0.05, n_iterations=5000):  # Adjusted learning rate and iterations
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The naive formula overflows in ``exp`` for large negative ``z``.
        Here the exponent is always ``-|z|`` (so ``exp`` stays in [0, 1]) and
        the algebraically equivalent form is selected per sign of ``z``.

        Args:
            z: Scalar or array-like of real values.

        Returns:
            ``numpy.ndarray`` of the same shape as ``z`` with values in [0, 1].
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)  -- same function.
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` by gradient descent on the log-loss.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples binary labels (0 or 1).

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector y cannot broadcast against the (n,)
        # prediction vector into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit logistic regression on an empty dataset")

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            # Gradient of the mean log-loss w.r.t. the linear output.
            residual = self.sigmoid(np.dot(X, self.weights) + self.bias) - y

            dw = np.dot(X.T, residual) / n_samples
            db = np.sum(residual) / n_samples

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """Return a list of 0/1 class labels for the rows of ``X``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self.sigmoid(linear_model)
        return (probabilities > 0.5).astype(int).tolist()

# Random Forest and Decision Tree Implementations from Scratch
class DecisionTreeScratch:
    """Classification tree grown greedily by minimising weighted Gini impurity.

    The fitted tree is stored in ``self.tree`` as nested tuples
    ``(feature_index, threshold, left_subtree, right_subtree)``; a leaf is the
    predicted class label itself. Rows with ``x[feature] < threshold`` go to
    the left subtree, all other rows go right.

    Labels must be non-negative integers because leaf values are computed
    with ``np.bincount``.

    Args:
        max_depth: Maximum number of splits on any root-to-leaf path.
    """

    def __init__(self, max_depth=10):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """Grow the tree from feature matrix ``X`` and integer labels ``y``.

        Raises:
            ValueError: If ``y`` is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")
        self.tree = self._grow_tree(X, y)

    def _leaf_value(self, y):
        """Return the most frequent label in ``y`` as a plain int."""
        return int(np.bincount(y).argmax())

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X``/``y``."""
        n_samples = X.shape[0]
        n_labels = len(np.unique(y))

        if depth >= self.max_depth or n_labels == 1 or n_samples < 2:
            return self._leaf_value(y)

        best_feat, best_thresh = self._best_split(X, y)
        if best_feat is None:
            # No threshold separates the samples (e.g. identical feature rows
            # with mixed labels): stop here instead of recursing on an empty
            # child, which would have no majority label.
            return self._leaf_value(y)

        goes_left = X[:, best_feat] < best_thresh
        # The complement mirrors the if/else used at prediction time.
        goes_right = ~goes_left
        left = self._grow_tree(X[goes_left], y[goes_left], depth + 1)
        right = self._grow_tree(X[goes_right], y[goes_right], depth + 1)
        return (best_feat, best_thresh, left, right)

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Candidate thresholds are the distinct values of each feature. Splits
        that leave one side empty are ignored because they do not partition
        the data.

        Returns:
            ``(feature_index, threshold)``, or ``(None, None)`` when no
            candidate puts at least one sample on each side.
        """
        best_gini = 1.0
        split_idx, split_thresh = None, None
        for i in range(X.shape[1]):
            column = X[:, i]
            for t in np.unique(column):
                goes_left = column < t
                if not goes_left.any() or goes_left.all():
                    continue
                gini = self._gini_impurity(y[goes_left], y[~goes_left])
                if gini < best_gini:
                    best_gini = gini
                    split_idx, split_thresh = i, t
        return split_idx, split_thresh

    def _gini_impurity(self, left, right):
        """Return the size-weighted Gini impurity of the two label groups."""

        def gini(labels):
            if len(labels) == 0:
                # An empty side carries zero weight below; avoid dividing by 0.
                return 0.0
            _, counts = np.unique(labels, return_counts=True)
            proportions = counts / len(labels)
            return 1.0 - np.sum(proportions ** 2)

        n = len(left) + len(right)
        if n == 0:
            return 0.0
        return (len(left) / n) * gini(left) + (len(right) / n) * gini(right)

    def predict(self, X):
        """Return a list with the predicted label for each row of ``X``."""
        return [self._predict(inputs, self.tree) for inputs in X]

    def _predict(self, inputs, node):
        """Walk from ``node`` down to a leaf for one sample and return its label."""
        while isinstance(node, tuple):
            feature, threshold, left, right = node
            node = left if inputs[feature] < threshold else right
        return node

class RandomForestScratch:
    """Bagged ensemble of ``DecisionTreeScratch`` classifiers with majority voting.

    Each tree is trained on a bootstrap sample (rows drawn with replacement,
    same size as the training set).

    Args:
        n_trees: Number of trees to train.
        max_depth: ``max_depth`` passed to every tree.
        random_state: Optional integer seed for the bootstrap sampling. When
            ``None`` (the default) the global ``np.random`` state is used.
    """

    def __init__(self, n_trees=20, max_depth=10, random_state=None):  # Increased trees and depth
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Train ``n_trees`` trees, each on its own bootstrap sample of ``X``/``y``.

        Calling ``fit`` again replaces the previously trained trees.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(X)
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Build into a fresh list so a refit does not keep stale trees around.
        trees = []
        for _ in range(self.n_trees):
            idxs = rng.choice(n_samples, n_samples, replace=True)
            tree = DecisionTreeScratch(max_depth=self.max_depth)
            tree.fit(X[idxs], y[idxs])
            trees.append(tree)
        self.trees = trees

    def predict(self, X):
        """Return a list with the majority-vote label for each row of ``X``.

        Raises:
            RuntimeError: If the forest holds no trees (``fit`` not called).
        """
        if not self.trees:
            raise RuntimeError("RandomForestScratch.predict called before fit")
        # Shape (n_trees, n_samples); transposing yields one row of votes per sample.
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        return [Counter(votes.tolist()).most_common(1)[0][0] for votes in tree_preds.T]

# Fit the from-scratch logistic regression and predict on the held-out split
lr_model = LogisticRegressionScratch(learning_rate=0.05, n_iterations=5000)
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# Fit the from-scratch random forest and predict on the same held-out split
rf_model = RandomForestScratch(n_trees=20, max_depth=10)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

# Score both models with sklearn's metric functions, reported as percentages
print("Logistic Regression:")
lr_accuracy, lr_precision, lr_recall, lr_f1 = (
    metric(y_test, lr_predictions) * 100
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f"Accuracy: {lr_accuracy:.2f}%")
print(f"Precision: {lr_precision:.2f}%")
print(f"Recall: {lr_recall:.2f}%")
print(f"F1-Score: {lr_f1:.2f}%\n")

print("Random Forest:")
rf_accuracy, rf_precision, rf_recall, rf_f1 = (
    metric(y_test, rf_predictions) * 100
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print(f"Accuracy: {rf_accuracy:.2f}%")
print(f"Precision: {rf_precision:.2f}%")
print(f"Recall: {rf_recall:.2f}%")
print(f"F1-Score: {rf_f1:.2f}%")


Logistic Regression:
Accuracy: 85.25%
Precision: 87.10%
Recall: 84.38%
F1-Score: 85.71%

Random Forest:
Accuracy: 85.25%
Precision: 87.10%
Recall: 84.38%
F1-Score: 85.71%
