In [None]:
class GaussianNaiveBayes:
    """Gaussian naive Bayes classifier.

    Every feature is modelled as an independent normal distribution per
    class. After ``fit`` the instance exposes:

    * ``classes``    -- the sorted unique labels seen during training.
    * ``parameters`` -- maps each label to a ``(mean, var, prior)`` tuple,
      where ``mean`` and ``var`` are per-feature arrays.
    """

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of n_samples labels.
        """
        # Coerce once so lists and DataFrames/Series support the boolean-mask
        # indexing used below.
        X = np.asarray(X)
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.parameters = {}
        for cls in self.classes:
            X_c = X[y == cls]
            mean = X_c.mean(axis=0)
            # The small constant keeps the later division and log finite when
            # a feature is constant within a class (variance of exactly 0).
            var = X_c.var(axis=0) + 1e-9
            prior = X_c.shape[0] / X.shape[0]
            self.parameters[cls] = (mean, var, prior)

    def _predict_row(self, x):
        """Return the label with the highest log-posterior for one sample."""
        posteriors = []
        for cls in self.classes:
            mean, var, prior = self.parameters[cls]
            # Log of the product of per-feature normal densities:
            # normalisation term first, then the squared-distance term.
            log_likelihood = -0.5 * np.sum(np.log(2 * np.pi * var))
            log_likelihood -= 0.5 * np.sum(((x - mean) ** 2) / var)
            posteriors.append(log_likelihood + np.log(prior))
        # argmax returns the first maximum, so ties go to the smaller label.
        return self.classes[np.argmax(posteriors)]

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        # asarray guarantees row-wise iteration (a DataFrame would otherwise
        # iterate over its column labels).
        return np.array([self._predict_row(x) for x in np.asarray(X)])

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        return np.mean(predictions == np.asarray(y))

# Fit Gaussian naive Bayes on the training split and report accuracy on the
# validation and test splits (the X_*/y_* arrays are defined outside this
# section).
gnb = GaussianNaiveBayes()
gnb.fit(X_train, y_train)

print("Gaussian Naive Bayes - Validation Accuracy: ", gnb.score(X_val, y_val))
print("Gaussian Naive Bayes - Test Accuracy: ", gnb.score(X_test, y_test))

In [None]:
from collections import Counter

def gini(groups, classes):
    """Return the size-weighted Gini impurity of a candidate split.

    Args:
        groups: the row groups produced by a split; each is a 2-D array
            whose last column holds the class label.
        classes: the label values to account for.

    Returns:
        The sum over non-empty groups of ``(1 - sum_c p_c**2)`` weighted by
        the group's share of all rows; 0.0 means every group is pure.
    """
    total_rows = float(sum(len(g) for g in groups))
    weighted_impurity = 0.0
    for g in groups:
        n_rows = len(g)
        if not n_rows:
            # An empty side contributes nothing (and would divide by zero).
            continue
        labels = list(g[:, -1])
        purity = sum((labels.count(c) / n_rows) ** 2 for c in classes)
        weighted_impurity += (1 - purity) * (n_rows / total_rows)
    return weighted_impurity

def test_split(index, value, dataset):
    """Partition the rows of ``dataset`` on a single feature threshold.

    Args:
        index: column of the feature to test.
        value: threshold; rows with ``dataset[:, index] < value`` go left.
        dataset: 2-D array of rows.

    Returns:
        ``(left, right)`` arrays that together contain every input row.
    """
    goes_left = dataset[:, index] < value
    # The right side is the complement of the left mask rather than a separate
    # ``>=`` test: NaN fails both comparisons, so two independent tests would
    # silently drop NaN rows (and a NaN threshold would produce two empty
    # groups). The complement routes those rows right.
    return dataset[goes_left], dataset[~goes_left]


# Despite the name this is a helper, not a test case: opt out of pytest's
# default ``test_*`` collection so importing it into a test module is harmless.
test_split.__test__ = False

def get_split(dataset, n_features):
    """Find the lowest-Gini split over a random subset of features.

    Args:
        dataset: 2-D array of rows whose last column is the class label.
        n_features: number of feature columns to sample as candidates;
            values larger than the number of available columns are clamped.

    Returns:
        A dict with the chosen feature ``'index'``, threshold ``'value'`` and
        the resulting ``'groups'`` as a ``(left, right)`` pair.
    """
    class_values = list(set(dataset[:, -1]))
    n_columns = dataset.shape[1] - 1
    # random.sample raises if asked for more items than exist, so cap the
    # request at the number of feature columns.
    features = random.sample(range(n_columns), min(n_features, n_columns))
    best_index, best_value, best_score, best_groups = 999, 999, 999, None
    for index in features:
        # A repeated threshold produces the same groups and score, and the
        # strict ``<`` below means it could never replace the current best,
        # so skipping it leaves the result unchanged.
        tried = set()
        for row in dataset:
            value = row[index]
            if value in tried:
                continue
            tried.add(value)
            groups = test_split(index, value, dataset)
            score = gini(groups, class_values)
            if score < best_score:
                best_index, best_value, best_score, best_groups = index, value, score, groups
    return {'index': best_index, 'value': best_value, 'groups': best_groups}

def to_terminal(group):
    """Return the most frequent label (last element of each row) in ``group``.

    Ties are resolved in favour of the label encountered first.
    """
    tally = Counter()
    for row in group:
        tally[row[-1]] += 1
    label, _ = tally.most_common(1)[0]
    return label

def split(node, max_depth, min_size, n_features, depth):
    """Recursively grow the subtree below ``node`` in place.

    ``node`` is a dict from ``get_split``; its ``'groups'`` entry is consumed
    and replaced with ``'left'`` / ``'right'`` entries, each either a leaf
    label or a nested node dict.

    Args:
        node: split dict to expand.
        max_depth: depth at which both children are forced to be leaves.
        min_size: a child with this many rows or fewer becomes a leaf.
        n_features: number of candidate features passed to ``get_split``.
        depth: depth of ``node`` itself.
    """
    left, right = node['groups']
    del node['groups']

    # One side is empty, so nothing was separated: both children share the
    # majority label of all rows.
    if 0 in (left.shape[0], right.shape[0]):
        leaf = to_terminal(np.vstack((left, right)))
        node['left'] = leaf
        node['right'] = leaf
        return

    # Depth budget exhausted: close both sides with their majority labels.
    if depth >= max_depth:
        left_leaf = to_terminal(left)
        right_leaf = to_terminal(right)
        node['left'] = left_leaf
        node['right'] = right_leaf
        return

    # The left side is fully expanded before the right side is touched.
    for side, rows in (('left', left), ('right', right)):
        if len(rows) <= min_size:
            node[side] = to_terminal(rows)
            continue
        child = get_split(rows, n_features)
        node[side] = child
        split(child, max_depth, min_size, n_features, depth + 1)

def build_tree(train, max_depth, min_size, n_features):
    """Build a decision tree from ``train`` and return its root node dict.

    The root split is chosen first and then expanded recursively, with the
    root counted as depth 1.
    """
    tree = get_split(train, n_features)
    split(tree, max_depth, min_size, n_features, depth=1)
    return tree

def predict_tree(node, row):
    """Walk the tree from ``node`` and return the leaf label reached by ``row``.

    At each node the row goes left when its value at the node's feature index
    is below the node's threshold, otherwise right. Any child that is not a
    dict is a leaf label.
    """
    current = node
    while True:
        side = 'left' if row[current['index']] < current['value'] else 'right'
        child = current[side]
        if not isinstance(child, dict):
            return child
        current = child

class RandomForest:
    """Bagged ensemble of Gini decision trees combined by majority vote.

    Args:
        n_trees: number of trees to grow.
        max_depth: maximum depth of each tree.
        min_size: a node with this many rows or fewer becomes a leaf.
        sample_size: size of each bootstrap sample as a fraction of the
            training set.
        n_features: features considered per split; when falsy, the integer
            part of ``sqrt(n_columns)`` is used.
    """

    def __init__(self, n_trees=10, max_depth=10, min_size=1, sample_size=1.0, n_features=None):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_size = min_size
        self.sample_size = sample_size
        self.n_features = n_features
        self.trees = []
        # Sorted unique training labels; set by fit(). Trees store positions
        # into this array rather than the labels themselves.
        self.classes_ = None

    def subsample(self, dataset):
        """Return a bootstrap sample (rows drawn with replacement)."""
        n_sample = round(len(dataset) * self.sample_size)
        # round() can yield 0 for small datasets/fractions (e.g. round(0.5)
        # is 0); an empty sample cannot be split, so draw at least one row.
        if n_sample < 1 and len(dataset) > 0:
            n_sample = 1
        return dataset[np.random.choice(len(dataset), n_sample, replace=True)]

    def fit(self, X, y):
        """Grow ``n_trees`` trees, each on its own bootstrap sample.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of n_samples labels (any dtype np.unique accepts).
        """
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1)
        # Stacking raw labels next to the features would coerce both to one
        # dtype (int labels become floats, string labels turn the features
        # into strings). Store integer class codes instead and translate back
        # when predicting.
        self.classes_, codes = np.unique(y, return_inverse=True)
        data = np.hstack((X, codes.reshape(-1, 1)))
        n_features = self.n_features or max(1, int(np.sqrt(X.shape[1])))
        self.trees = []
        for _ in range(self.n_trees):
            sample = self.subsample(data)
            self.trees.append(build_tree(sample, self.max_depth, self.min_size, n_features))

    def predict_row(self, row):
        """Return the majority-vote label for one sample."""
        votes = [predict_tree(tree, row) for tree in self.trees]
        winner = Counter(votes).most_common(1)[0][0]
        classes = getattr(self, 'classes_', None)
        if classes is None:
            # Trees were supplied without fit(): their leaves already hold
            # the labels to return.
            return winner
        return classes[int(winner)]

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        return np.array([self.predict_row(row) for row in np.asarray(X)])

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        return np.mean(predictions == np.asarray(y).reshape(-1))

# Train a 10-tree forest (depth limit 10, nodes of 2 rows or fewer become
# leaves, each tree fitted on a bootstrap sample 90% the size of the training
# set) and report accuracy on the validation and test splits.
# n_features is left unset, so the forest derives it from the column count.
rf = RandomForest(n_trees=10, max_depth=10, min_size=2, sample_size=0.9)
rf.fit(X_train, y_train)

# The leading "\n" puts a blank line between this report and earlier output.
print("\nRandom Forest - Validation Accuracy: ", rf.score(X_val, y_val))
print("Random Forest - Test Accuracy: ", rf.score(X_test, y_test))

In [None]:
class LDA:
    """Linear discriminant analysis classifier with a pooled covariance.

    After ``fit`` the instance exposes:

    * ``classes`` -- sorted unique training labels.
    * ``means``   -- label -> per-feature mean vector.
    * ``priors``  -- label -> fraction of training rows with that label.
    * ``Sw``      -- pooled within-class covariance matrix.
    * ``Sw_inv``  -- (pseudo-)inverse of ``Sw``.
    """

    def fit(self, X, y):
        """Estimate class means, priors and the pooled covariance.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of n_samples labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.classes = np.unique(y)
        n_samples, n_features = X.shape
        self.means = {}
        self.priors = {}
        self.Sw = np.zeros((n_features, n_features))

        for cls in self.classes:
            X_c = X[y == cls]
            mu = X_c.mean(axis=0)
            self.means[cls] = mu
            self.priors[cls] = X_c.shape[0] / n_samples
            # Accumulate the class scatter matrix directly. This equals
            # cov * (n_c - 1) but stays finite (all zeros) for a class with a
            # single sample, where np.cov would return NaN.
            centered = X_c - mu
            self.Sw += centered.T @ centered

        # Pooled covariance: total within-class scatter over (n - k) degrees
        # of freedom, floored at 1 so n == k does not divide by zero.
        self.Sw /= max(n_samples - len(self.classes), 1)
        # The pseudo-inverse matches the inverse when Sw is well conditioned
        # and still works when Sw is singular (e.g. collinear features).
        self.Sw_inv = np.linalg.pinv(self.Sw)

    def _discriminant(self, x, cls):
        """Return the linear discriminant score of sample ``x`` for ``cls``."""
        mu_k = self.means[cls]
        prior = self.priors[cls]
        return (x @ self.Sw_inv @ mu_k) - 0.5 * (mu_k @ self.Sw_inv @ mu_k) + np.log(prior)

    def predict(self, X):
        """Return an array with the highest-scoring label for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return np.array([])
        # Everything that depends only on the class is computed once, then
        # all rows are scored with a single matrix product.
        means = np.array([self.means[cls] for cls in self.classes])
        log_priors = np.log([self.priors[cls] for cls in self.classes])
        projected = means @ self.Sw_inv.T  # row k is Sw_inv @ mu_k
        scores = X @ projected.T - 0.5 * np.sum(means * projected, axis=1) + log_priors
        # argmax returns the first maximum, so ties go to the smaller label.
        return self.classes[np.argmax(scores, axis=1)]

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == np.asarray(y))

# Fit the LDA classifier on the training split and report accuracy on the
# validation and test splits.
lda = LDA()
lda.fit(X_train, y_train)

# The leading "\n" puts a blank line between this report and earlier output.
print("\nLDA - Validation Accuracy: ", lda.score(X_val, y_val))
print("LDA - Test Accuracy: ", lda.score(X_test, y_test))