**DATA 2060 Final Notebook**

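This notebook implements two strategies for extending a binary logistic regression classifier to multi-class problems, One-vs-All and All-Pairs, and runs a quick sanity check of both on randomly generated data.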

In [1]:
import numpy as np
import itertools as it
import logistic_regression as lr

In [2]:
### One-vs-All multi-class classifier built on binary logistic regression ###

class OneVsAll:
    """Multi-class classifier made of one binary model per class.

    Model ``k`` is trained to separate class ``k`` (label 1) from every other
    class (label 0). Prediction picks, per sample, the class whose model
    produced the largest output.
    """

    def __init__(
        self,
        n_features: int,
        n_classes: int,
        batch_size: int,
        conv_threshold: float
    ) -> None:
        """Create ``n_classes`` two-class logistic regression models.

        Args:
            n_features: Forwarded to each ``lr.LogisticRegression``.
            n_classes: Number of target classes; labels are taken to be
                the integers ``0 .. n_classes - 1``.
            batch_size: Forwarded to each ``lr.LogisticRegression``.
            conv_threshold: Forwarded to each ``lr.LogisticRegression``.
        """
        self.n_classes = n_classes
        self.models = []
        for _ in range(n_classes):
            binary_model = lr.LogisticRegression(
                n_features, 2, batch_size, conv_threshold
            )
            self.models.append(binary_model)

    def train(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fit every per-class model on the full data set.

        Args:
            X: Training inputs, passed unchanged to each model.
            Y: Integer class labels, one per row of ``X``.
        """
        for k in range(self.n_classes):
            # 1 where the sample belongs to class k, 0 for "all the rest".
            is_class_k = Y == k
            self.models[k].train(X, is_class_k.astype(int))

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the predicted class index for each sample in ``X``.

        NOTE(review): the winner is chosen by argmax over each model's
        ``predict`` output. If ``lr.LogisticRegression.predict`` returns hard
        0/1 labels rather than scores, ties are frequent and always resolve
        to the lowest class index -- confirm what ``predict`` returns.
        """
        per_model_outputs = []
        for binary_model in self.models:
            per_model_outputs.append(binary_model.predict(X))

        # Row k holds model k's output for every sample, so the winning
        # class of each sample is found along axis 0.
        stacked = np.array(per_model_outputs)
        return np.argmax(stacked, axis=0)

    def accuracy(self, X: np.ndarray, Y: np.ndarray) -> float:
        """Return the fraction of samples in ``X`` whose prediction equals ``Y``."""
        is_correct = self.predict(X) == Y
        return np.mean(is_correct)


In [3]:
### All-Pairs multi-class classifier built on binary logistic regression ###

class AllPairs:
    """Multi-class classifier made of one binary model per pair of classes.

    For each pair ``(i, j)`` with ``i < j`` a model is trained only on samples
    of those two classes, with class ``i`` encoded as label 1 and class ``j``
    as label 0. At prediction time every pairwise model casts a vote and the
    class with the most votes wins.
    """

    def __init__(
        self,
        n_features: int,
        n_classes: int,
        batch_size: int,
        conv_threshold: float,
    ):
        """Create one two-class logistic regression model per class pair.

        Args:
            n_features: Forwarded to each ``lr.LogisticRegression``.
            n_classes: Number of target classes; labels are taken to be
                the integers ``0 .. n_classes - 1``.
            batch_size: Forwarded to each ``lr.LogisticRegression``.
            conv_threshold: Forwarded to each ``lr.LogisticRegression``.
        """
        self.n_classes = n_classes
        self.pairs = list(it.combinations(range(n_classes), 2))
        self.models = {}
        for pair in self.pairs:
            self.models[pair] = lr.LogisticRegression(
                n_features, 2, batch_size, conv_threshold
            )

    def train(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fit each pairwise model on the samples of its two classes.

        Args:
            X: Training inputs; indexed with a boolean row mask.
            Y: Integer class labels, one per row of ``X``.
        """
        for pair in self.pairs:
            first, second = pair

            # Keep only the rows labelled with one of the two classes.
            in_pair = (Y == first) | (Y == second)
            X_subset = X[in_pair]
            Y_subset = Y[in_pair]

            # Binary target: 1 for the first class of the pair, 0 for the second.
            binary_target = (Y_subset == first).astype(int)
            self.models[pair].train(X_subset, binary_target)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the class index with the most pairwise votes per sample.

        Each model's output is added to the tally of the pair's first class
        and its complement (``1 - output``) to the tally of the second class.
        Ties go to the lowest class index (``np.argmax`` behaviour).
        """
        n_samples = len(X)
        tally = np.zeros((n_samples, self.n_classes))

        for pair, pair_model in self.models.items():
            first, second = pair
            first_wins = pair_model.predict(X)
            tally[:, first] += first_wins
            tally[:, second] += 1 - first_wins

        return np.argmax(tally, axis=1)

    def accuracy(self, X: np.ndarray, Y: np.ndarray) -> float:
        """Return the fraction of samples in ``X`` whose prediction equals ``Y``."""
        is_correct = self.predict(X) == Y
        return np.mean(is_correct)


In [4]:
### Dummy test ###

# Seed NumPy's global RNG so the synthetic data below is identical on every
# Restart & Run All. Any randomness lr.LogisticRegression draws from the same
# global RNG during training becomes repeatable too.
SEED = 0
np.random.seed(SEED)

n_samples = 500
n_features = 10
n_classes = 5

# Features are uniform noise and labels are uniform random integers, so there
# is nothing to learn: this cell only checks that both classifiers run end to
# end. Accuracy is measured on the training data and should sit near chance
# level (1 / n_classes).
# NOTE(review): X has n_features + 1 columns -- presumably the extra column is
# the bias term lr.LogisticRegression expects; confirm against that module.
X = np.random.rand(n_samples, n_features + 1)
Y = np.random.randint(0, n_classes, n_samples)

# One-vs-All
ova = OneVsAll(n_features, n_classes, batch_size=32, conv_threshold=1e-4)
ova.train(X, Y)
print(f"One-vs-All Accuracy: {ova.accuracy(X, Y)}")

# All-Pairs
all_pairs = AllPairs(n_features, n_classes, batch_size=32, conv_threshold=1e-4)
all_pairs.train(X, Y)
print(f"All-Pairs Accuracy: {all_pairs.accuracy(X, Y)}")


One-vs-All Accuracy: 0.192
All-Pairs Accuracy: 0.282
