In [1]:
import classifierAgents
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
file = '/Users/youssefawad/Documents/Kings/term_2/6CCS3ML1_Machine_Learning/coursework/cw1_pacman/good-moves.txt'

data, target = classifierAgents.loadData(file)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [4]:
lr_1 = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
lr_2 = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
lr_3 = LogisticRegression(multi_class='ovr', solver='liblinear', max_iter=1000)

lr_1.fit(X_train, y_train)
lr_2.fit(X_train, y_train)
lr_3.fit(X_train, y_train)

preds_1 = lr_1.predict(X_test)
preds_2 = lr_2.predict(X_test)
preds_3 = lr_3.predict(X_test)

accuracy = lr_1.score(X_test, y_test)
print(accuracy)

accuracy = lr_2.score(X_test, y_test)
print(accuracy)

accuracy = lr_3.score(X_test, y_test)
print(accuracy)

0.6923076923076923
0.6923076923076923
0.6923076923076923




In [5]:
# naive bayes as a baseline
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)
nb_preds = nb.predict(X_test)
accuracy = nb.score(X_test, y_test)
print(accuracy)

0.6538461538461539


In [6]:
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

K = max(y_train) + 1  # number of classes
D = X_train.shape[1] # feature space
w = np.zeros((K, D)) # weights
b = np.zeros(K) # biases

def predict_loss_grad(x, y):
    # softmax from https://en.wikipedia.org/wiki/Multinomial_logistic_regression and 8.3.7 Murphy book
    # but added bias term and vectorized
    z = w @ x + b
    z -= np.max(z)
    probs = np.exp(z) / np.sum(np.exp(z))

    # https://en.wikipedia.org/wiki/Multinomial_logistic_regression#Likelihood_function
    loss = -np.log(probs[y])

    # (4.109) PRML book
    t = np.zeros_like(probs)
    t[y] = 1
    grad_w = np.outer(probs - t, x)
    grad_b = probs - t

    return probs, loss, grad_w, grad_b


lr = 0.01
num_epochs = 100
N = len(X_train)

for epoch in range(num_epochs):
    total_loss = 0.0
    grad_w_sum = np.zeros_like(w)
    grad_b_sum = np.zeros_like(b)

    for x, y in zip(X_train, y_train):
        probs, loss, grad_w, grad_b = predict_loss_grad(x, y)
        total_loss += loss
        grad_w_sum += grad_w
        grad_b_sum += grad_b

    # batch gradient descent from slides
    w -= lr * grad_w_sum / N
    b -= lr * grad_b_sum / N

    if epoch % 50 == 0:
        print(f"epoch {epoch}, avg loss: {total_loss / N}")

print(f"w: {w}")
print(f"b: {b}")
print(f"Loss: {total_loss/N}")

epoch 0, avg loss: 1.3862943611198917
epoch 50, avg loss: 1.2901160886331144
w: [[-0.1024507   0.08569766 -0.06174487  0.06355989  0.13198929 -0.00852151
  -0.05024084 -0.0376089   0.          0.          0.          0.
   0.         -0.00260567  0.          0.         -0.00229686  0.
   0.          0.          0.          0.          0.          0.
  -0.00260567]
 [ 0.07074048 -0.09106543  0.10231172 -0.04490459 -0.03942987  0.05760577
  -0.03722856 -0.07284216  0.          0.          0.          0.
   0.         -0.00228917  0.          0.         -0.00257042  0.
   0.          0.          0.          0.          0.          0.
  -0.00228917]
 [-0.07244903  0.05857072 -0.10847654  0.06558604 -0.04215359 -0.01756526
   0.12601463 -0.0267331   0.          0.          0.          0.
   0.          0.00719711  0.          0.         -0.00231277  0.
   0.          0.          0.          0.          0.          0.
   0.00719711]
 [ 0.10415926 -0.05320296  0.06790969 -0.08424133 -0.050405

In [7]:
correct = 0
for x,y in zip(X_test, y_test):
    z = w @ x + b
    z -= np.max(z)
    probs = np.exp(z) / np.sum(np.exp(z))
    pred = np.argmax(probs)
    print(f"pred: {pred}, true: {y}")
    if pred == y:
        correct += 1

accuracy = correct / len(X_test)
print(accuracy)

pred: 0, true: 0
pred: 3, true: 3
pred: 2, true: 2
pred: 3, true: 1
pred: 2, true: 2
pred: 3, true: 3
pred: 3, true: 3
pred: 3, true: 3
pred: 2, true: 2
pred: 3, true: 0
pred: 0, true: 0
pred: 3, true: 3
pred: 1, true: 1
pred: 3, true: 3
pred: 3, true: 2
pred: 3, true: 3
pred: 3, true: 3
pred: 0, true: 1
pred: 3, true: 3
pred: 1, true: 1
pred: 0, true: 0
pred: 3, true: 3
pred: 3, true: 1
pred: 3, true: 1
pred: 3, true: 3
pred: 3, true: 1
0.7307692307692307


In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, ClassifierMixin

param_grid = {
    "learning_rate": [0.001, 0.005, 0.01],
    "num_epochs": [10, 50, 100]
}

class SoftmaxGDClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, learning_rate=0.01, num_epochs=100):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

    def _softmax(self, x):
        # softmax from https://en.wikipedia.org/wiki/Multinomial_logistic_regression and 8.3.7 Murphy book
        # but added bias term and vectorized
        if x.ndim == 1:
            z = self.w_ @ x + self.b_
            z -= np.max(z)
            return np.exp(z) / np.sum(np.exp(z))
        else:
            z = x @ self.w_.T + self.b_
            z -= np.max(z, axis=1, keepdims=True)
            return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)
    
    def _predict_loss_grad(self, x, y):
        probs = self._softmax(x)

        # https://en.wikipedia.org/wiki/Multinomial_logistic_regression#Likelihood_function
        loss = -np.log(probs[y])

        # (4.109) PRML book
        t = np.zeros_like(probs)
        t[y] = 1
        grad_w = np.outer(probs - t, x)
        grad_b = probs - t

        return loss, grad_w, grad_b
    
    def fit(self, X, y):
        X = np.array(X)
        y = np.array(y)
        N = len(X)
        # adding self.classes_ for gridsearch
        self.classes_ = np.unique(y)
        K = len(self.classes_) # number of classes
        D = X.shape[1] # feature space
        self.w_ = np.zeros((K, D)) # weights
        self.b_ = np.zeros(K) # biases

        for epoch in range(self.num_epochs):
            total_loss = 0.0
            grad_w_sum = np.zeros_like(w)
            grad_b_sum = np.zeros_like(b)

            for x, y in zip(X_train, y_train):
                loss, grad_w, grad_b = self._predict_loss_grad(x, y)
                total_loss += loss
                grad_w_sum += grad_w
                grad_b_sum += grad_b

            # batch gradient descent from slides
            self.w_ -= self.learning_rate * grad_w_sum / N
            self.b_ -= self.learning_rate * grad_b_sum / N

            if epoch % 50 == 0:
                print(f"epoch {epoch}, avg loss: {total_loss / N}")

        return self
    
    def predict_proba(self, X):
        X = np.atleast_2d(X)
        return self._softmax(X)
    
    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

In [9]:
clf = SoftmaxGDClassifier()

gs = GridSearchCV(
    clf,
    param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1
)

gs.fit(X_train, y_train)

print("Best params:", gs.best_params_)
print("Best CV score:", gs.best_score_)

epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 50, avg loss: 1.716797942899476
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 50, avg loss: 1.716797942899476
epoch 0, avg loss: 1.7328679513998648
epoch 0, avg loss: 1.7328679513998648
epoch 50, avg loss: 1.716797942899476
epoch 0, avg

In [10]:
best_clf = gs.best_estimator_

y_test_pred = best_clf.predict(X_test)
accuracy = np.mean(y_test_pred == y_test)
print("Test set accuracy of best model:", accuracy)

Test set accuracy of best model: 0.7307692307692307


In [11]:
best_clf.predict(X_test[0])

array([0])

In [22]:
best_clf