In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


def load_data():
    """Fetch the MNIST digits from OpenML as plain arrays.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the ``data`` array of the
        ``mnist_784`` (version 1) dataset and ``y`` is its ``target``
        array cast to ``np.int32``.
    """
    dataset = datasets.fetch_openml("mnist_784", version=1, as_frame=False)
    # Convert the targets to integers before handing them back.
    labels = dataset.target.astype(np.int32)
    return dataset.data, labels


def split_data(X, y, test_size=0.2, random_state=42):
    """Split features and labels into train and test parts.

    Args:
        X: Feature array.
        y: Label array aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` (default 42).

    Returns:
        Whatever ``train_test_split`` returns for two inputs, i.e. the
        sequence ``X_train, X_test, y_train, y_test``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return train_test_split(X, y, **split_options)


def avg_pooling(image, pool_size):
    """Average-pool an image over non-overlapping windows.

    The image is cut into ``pool_size[0] x pool_size[1]`` windows along its
    first two axes and each window is replaced by the mean of its elements.
    Rows/columns that do not fill a complete window are dropped. If the image
    has extra trailing axes, they are averaged into the window as well.

    Args:
        image: Array-like with at least two axes (height, width, ...).
        pool_size: ``(pool_height, pool_width)`` pair of positive integers,
            or a single positive integer for a square window.

    Returns:
        np.ndarray: float64 array of shape
        ``(height // pool_height, width // pool_width)``. It is empty along
        any axis where the window is larger than the image.

    Raises:
        ValueError: If a window dimension is smaller than 1.
    """
    # A bare integer means a square window.
    if isinstance(pool_size, (int, np.integer)):
        pool_size = (pool_size, pool_size)
    pool_h, pool_w = pool_size[0], pool_size[1]
    if pool_h < 1 or pool_w < 1:
        raise ValueError(f"pool_size entries must be >= 1, got {pool_size!r}")

    image = np.asarray(image)
    pooled_height = image.shape[0] // pool_h
    pooled_width = image.shape[1] // pool_w

    # Drop the incomplete border windows, then expose every window as its own
    # pair of axes so that one vectorised mean replaces the per-cell loop.
    cropped = image[: pooled_height * pool_h, : pooled_width * pool_w]
    blocks = cropped.reshape(
        (pooled_height, pool_h, pooled_width, pool_w) + cropped.shape[2:]
    )
    window_axes = (1, 3) + tuple(range(4, blocks.ndim))
    return blocks.mean(axis=window_axes, dtype=np.float64)


def preprocess_data(X_train, X_test):
    """Standardise both splits with statistics taken from the training split.

    Args:
        X_train: Training features; the ``StandardScaler`` is fitted on these.
        X_test: Test features; only transformed, never used for fitting.

    Returns:
        tuple: ``(X_train_scaled, X_test_scaled)``.
    """
    fitted_scaler = StandardScaler().fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

In [2]:
def train_model(X_train, y_train, max_iter=1000, random_state=42):
    """Fit a logistic-regression classifier with the lbfgs solver.

    Args:
        X_train: Training features.
        y_train: Training labels.
        max_iter: Iteration cap passed to ``LogisticRegression`` (default 1000).
        random_state: Seed passed to ``LogisticRegression`` (default 42).

    Returns:
        LogisticRegression: The fitted estimator.
    """
    # `multi_class` is intentionally not passed: scikit-learn deprecated it in
    # 1.5 (FutureWarning) and schedules it for removal. With the lbfgs solver
    # and more than two classes the default already fits the multinomial
    # model, so multi-class results are unchanged.
    # NOTE(review): for two-class labels, releases that still accept
    # `multi_class` fit a plain binary model by default rather than a
    # two-class softmax - confirm if this helper is reused for binary data.
    model = LogisticRegression(
        max_iter=max_iter, solver="lbfgs", random_state=random_state
    )
    model.fit(X_train, y_train)
    return model

In [3]:
def evaluate_model(pred_func, X_test, y_test):
    """Print accuracy, classification report and confusion matrix.

    Args:
        pred_func: Callable that takes ``X_test`` and returns the predicted
            labels.
        X_test: Inputs handed to ``pred_func`` unchanged.
        y_test: True labels the predictions are scored against.

    Returns:
        None. The three metrics are written to stdout.
    """
    y_pred = pred_func(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    # The remaining reports share one shape: heading, then metric(y_true, y_pred).
    for heading, metric in (
        ("Classification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    ):
        print(heading, metric(y_test, y_pred))


def predict(model, new_data):
    """Return ``model.predict(new_data)``.

    Args:
        model: Any object exposing a ``predict`` method.
        new_data: Inputs passed to ``model.predict`` unchanged.

    Returns:
        The value returned by ``model.predict``.
    """
    return model.predict(new_data)

In [4]:
import tqdm

# Fetch the full dataset as arrays
X, y = load_data()

# Average-pool every 28x28 image over non-overlapping pool_size x pool_size
# windows and store the flattened result as that image's feature row
pool_size = 7
pooled_side = 28 // pool_size
n_images = X.shape[0]
X_pooled = np.zeros((n_images, pooled_side * pooled_side))
for i in tqdm.tqdm(range(n_images)):
    image = X[i].reshape(28, 28)
    pooled_image = avg_pooling(image, (pool_size, pool_size))
    X_pooled[i] = pooled_image.ravel()

# Hold out a test split of the pooled features
X_train, X_test, y_train, y_test = split_data(X_pooled, y)

# Standardise both splits (the scaler is fitted inside preprocess_data)
X_train_scaled, X_test_scaled = preprocess_data(X_train, X_test)

100%|██████████| 70000/70000 [00:03<00:00, 17592.78it/s]


In [5]:
# Fit the logistic-regression baseline on the standardised training split
model = train_model(X_train=X_train_scaled, y_train=y_train)



In [6]:
# Score the baseline on the held-out split (prints accuracy, report, confusion matrix)
evaluate_model(pred_func=model.predict, X_test=X_test_scaled, y_test=y_test)

Accuracy: 0.7457142857142857
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.68      0.74      1343
           1       0.82      0.90      0.86      1600
           2       0.82      0.75      0.79      1380
           3       0.79      0.77      0.78      1433
           4       0.64      0.69      0.66      1295
           5       0.68      0.61      0.64      1273
           6       0.84      0.87      0.86      1396
           7       0.77      0.81      0.79      1503
           8       0.64      0.69      0.66      1357
           9       0.64      0.64      0.64      1420

    accuracy                           0.75     14000
   macro avg       0.74      0.74      0.74     14000
weighted avg       0.75      0.75      0.75     14000

Confusion Matrix:
 [[ 916   10   23   23   19   77   23    4  237   11]
 [   0 1446    6   36   13   50    3   10   24   12]
 [  11   33 1038   66   60   11   81   12   54   14]
 [  22   5

In [None]:
from neural_bandits.algorithms import LinearThompsonBandit, LinearUCBBandit


# One arm per digit class; each arm's context vector has n_features * n_arms
# entries (one feature-sized slot per arm).
n_arms = 10  # Number of classes
n_features = X_train_scaled.shape[1]
# NOTE(review): alpha is not passed to the bandit in any visible cell - confirm
# whether it should be given to the constructor or removed.
alpha = 0.01
# Alternative bandit, kept for reference:
# bandit = LinearThompsonBandit(n_arms, n_features * n_arms)
bandit = LinearUCBBandit(n_arms, n_features * n_arms)

In [15]:
# Online pass over the training split: the bandit picks one arm per sample and
# only learns whether that pick matched the true label.
arm_indicator = np.identity(n_arms)
for t in tqdm.tqdm(range(X_train_scaled.shape[0])):
    # Row a carries the sample's features in slot a and zeros in every other
    # slot, giving an (n_arms, n_features * n_arms) matrix.
    contextualised_actions = np.kron(arm_indicator, X_train_scaled[t])
    chosen_arm = bandit.select_arm(contextualised_actions)
    # Reward is 1 only when the chosen arm equals the label, otherwise 0.
    reward = 1 if y_train[t] == chosen_arm else 0

    bandit.update_step(reward, contextualised_actions[chosen_arm])

100%|██████████| 56000/56000 [01:44<00:00, 533.95it/s]


In [9]:
# Disabled experiment (every line below is commented out): the same loop as
# the online training cell, but rewards and chosen-arm contexts are buffered
# and handed to bandit.update_batch in groups instead of calling update_step
# on every sample.
# NOTE(review): with batch = 1000 the flush test `t % batch == 99` fires at
# t = 99, 1099, 2099, ..., so the first flush holds only 100 samples, and
# whatever is buffered after the last flush is never sent to update_batch.
# Presumably `t % batch == batch - 1` plus a final flush after the loop was
# intended - confirm before re-enabling.
# batch = 1000
# buffer_reward = []
# buffer_contextualised_actions = []
# for t in tqdm.tqdm(range(X_train_scaled.shape[0])):
#     # for t in tqdm.tqdm(range(1)):
#     contextualised_actions = np.outer(np.identity(n_arms), X_train_scaled[t]).reshape(
#         n_arms, n_features * n_arms
#     )
#     chosen_arm = bandit.select_arm(contextualised_actions)
#     reward = 1 if y_train[t] == chosen_arm else 0
#     buffer_reward.append(reward)
#     buffer_contextualised_actions.append(contextualised_actions[chosen_arm])

#     if t % batch == 99:
#         bandit.update_batch(buffer_contextualised_actions, buffer_reward)
#         buffer_reward = []
#         buffer_contextualised_actions = []

In [16]:
# Build the per-arm context matrix for every test sample, stacked into one
# (n_samples, n_arms, n_features * n_arms) array, then score bandit.predict
# against y_test.
# np.empty is the supported way to allocate an uninitialised buffer (the
# np.ndarray constructor is a low-level API); every slot is overwritten below.
X_test_contextualised_actions = np.empty(
    (X_test_scaled.shape[0], n_arms, n_features * n_arms)
)

# The arm-indicator matrix is identical for every sample, so build it once.
arm_indicator = np.identity(n_arms)
for t in tqdm.tqdm(range(X_test_scaled.shape[0])):
    # Row a carries the sample's features in slot a and zeros elsewhere.
    contextualised_actions = np.outer(arm_indicator, X_test_scaled[t]).reshape(
        n_arms, n_features * n_arms
    )
    X_test_contextualised_actions[t] = contextualised_actions

evaluate_model(bandit.predict, X_test_contextualised_actions, y_test)

100%|██████████| 14000/14000 [00:00<00:00, 89824.64it/s]


Accuracy: 0.7248571428571429
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.67      0.73      1343
           1       0.74      0.88      0.80      1600
           2       0.89      0.69      0.78      1380
           3       0.82      0.73      0.77      1433
           4       0.59      0.70      0.64      1295
           5       0.76      0.53      0.63      1273
           6       0.78      0.86      0.82      1396
           7       0.74      0.78      0.76      1503
           8       0.59      0.72      0.65      1357
           9       0.63      0.63      0.63      1420

    accuracy                           0.72     14000
   macro avg       0.74      0.72      0.72     14000
weighted avg       0.74      0.72      0.72     14000

Confusion Matrix:
 [[ 906   23   11   22   16   51   66    2  220   26]
 [   0 1413    4   31   37   24   19   25   35   12]
 [  19   74  949   62   68   11  119   11   52   15]
 [  18   7