In [None]:
import tarfile
import numpy as np
import os
import gzip
import pickle
from matplotlib import pyplot as plt
import tempfile
from sklearn.model_selection import train_test_split

In [None]:
def unpickle(file): # ref: https://www.cs.toronto.edu/~kriz/cifar.html
  train_data = []
  train_label = []
  test_data = []
  test_label = []
  with tarfile.open(file, 'r:gz') as t:
    t.extractall('./data')
    is_test_file = False

    for member in t.getnames():
      if ('test_batch' in member):
        is_test_file = True

      filename = os.path.join('.','data', member)
      try:
        with open(filename, 'rb') as fo:
          batch = pickle.load(fo, encoding='latin1')
          data_file, labels_file = batch['data'], batch['labels']
          if (is_test_file == False):
            train_data.append(data_file)
            train_label.append(labels_file)
          else :
            test_data.append(data_file)
            test_label.append(labels_file)
            is_test_file = False
      except:
        print("Error not data files")
  return np.concatenate(train_data), np.concatenate(test_data), np.concatenate(train_label), np.concatenate(test_label)


train_data_raw, test_data_raw, train_label_raw, test_label_raw = unpickle('cifar-10-python.tar.gz')




Error not data files
Error not data files
Error not data files


In [None]:
# normalize [0, 255] --> [0, 1]

def greyscale_img(img_data):
  img_data_split = np.array(np.split(img_data, 3, axis=1))
  normalize_arr = (np.mean(img_data_split, axis=0, keepdims=True) / 255).reshape(img_data_split.shape[1], img_data_split.shape[2])
  return np.array(normalize_arr)


def onehot_encoding(labels, num_of_classifier = 9):
  labels_onehot = np.zeros((labels.shape[0], num_of_classifier + 1))
  # for i in range(labels.shape[0]):
  #   labels_onehot[i, labels[i]] = 1
  # optimize version
  labels_onehot[np.arange(labels.shape[0]), labels] = 1
  return labels_onehot

def add_ones(X):
  ones = np.ones((X.shape[0], 1))
  return np.concatenate((ones, X), axis=1)

In [None]:
train_X_pre_val = (train_data_raw)
test_X_pre_val = (test_data_raw)
train_Y_pre_val = onehot_encoding(train_label_raw)
test_Y_pre_val= onehot_encoding(test_label_raw)


In [None]:

n_rimages = 10; n_cimages = 10
padding = 2
canvas = 0.5 * np.ones((n_rimages * (32 + 2 * padding), n_cimages * (32 + 2 * padding)))
rand_idxs = np.random.permutation(np.arange(len(train_data_raw))[:n_rimages * n_cimages])
for r in range(n_rimages):
    for c in range(n_cimages):
        i = r * n_cimages + c
        image = train_data_raw[rand_idxs[i]].reshape(32, 32)
        temp1 = r * (32 + 2 * padding) + padding
        temp2 = c * (32 + 2 * padding) + padding
        canvas[temp1:temp1 + 32, temp2:temp2 + 32] = image
plt.imshow(canvas, cmap='gray', vmin=0, vmax=1)
plt.grid(None); plt.axis('off');

ValueError: cannot reshape array of size 3072 into shape (32,32)

In [None]:


train_X_ones =(train_X_pre_val)
print(train_X_ones.shape)

(50000, 3072)


In [None]:
train_X, val_X, train_Y, val_Y = train_test_split(train_X_ones, train_Y_pre_val, test_size=0.25, random_state=1)

In [None]:
print(train_X.shape)
print(val_X.shape)
print(train_Y.shape)
print(val_Y.shape)

(37500, 3072)
(12500, 3072)
(37500, 10)
(12500, 10)


## Section Tensorflow

In [None]:
import tensorflow as tf
import keras

In [None]:
class MLP_tensorflow:
  def __init__(self, num_of_layers = 3, num_of_node_input = train_X.shape[1], num_of_node_output = 10):
    self.num_of_layers = num_of_layers
    self.num_of_node_input = num_of_node_input
    self.num_of_node_output = num_of_node_output
    self.num_of_node_hidden = (2 / 3) * (self.num_of_node_input + self.num_of_node_output)
    self.lr = 0.02



In [None]:
# model = tf.keras.models.Sequential()

# model.add(tf.keras.Input(train_X))

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import numpy as np
import time

# Assuming train_X, val_X, train_Y, val_Y are preloaded and have the following shapes:
# train_X: (37500, 1025), val_X: (12500, 1025)
# train_Y: (37500, 10), val_Y: (12500, 10)


# Define the MLP model
def build_mlp(input_dim, output_dim):
    model = Sequential([
        Dense(512, activation='relu', input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dense(output_dim, activation='softmax')
    ])
    return model

# Initialize model
input_dim = train_X.shape[1]
output_dim = train_Y.shape[1]
model = build_mlp(input_dim, output_dim)

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# Train the model
batch_size = 128
epochs = 200

start_time = time.time()
history = model.fit(
    train_X, train_Y,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(val_X, val_Y),
    callbacks=[early_stopping],
    verbose=1
)
training_time = time.time() - start_time
print(f"Training time: {training_time:.2f} seconds")

# Evaluate the model
val_loss, val_accuracy = model.evaluate(val_X, val_Y, verbose=0)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

test_loss, test_accuracy = model.evaluate(test_X_pre_val, test_Y_pre_val, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy* 100:.2f}%")
# Predictions and metrics
y_pred = np.argmax(model.predict(val_X), axis=-1)
y_true = np.argmax(val_Y, axis=-1)

conf_matrix = confusion_matrix(y_true, y_pred)
class_report = classification_report(y_true, y_pred, target_names=[
    f"Class {i}" for i in range(10)
])

print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)


gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    memory_info = tf.config.experimental.get_memory_info('GPU:0')
    print(f"GPU memory used: {memory_info['current'] / 1024**2:.2f} MB")

# Save model
model.save("mlp_cifar10_model.h5")
print("Model saved as 'mlp_cifar10_model.h5'")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - accuracy: 0.2846 - loss: 2.0916 - val_accuracy: 0.3486 - val_loss: 1.8429
Epoch 2/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.3898 - loss: 1.7037 - val_accuracy: 0.2822 - val_loss: 2.0821
Epoch 3/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4246 - loss: 1.6198 - val_accuracy: 0.3727 - val_loss: 1.7449
Epoch 4/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4371 - loss: 1.5727 - val_accuracy: 0.3558 - val_loss: 1.8049
Epoch 5/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4482 - loss: 1.5387 - val_accuracy: 0.4223 - val_loss: 1.6240
Epoch 6/200
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4574 - loss: 1.5139 - val_accuracy: 0.4312 - val_loss: 1.6399
Epoch 7/200
[1m293/



Confusion Matrix:
 [[676  27  45  41  23  13  63  58 167 112]
 [ 77 637  31  38  16  12  53  29  82 263]
 [137  15 450  93  58  57 249  96  44  28]
 [ 60  16  97 396  25 115 315  71  57  74]
 [ 81   7 195  69 364  29 325 131  45  33]
 [ 49  16 126 284  45 349 251  88  54  40]
 [ 21  16  70  80  42  30 933  29  19  38]
 [ 50  15  75  72  59  59 109 690  30  92]
 [134  61  18  43  12   8  19  13 786 119]
 [ 65 126  20  33  12  23  56  48  58 820]]
Classification Report:
               precision    recall  f1-score   support

     Class 0       0.50      0.55      0.53      1225
     Class 1       0.68      0.51      0.59      1238
     Class 2       0.40      0.37      0.38      1227
     Class 3       0.34      0.32      0.33      1226
     Class 4       0.55      0.28      0.38      1279
     Class 5       0.50      0.27      0.35      1302
     Class 6       0.39      0.73      0.51      1278
     Class 7       0.55      0.55      0.55      1251
     Class 8       0.59      0.65      

In [30]:
import jax
import jax.numpy as jnp
from jax import random, grad, jit
import optax
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import time

# Assuming train_X, val_X, train_Y, val_Y, test_X_pre_val, test_Y_pre_val are preloaded
# Normalize the dataset
train_X = (train_X - train_X.mean(axis=0)) / train_X.std(axis=0)
val_X = (val_X - val_X.mean(axis=0)) / val_X.std(axis=0)
test_X_pre_val = (test_X_pre_val - test_X_pre_val.mean(axis=0)) / test_X_pre_val.std(axis=0)

# Define the model with Xavier initialization
def init_params(key, input_dim, output_dim):
    key1, key2, key3, key4 = random.split(key, 4)
    params = {
        'W1': random.uniform(key1, (input_dim, 512), minval=-jnp.sqrt(6 / (input_dim + 512)), maxval=jnp.sqrt(6 / (input_dim + 512))),
        'b1': jnp.zeros((512,)),
        'W2': random.uniform(key2, (512, 256), minval=-jnp.sqrt(6 / (512 + 256)), maxval=jnp.sqrt(6 / (512 + 256))),
        'b2': jnp.zeros((256,)),
        'W3': random.uniform(key3, (256, 128), minval=-jnp.sqrt(6 / (256 + 128)), maxval=jnp.sqrt(6 / (256 + 128))),
        'b3': jnp.zeros((128,)),
        'W4': random.uniform(key4, (128, output_dim), minval=-jnp.sqrt(6 / (128 + output_dim)), maxval=jnp.sqrt(6 / (128 + output_dim))),
        'b4': jnp.zeros((output_dim,))
    }
    return params

def forward(params, x, is_training=True, dropout_rate=0.3):
    rng = random.PRNGKey(42)  # Use a random key for reproducibility
    x = jnp.dot(x, params['W1']) + params['b1']
    x = jax.nn.relu(x)
    if is_training:
        mask = random.bernoulli(rng, p=1-dropout_rate, shape=x.shape)
        x = x * mask / (1 - dropout_rate)
    x = jnp.dot(x, params['W2']) + params['b2']
    x = jax.nn.relu(x)
    if is_training:
        mask = random.bernoulli(rng, p=1-dropout_rate, shape=x.shape)
        x = x * mask / (1 - dropout_rate)
    x = jnp.dot(x, params['W3']) + params['b3']
    x = jax.nn.relu(x)
    x = jnp.dot(x, params['W4']) + params['b4']
    return jax.nn.softmax(x)

# Loss and accuracy functions
def loss_fn(params, x, y):
    preds = forward(params, x)
    return -jnp.mean(jnp.sum(y * jnp.log(preds + 1e-7), axis=1))

def accuracy_fn(params, x, y):
    preds = forward(params, x, is_training=False)
    return jnp.mean(jnp.argmax(preds, axis=1) == jnp.argmax(y, axis=1))

# Initialize parameters
key = random.PRNGKey(42)
input_dim = train_X.shape[1]
output_dim = train_Y.shape[1]
params = init_params(key, input_dim, output_dim)

# Optimizer with learning rate scheduling
lr_schedule = optax.exponential_decay(init_value=0.001, transition_steps=100, decay_rate=0.9)
optimizer = optax.chain(optax.adam(lr_schedule))
opt_state = optimizer.init(params)

@jit
def update(params, opt_state, x, y):
    grads = grad(loss_fn)(params, x, y)
    updates, opt_state = optimizer.update(grads, opt_state)
    params = optax.apply_updates(params, updates)
    return params, opt_state

# Training loop
batch_size = 128
epochs = 200
patience = 10
best_val_loss = float('inf')
patience_counter = 0

start_time = time.time()
for epoch in range(epochs):
    # Shuffle data
    permutation = np.random.permutation(train_X.shape[0])
    train_X_shuffled = train_X[permutation]
    train_Y_shuffled = train_Y[permutation]

    # Mini-batch training
    for i in range(0, train_X.shape[0], batch_size):
        x_batch = train_X_shuffled[i:i+batch_size]
        y_batch = train_Y_shuffled[i:i+batch_size]
        params, opt_state = update(params, opt_state, x_batch, y_batch)

    # Validation
    val_loss = loss_fn(params, val_X, val_Y)
    val_acc = accuracy_fn(params, val_X, val_Y)
    print(f"Epoch {epoch+1}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_params = params
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered")
            break

training_time = time.time() - start_time
print(f"Training time: {training_time:.2f} seconds")

# Evaluation
test_acc = accuracy_fn(best_params, test_X_pre_val, test_Y_pre_val)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

# Predictions and metrics
val_preds = jnp.argmax(forward(best_params, val_X, is_training=False), axis=1)
val_true = jnp.argmax(val_Y, axis=1)

conf_matrix = confusion_matrix(val_true, val_preds)
class_report = classification_report(val_true, val_preds, target_names=[
    f"Class {i}" for i in range(10)
])

print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)


Epoch 1, Val Loss: 1.8005, Val Acc: 0.4025
Epoch 2, Val Loss: 1.7066, Val Acc: 0.4469
Epoch 3, Val Loss: 1.6428, Val Acc: 0.4717
Epoch 4, Val Loss: 1.6042, Val Acc: 0.4833
Epoch 5, Val Loss: 1.5859, Val Acc: 0.4901
Epoch 6, Val Loss: 1.5673, Val Acc: 0.5016
Epoch 7, Val Loss: 1.5518, Val Acc: 0.5074
Epoch 8, Val Loss: 1.5517, Val Acc: 0.5112
Epoch 9, Val Loss: 1.5442, Val Acc: 0.5106
Epoch 10, Val Loss: 1.5406, Val Acc: 0.5146
Epoch 11, Val Loss: 1.5365, Val Acc: 0.5190
Epoch 12, Val Loss: 1.5348, Val Acc: 0.5181
Epoch 13, Val Loss: 1.5340, Val Acc: 0.5187
Epoch 14, Val Loss: 1.5338, Val Acc: 0.5195
Epoch 15, Val Loss: 1.5320, Val Acc: 0.5211
Epoch 16, Val Loss: 1.5326, Val Acc: 0.5211
Epoch 17, Val Loss: 1.5325, Val Acc: 0.5210
Epoch 18, Val Loss: 1.5320, Val Acc: 0.5206
Epoch 19, Val Loss: 1.5322, Val Acc: 0.5203
Epoch 20, Val Loss: 1.5321, Val Acc: 0.5206
Epoch 21, Val Loss: 1.5322, Val Acc: 0.5214
Epoch 22, Val Loss: 1.5322, Val Acc: 0.5213
Epoch 23, Val Loss: 1.5321, Val Acc: 0.52

In [None]:
!pip install mxnet-mkl==1.6.0 numpy==1.23.1

Collecting mxnet-mkl==1.6.0
  Downloading mxnet_mkl-1.6.0-py2.py3-none-manylinux1_x86_64.whl.metadata (3.6 kB)
Collecting numpy==1.23.1
  Downloading numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB)
Downloading mxnet_mkl-1.6.0-py2.py3-none-manylinux1_x86_64.whl (76.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.7/76.7 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m85.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, mxnet-mkl
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.5
    Uninstalling numpy-1.23.5:
      Successfully uninstalled numpy-1.23.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of t

In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding
!pip install chainer

Collecting chainer
  Downloading chainer-7.8.1.tar.gz (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: chainer
  Building wheel for chainer (setup.py) ... [?25l[?25hdone
  Created wheel for chainer: filename=chainer-7.8.1-py3-none-any.whl size=971816 sha256=dc72ce5b754c5bee675bde195358d66d2dbdb18913deb11026c94d478ad57397
  Stored in directory: /root/.cache/pip/wheels/c4/95/6a/16014db6f761c4e742755b64aac60dbe142da1df6c5919f790
Successfully built chainer
Installing collected packages: chainer
Successfully installed chainer-7.8.1


In [None]:
%env PYTHONUTF8=1
!pip install chainer==7.8.1


env: PYTHONUTF8=1


NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968

In [None]:
!pip install mxnet


Collecting mxnet
  Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting numpy<2.0.0,>1.16.0 (from mxnet)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting graphviz<0.9.0,>=0.8.1 (from mxnet)
  Downloading graphviz-0.8.4-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl (49.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.1/49.1 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading graphviz-0.8.4-py2.py3-none-any.whl (16 kB)
Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m101.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, graphviz, mxnet
