In [22]:
%%writefile util.py

import tensorflow as tf
from tensorflow.keras import layers, models, datasets, utils, regularizers, callbacks
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from google.colab import files


def exportModel(history, polarity, amsgrad, model):
  """
  Persist the trained model to the working directory so it can be
  reloaded later for further experiments or as a reproducibility record.

  history  : accepted so callers can hand over the fit history; unused here
  polarity : architecture label, becomes part of the file name
  amsgrad  : optimizer flag, becomes part of the file name
  model    : object exposing save(path); it is written in .keras format

  The file is named cifar100_<polarity>_<amsgrad>.keras, the name is
  printed, and nothing is returned.

  NOTE: written for google colab; fetching the file to a local machine
  is a separate step and is not done by this function
  """
  targetPath = f"cifar100_{polarity!s}_{amsgrad!s}.keras"
  print(targetPath)
  model.save(targetPath)


def getData(reproSeed=1):
  """
  Load CIFAR-100 and return a true three way split.

  The dataset loader only provides a two way (train/test) split, so the
  training portion is split again here to obtain validation data.

  reproSeed : random_state handed to train_test_split so the
              train/validation partition is the same on every call

  returns (trainImgs, valImgs, trainLabels, valLabels, testImgs, testLabels)
    images : float32 arrays scaled to the 0-1 range
    labels : flattened to 1-D so they work with the sparse
             categorical loss used by the model
  """

  # first split, from obtaining the data
  (trainValImgs, trainValLabels), (testImgs, testLabels) = datasets.cifar100.load_data()

  # normalize the 0-255 pixel values to 0-1
  # cast to float32 first: dividing the integer arrays directly promotes
  # to float64, which doubles the memory of every image array (and of the
  # copies train_test_split makes) for no benefit
  trainValImgs = trainValImgs.astype("float32") / 255.0
  testImgs = testImgs.astype("float32") / 255.0

  # flatten the label columns to ensure compatibility with later functions
  trainValLabels = trainValLabels.ravel()
  testLabels = testLabels.ravel()

  # second split: hold out 25% of the training portion for validation
  trainImgs, valImgs, trainLabels, valLabels = train_test_split(
    trainValImgs,
    trainValLabels,
    test_size=0.25,
    random_state=reproSeed,
  )

  return trainImgs, valImgs, trainLabels, valLabels, testImgs, testLabels


Overwriting util.py


In [23]:
%%writefile model.py

import tensorflow as tf
from tensorflow.keras import layers, models, datasets, utils, regularizers, callbacks
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from google.colab import files



# conv filter counts, in layer order, for each supported architecture
_POLARITY_FILTERS = {
  "up": (32, 64, 128),
  "down": (128, 64, 32),
}


def _convBlock(filterCount):
  """
  one convolutional stage: 3x3 same-padded conv without bias, swish,
  batch norm, 2x2 max pool, 25% dropout
  """
  return [
    layers.Conv2D(filterCount, 3, padding='same', use_bias=False),
    layers.Activation('swish'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
  ]


def modelMaker(amsgrad= False, polarity = "up"):
  """
  this function creates and compiles a model

  there are two settings for the architecture:
    'up'   : the convolutional layers build up in filter count (32, 64, 128)
    'down' : the convolutional layers build down in filter count (128, 64, 32)
    (all kernels are the same size)

  there are two settings for hyperparameters
    'True' : ams grad is on
    'False': ams grad is off

  everything else is identical between the two architectures:
  a 32x32x3 input, batch norm, three conv stages, flatten, two l2
  regularized dense layers (256 and 128 units) and a 100 way softmax

  the model is compiled with Adam, sparse categorical crossentropy
  and accuracy as the metric

  returns the compiled model
  raises ValueError when polarity is neither 'up' nor 'down'
  (previously this silently returned None)
  """

  # validate first so a typo fails here with a clear message
  # instead of later as an AttributeError on None
  filterCounts = _POLARITY_FILTERS.get(polarity) if isinstance(polarity, str) else None
  if filterCounts is None:
    raise ValueError(
      "polarity must be one of " + str(sorted(_POLARITY_FILTERS)) + ", got " + repr(polarity)
    )

  stack = [
    layers.Input(shape=(32, 32, 3)),
    layers.BatchNormalization(),
  ]

  # the only difference between 'up' and 'down' is the order of filter counts
  for filterCount in filterCounts:
    stack.extend(_convBlock(filterCount))

  stack.extend([
    layers.Flatten(),
    layers.Dense(256, activation="silu", kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(128, activation="silu", kernel_regularizer=regularizers.l2(0.01)),
    layers.Dense(100, activation="softmax"),
  ])

  model = models.Sequential(stack)
  model.compile(optimizer=Adam(amsgrad=amsgrad),
                loss="sparse_categorical_crossentropy",
                metrics=["accuracy"])
  return model


Overwriting model.py


In [24]:
%%writefile main.py

from util import *
from model import *


import tensorflow as tf
from tensorflow.keras import layers, models, datasets, utils, regularizers, callbacks
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from google.colab import files




def mainrun(polarity, amsgrad):
  """
  this function is the main run for setting up,
  fitting, saving, and testing one model

  polarity : architecture setting passed to modelMaker ('up' or 'down')
  amsgrad  : optimizer setting passed to modelMaker (True or False)

  there are some manipulatable settings below (seed, epochs, batch size,
  early stopping) but nothing that should be changed between tests

  prints the model summary and the test accuracy, saves the model
  through exportModel, and returns nothing
  """
  # seed for reproducibility
  reproSeed=1
  # max epochs
  maxEpochs=20
  # samples per gradient step
  batchSize=50

  # seed the python, numpy and tensorflow generators so weight
  # initialisation and batch shuffling repeat between runs, not only the
  # data split (some accelerator ops may still be non-deterministic)
  utils.set_random_seed(reproSeed)

  # early stopping watches the validation loss: the training loss keeps
  # falling while the model overfits, so monitoring it rarely triggers
  # and "best" weights would mean the most overfit ones
  earlyStop = callbacks.EarlyStopping(monitor='val_loss',
                                      patience=5,
                                      restore_best_weights=True)

  # get cifar100 fine data with full 3 way split
  trainImgs, valImgs, trainLabels, valLabels, testImgs, testLabels = getData(reproSeed=reproSeed)

  # generate the model with architecture based on up or down
  # and amsgrad on or off
  model = modelMaker(amsgrad=amsgrad , polarity=polarity)

  # show relevant information about the model
  print("polarity:", polarity,"   amsgrad:",  amsgrad)
  model.summary()

  # fit the model
  history = model.fit(
    trainImgs, trainLabels, epochs=maxEpochs, validation_data=(valImgs, valLabels),
    callbacks=[earlyStop], batch_size=batchSize
  )

  # save the model to disk so you can use it later
  exportModel(history=history, polarity=polarity, amsgrad=amsgrad, model=model)

  # placeholder until we get resampled CI testacc
  testLoss, testAcc = model.evaluate(testImgs, testLabels)
  print(f"Test Accuracy: {testAcc*100:.2f}%")




if __name__=="__main__":
  # each entry below is one test: (polarity, amsgrad)
  # we need four total (two architectures x two hyperparameter settings)
  # more can be added, and individual ones can be commented out
  # since these models take a long time to train
  #
  # driving every test through the same mainrun call keeps the setup
  # honest: only what the experiment is interested in changes,
  # everything else is held constant
  experiments = [
    ("up", False),
    ("up", True),
    ("down", False),
    ("down", True),
  ]

  for polarity, amsgrad in experiments:
    mainrun(polarity=polarity, amsgrad=amsgrad)


Overwriting main.py


In [25]:
# `files` is imported into the notebook kernel here; the download cell
# further down relies on it
from google.colab import files


# run all four experiments in a separate python process;
# main.py saves each trained model as a cifar100_*.keras file
!python main.py

2024-07-08 06:24:23.450356: I tensorflow/core/tpu/tpu_api_dlsym_initializer.cc:95] Opening library: /usr/local/lib/python3.10/dist-packages/tensorflow/python/platform/../../libtensorflow_cc.so.2
2024-07-08 06:24:23.450538: I tensorflow/core/tpu/tpu_api_dlsym_initializer.cc:119] Libtpu path is: libtpu.so
2024-07-08 06:24:23.504273: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-08 06:24:34.870728: I external/local_xla/xla/service/service.cc:168] XLA service 0x592cb8ba2d60 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
2024-07-08 06:24:34.870783: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): TPU, 2a886c8
2024-07-08 06:24:34.870793: I external/local_xla/xla/service/serv

In [26]:
# show files: the four cifar100_*.keras models saved by main.py
# should be listed next to the generated .py modules
!ls

cifar100_down_False.keras  cifar100_up_False.keras  main.py   __pycache__  util.py
cifar100_down_True.keras   cifar100_up_True.keras   model.py  sample_data


In [27]:
# the downloading in files is bugged for colab
# so fetching the saved models is kept in its own cell,
# separate from the saving done during training
savedModels = (
  "cifar100_down_False.keras",
  "cifar100_down_True.keras",
  "cifar100_up_False.keras",
  "cifar100_up_True.keras",
)

for savedModel in savedModels:
  files.download(savedModel)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>