**Notebook made to compare Autoencoders with PCA using FCNN**

The notebook is designed to run on Google Colab. It can easily be adapted to run locally by changing a few imports (e.g. the Colab-specific ones).

# Set up

In [None]:
# --- Data -------------------------------------------------------------------
# Root folder of the data sets. Keep the trailing slash: the run cell builds
# file names by plain string concatenation onto this value.
# Google Drive example: '/content/drive/myfolder/data'
base_path = './data/'

# --- Project code -----------------------------------------------------------
# Folders that are appended to sys.path so the project modules can be imported.
# Google Drive example: '/content/drive/My Drive/myfolder/helper_functions'
models_path = './models'
modules_path = './helper_functions'

# --- Checkpoints ------------------------------------------------------------
# Location handed to the TensorFlow ModelCheckpoint callback.
# Google Drive example: '/content/drive/MyDrive/best_models'
checkpoint_filepath = './best_models'

In [None]:
# pip calls
!pip install tensorflow_addons

# all the nice imports <3
import os
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import h5py
import numpy as np
from datetime import datetime
import keras
import importlib
import pickle

# Enable NumPy-style behavior on tf.Tensor objects so that tensors can be
# used like NumPy arrays in the rest of the notebook.
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

# show important library versions
print("TensorFlow version: {}".format(tf.__version__))
print("TensorFlow Datasets version: ",tfds.__version__)


# Parent directory of the configured checkpoint path
# (for the default './best_models' this evaluates to '.').
checkpoint_dir = os.path.dirname(checkpoint_filepath)

# Make the project folders importable. These appends have to run before the
# project imports below, because those are resolved through sys.path.
import sys
sys.path.append(models_path)
sys.path.append(modules_path)

# Project-local modules (expected to live in the two folders appended above).
import processing
import metrics
import first_models as fm
import autoencoder_models as am
import conv_architectures as ca

# Very specific Colab things

In [None]:
# Colab only: import the Drive helper and mount Google Drive at
# /content/drive so Drive-hosted paths can be used from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Ask TensorFlow for its GPU device name and report whether the first GPU
# is available on this runtime.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  print('Warning: GPU device not found')

# TensorBoard logging: load the notebook extension and build a callback that
# writes to a timestamped directory under logs/scalars/.
# NOTE(review): in the run cell this callback only appears in a commented-out
# `callbacks` entry, so nothing is logged unless that entry is re-enabled.
%load_ext tensorboard
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Set up of runs
**Walkthrough**
1. Give the problem a name for file naming and create the PROBLEM FOLDER, along with a sub-folder called "history".
2. The data sets are named based on sample size (`size`) and data set size (`dataset_size`). These can be specified below.
3. Set all parameters that can be varied (i.e. those for which data sets exist).
4. Add custom definitions and the definitions of the models (dictionary with: model class, designation, and regularization).

In [None]:
# 1. Names that end up in output folders and file names
PROBLEM = 'HAM'
PROBLEM_FOLDER = 'pca_ae_comparison'

# 2. Data set selection (strings, because they are spliced into file paths)
size = '32'  # the original note lists 32 or 64 as options
dataset_size = '1000'

size_path = '{0}x{0}/'.format(size)
model_base_path = './{}/{}'.format(PROBLEM_FOLDER, size_path)

# 3. Full sweep over everything that can be varied (models are missing here).
#    These three arrays are overridden by the custom selection further down.
twopoint_array = np.array([False, True])
ds_size_array = np.array([1000,2000,5000,10000])
cr_array = np.array(['0.1000', '0.0500', '0.0200', '0.0100', '0.0050', '0.0020'])

# 4. Custom selection for this run.
#    Can vary: contrast ratio, data set size, two-point flag, models.

# Weight files of the encoders, one per input representation
PATH_BINARY = './autoencoders/32x32/AUTOENCODER_ds18000_tpFalse_epochs1000'
PATH_TWOPOINT = './autoencoders/32x32/AUTOENCODER_ds18000_tpTrue_epochs1000'

# Model variants to loop over
# alternative: type_array = np.array(['freeE', 'frozenE'])
type_array = np.array(['pretrained', 'untrained'])

# Narrowed-down sweep actually used by the run cell
twopoint_array = np.array([False])
ds_size_array = np.array([1000])
cr_array = np.array(['0.0100'])

# Training hyper-parameters
REG = 0.001
EPOCHS = 1000


**Function to define the FCNN**

In [None]:
def get_model2(model_type, encoder_path=None, regularization=0.05, input_shape=(32, 32, 1)):
    """Build the network: encoder -> Flatten -> Dense(6).

    Args:
        model_type: 'pretrained' loads encoder weights from ``encoder_path``;
            'untrained' keeps the encoder as created by ``am.create_enc_spec``.
        encoder_path: Location passed to ``encoder.load_weights``. Required
            when ``model_type`` is 'pretrained', ignored otherwise.
        regularization: L2 factor applied to the kernel of the Dense head.
        input_shape: Shape of one input sample, passed to both the encoder
            factory and the Keras input layer. Defaults to (32, 32, 1).

    Returns:
        An uncompiled ``tf.keras.Model`` with one input and the 6-unit Dense
        layer as its single output.

    Raises:
        ValueError: If ``model_type`` is neither 'pretrained' nor 'untrained',
            or if 'pretrained' is requested without an ``encoder_path``.
    """
    # Validate before building anything, so a bad configuration fails
    # immediately with a clear message instead of returning a non-model value.
    if model_type not in ('pretrained', 'untrained'):
        raise ValueError(
            "Unknown model type {!r}; expected 'pretrained' or 'untrained'".format(model_type)
        )
    if model_type == 'pretrained' and encoder_path is None:
        raise ValueError("encoder_path is required when model_type is 'pretrained'")

    encoder = am.create_enc_spec(inputshape=input_shape)
    if model_type == 'pretrained':
        print("Creating Pretrained Model")
        encoder.load_weights(encoder_path)
        # TOGGLE TRAINABILITY OF AUTOENCODER
        # encoder.trainable = False
    else:
        print("Creating Untrained Model")

    # DEFINE FCNN HERE
    inputs = tf.keras.Input(shape=input_shape)
    encoded = encoder(inputs)
    flattened = tf.keras.layers.Flatten()(encoded)
    # NOTE(review): ReLU on the output layer restricts predictions to >= 0;
    # confirm the target labels are non-negative.
    outputs = tf.keras.layers.Dense(
        6,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.L2(regularization),
    )(flattened)

    return tf.keras.Model(inputs=[inputs], outputs=[outputs])

# Execute Runs

In [None]:
# Train and evaluate one model per combination of
# (model type, contrast ratio, data set size, two-point flag),
# collect the results and pickle them at the end.
EPOCHS = 1000
results_array = []
FILENAME = 'all_data_' + PROBLEM
TEST_SIZE = 1000
SPLITS = np.array([0.8, 0.1, 0.1])
# L2 factor of the Dense head. Kept in one place so the value stored with each
# result is the value the model was actually built with.
HEAD_REGULARIZATION = 0.02

# Create the output folders up front: a missing 'history/' folder would
# otherwise only surface after all training runs have finished.
os.makedirs(model_base_path + 'history/', exist_ok=True)

counter = 0

for mt in type_array:
  for cr in cr_array:
    for ds in ds_size_array:
      for tp in twopoint_array:

        counter += 1
        print("Currently working on model No.", counter)

        file_stem = base_path + size_path + 'n=' + size + '_x=' + dataset_size + '_cr=' + cr
        data_path = file_stem + '_DS.h5'
        tp_data_path = file_stem + '_TP.h5'

        # Context managers close the HDF5 files even if reading fails.
        with h5py.File(data_path, 'r') as f:
          imgs = np.array(f['images'])
          labs = np.array(f['c_vectors'])

        print(labs[0])

        # For two-point runs the two-point data replaces the images as model
        # input (the original assigned to an unused name, so it was never used).
        if tp:
          with h5py.File(tp_data_path, 'r') as twopoint_f:
            tp_imgs = np.array(twopoint_f['twopoint'])
          imgs = tp_imgs

        pca_ind = bool(mt == 'PCA')

        # The last 200 samples are reserved: 100 for validation, 100 for test.
        dataproc = processing.Processing(
          imgs[:ds],
          labs[:ds],
          SPLITS,
          custom_valid = (imgs[-200:-100], labs[-200:-100]),
          custom_test = (imgs[-100:], labs[-100:]),
          batch_size=256,
          conv_behavior=not pca_ind,
          scale_labels=False,
          symmetric=True,
          twopoint=False,
          shuffle=False,
          pca=pca_ind,
          pca_dims=128
          )

        # Pick the encoder weights that match the input representation.
        xpath = PATH_TWOPOINT if tp else PATH_BINARY

        model = get_model2(mt, xpath, regularization=HEAD_REGULARIZATION)
        # summary() prints by itself and returns None, so it is not wrapped in print().
        model.summary()

        # Keep only the weights of the epoch with the best validation R^2.
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=True,
            monitor='val_r_square',
            mode='max',
            save_best_only=True)


        params = {
            'x' : dataproc.ds_train,
            'batch_size' : dataproc.batch_size,
            'epochs' : EPOCHS,
            'validation_data' : dataproc.ds_val,
            'verbose' : 1,
            'callbacks' : [model_checkpoint_callback]
            # 'callbacks' : [tensorboard_callback, model_checkpoint_callback]
        }

        model.compile(optimizer="Adam", loss="mse", metrics=[tfa.metrics.r_square.RSquare()])
        history = model.fit(**params)

        # Restore the best checkpoint before evaluating and saving.
        model.load_weights(checkpoint_filepath)

        test_metrics = model.evaluate(dataproc.ds_test)

        custom_path = PROBLEM + '_model' + mt + '_ds' + str(dataproc.splits[0]) + '_tp' + str(tp) + '_epochs' + str(params['epochs']) + '_cr' + cr
        total_path = model_base_path + custom_path
        model.save(total_path)

        # NOTE(review): the live model object is stored in the results and
        # pickled below; confirm that pickling Keras models works with the
        # TensorFlow version in use.
        results_array.append(
            {
                'run_name' : custom_path,
                'model_name' : mt,
                'regularization' : HEAD_REGULARIZATION,
                'ds_size' : dataproc.splits[0],
                'twopoint' : tp,
                'contrast_ratio' : cr,
                'epochs' : params['epochs'],
                'history' : history.history,
                'test_metrics' : test_metrics,
                'model_function' : model, # the actual model here
                'path_to_weights' : total_path,
            }
        )


with open(model_base_path + 'history/' + FILENAME, 'wb') as file_pi:
  pickle.dump(results_array, file_pi)