# Preparation

## Imports

In [1]:
# TensorFlow
import tensorflow as tf
import tensorboard as tb
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
# Other stuff
import itertools
import datetime
import time
import pickle
import functools
import json
import sys
import os
import shutil

## Paths

In [2]:
# Base path for functions. Defaults to the original checkout location, but can
# be overridden with the TWRDS_BASE_PATH environment variable so the notebook
# also runs on machines where the repository lives somewhere else.
base_path = os.environ.get("TWRDS_BASE_PATH") or "C:/repos/"
# All paths below are built by plain concatenation, so make sure the base
# path ends in exactly one slash regardless of how the override was written
base_path = base_path.rstrip("/\\") + "/"

# Make the project package importable. Guarded so that re-running this cell
# does not pile up duplicate entries in sys.path.
if base_path not in sys.path:
    sys.path.append(base_path)

# Path where configs are stored
configs_path = base_path + "twrds_unbiased_anns/configs/"

# Path to store runs
runs_path = base_path + "twrds_unbiased_anns/runs/"

# Path to store run results
results_path = base_path + "twrds_unbiased_anns/runs/results/"

## Functions

In [3]:
# Import of functions
# Sample creation
from twrds_unbiased_anns.src.data.samples import create_sample_array, get_sample_data, get_sample_params, convert_sample_to_np_array, gen_from_sample, dataset_from_gen
# Evaluation
from twrds_unbiased_anns.src.data.eval import load_eval_samples, evaluate_performance, evaluate_performance_class, evaluate_model, store_results
# Models
from twrds_unbiased_anns.src.tf.models import get_model
# Losses
from twrds_unbiased_anns.src.tf.losses import get_loss
# Optimizers
from twrds_unbiased_anns.src.tf.optimizers import get_optimizer
# Utils
from twrds_unbiased_anns.src.utils import load_config_from_file, mkdir, rmdir

# Config

In [4]:
# Name of this run; the config file carries the same name
run_name = "classification_all"
config_filename = f"{run_name}.json"

# Directory that belongs to this run
run_dir = f"{runs_path}{run_name}"

# Today's date, both as datetime object and as day-month-year string
cur_date = datetime.datetime.today()
date_str = cur_date.strftime("%d-%m-%Y")

# Read every setting of the run from its config file
(
    name,
    eval_sample_filename,
    dataset_size,
    colors,
    optimizer,
    repeats_per_model,
    batch_size,
    n_epochs,
    mean_diffs,
    stddevs,
    minority_shares,
    categorical,
    models,
    loss_functions,
    thresholds,
    noises,
) = load_config_from_file(f"{configs_path}{config_filename}", "class")

# Create the run directory first, then the log directory inside of it
log_base_dir = f"{run_dir}/logs/"
for directory in (run_dir, log_base_dir):
    mkdir(directory)

[WinError 183] Eine Datei kann nicht erstellt werden, wenn sie bereits vorhanden ist: 'C:/repos/twrds_unbiased_anns/runs/classification_all'
Deleting directory and creating again ...


# Training

## TensorBoard Setup

In [9]:
# Optional manual cleanup (disabled): delete the tensorboard temp dir.
# The path is specific to one user's machine, adjust it before enabling.
#rmdir("C:/Users/lucas/AppData/Local/Temp/.tensorboard-info")
# Load the TensorBoard notebook extension (reload, so re-running this cell works)
%reload_ext tensorboard
# Show TensorBoard for the log directory of this run
%tensorboard --logdir $log_base_dir

Reusing TensorBoard on port 6006 (pid 8768), started 0:02:06 ago. (Use '!kill 8768' to kill it.)

In [None]:
# Accumulator that is handed to evaluate_model for every trained model and
# written to disk once, after all combinations have been processed
results = []

# Load evaluation sample
eval_samples = load_eval_samples(base_path + "twrds_unbiased_anns/data/eval/" + eval_sample_filename)

# Calculate number of steps per epoch (truncated to whole batches)
n_steps = int(dataset_size/batch_size)

# Iterate over all variable parameter combinations
for (modelname, lossname, category, m_diff, std, share, threshold, noise) in itertools.product(models, loss_functions, categorical, mean_diffs, stddevs, minority_shares, thresholds, noises):

  # Get name of current iteration (used for the sample file and the log directories)
  cur_name = name.format(modelname, lossname, category, m_diff, std, share, threshold, noise)

  # Clear session once and then every time before a new model is trained
  tf.keras.backend.clear_session()

  # Get sample parameters
  white_square, white_circle, colorful_square, colorful_circle = get_sample_params(category, m_diff, std, share)

  # Prepare and save sample, so the exact training data of this combination is kept with the run
  train_sample = create_sample_array(dataset_size, white_square, white_circle, colorful_square, colorful_circle)
  sample_filename = run_dir + "/" + "sample_{}_{}".format(cur_name, date_str)
  np.save(file = sample_filename, arr = train_sample)

  # Create dataset from training data sample; it is shared by all repeats of this combination
  data = dataset_from_gen(train_sample, n_epochs, batch_size, colors, task_type = "class", threshold = threshold, noise = noise, distractor = category)

  # Loop training for number of repeats (1-based, the number ends up in log dir name and results)
  for repeat in range(1, repeats_per_model + 1):

    # Clear keras session
    tf.keras.backend.clear_session()

    # Create model and compile it
    model = get_model(modelname, task_type = "class")
    model.compile(optimizer = get_optimizer(optimizer), loss = get_loss(lossname))

    # Create logdir and callback (one log directory per combination and repeat)
    logdir = log_base_dir + cur_name + "_" + str(repeat)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logdir)

    # Do training
    model.fit(data, epochs = n_epochs, steps_per_epoch = n_steps, verbose = 0, callbacks=[tensorboard_callback])

    # Evaluate model
    # Create dictionary with model information
    row = {
        "run": run_name,
        "date": cur_date,
        "model": modelname,
        "loss": lossname,
        "category": category,
        "m_diff": m_diff,
        "stddev": std,
        "minority_share": share,
        "repeat": repeat,
        "threshold": threshold,
        "noise": noise
    }
    # Run eval with the threshold of the current combination, i.e. the same
    # value that was passed to dataset_from_gen and is recorded in `row`.
    # This used to be hard-coded to 75, so any other entry in `thresholds`
    # was trained and logged with one threshold but evaluated with another.
    # NOTE(review): if evaluating against a fixed 75 was intentional,
    # restore the literal and document the reason here.
    evaluate_model(model, eval_samples, row, results, colors, task_type = "class", threshold = threshold)

# Store total results as excel
excel_name = "{}_{}_results.xlsx".format(run_name, date_str)
filepath = results_path + excel_name
store_results(results, filepath)