# PSBC - training notebook

Goal: train the PSBC model and evaluate it, using the grids stored in the folder "../Grids" (located in the main folder).

How: 

Input: 
* Neumann: bool (True or False), denotes the type of Boundary condition.
* subordinate: bool (True or False, default is True) to describe the type of model
* parallel: bool (True or False, default is False), in case the model is parallel
* with_PCA: bool (True or False, default is False), in case a different basis matrix is used


First, we need to mount Google Drive so that the notebook can access the folders containing the files.

In [1]:
# Switch for Google Colab: when True, the cells below mount Google Drive and
# change into the project folder stored on the Drive.
Colab = True

In [None]:
# Mount Google Drive only when the Colab switch is on.
if Colab:
    # Imported under the guard; presumably because google.colab is not
    # available outside a Colab runtime -- TODO confirm.
    from google.colab import drive
    drive.mount ('/content/drive')

Mounted at /content/drive


In [None]:
#import  matplotlib.pyplot as plt
import scipy.sparse as sc
import itertools as it
import pandas as pd
import numpy as np
import warnings
import shutil 
import copy
import glob
import sys
import os
import time
import tensorflow as tf
from tensorflow import keras
try: ## In order to open and save dictionaries, "dt": self.dt, "kind" : "Viscosity"
    import cPickle as pickle
except ImportError:  # python 3.x
    import pickle

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import multiprocess as mp
warnings.filterwarnings (action = "ignore", message = "internal issue")

At this moment we are in the folder


In [None]:
# Record and display the current working directory.
folder_now = os.getcwd ()
print (folder_now)

'/content'

We then move to the folder we need, and import the folder with all the libraries we will use.

In [None]:
# On Colab, move into the project folder stored on the mounted Drive.
# NOTE(review): outside Colab the notebook is presumably expected to be
# launched from the project folder already -- confirm.
if Colab: 
    os.chdir ("/content/drive/MyDrive/PSBC/")

# Register the library folder using an ABSOLUTE path: later cells call
# os.chdir, and a relative sys.path entry is resolved against the current
# working directory, so it would stop pointing at the libraries afterwards.
# The membership test keeps sys.path free of duplicates when the cell is re-run.
library_folder = os.path.abspath ("MOTHER_PSBC")
if library_folder not in sys.path:
    sys.path.insert (0, library_folder)

# Remember (and show) the project folder.
folder_now = os.getcwd ()
print (folder_now)

which are

In [None]:
from tfversion_binary_phase_separation import *
from tf_PSBC_extra_libs_for_training_and_grid_search import *

Now we access the appropriate folder

In [None]:
print ("Folder options are 'Neumann', 'Periodic', 'PCA_196', 'Classifier_196'")

# Name of the model folder to work in.
which_folder = "Neumann"

# Enter the model folder relative to the project folder recorded in
# `folder_now`, not relative to the current directory: this keeps the cell
# idempotent, i.e. re-running it does not look for
# "<which_folder>/<which_folder>".
os.chdir (os.path.join (folder_now, which_folder))

print ("\n\nNow we are in folder", os.getcwd ())

## Setting up parameters for the model

In [None]:
# Derive the model configuration from the selected folder.
if which_folder in ['Neumann', 'Periodic','Neumann_non_subordinate']:
    print (
        "\tRecall that grid search happens at eps = 0, hence models with\n"+\
        "\t Neumann and Periodic BCs are the same, because no diffusion is in place\n\n"
        )
    # Every folder in this branch except 'Periodic' uses Neumann boundary conditions.
    Neumann = not (which_folder == 'Periodic')
    # Only 'Neumann_non_subordinate' switches the subordinate model off.
    subordinate = not (which_folder == 'Neumann_non_subordinate')
    with_PCA = False
    parallel = False
    cpu = 4  ## In case of parallel processing
    Nx = 784
    classifier = False
elif which_folder == 'PCA_196':
    Neumann = True
    subordinate = True
    with_PCA = True
    parallel = False
    cpu = 4  ## In case of parallel processing
    Nx = 784
    classifier = True
elif which_folder == 'Classifier_196':
    Neumann = True
    # Nt is re-assigned by the training loop (`for Nt in nt_range`).
    Nt = 2
    # NOTE(review): the training cell passes `save_history = True` literally,
    # so this variable does not appear to be read there -- confirm before removing.
    save_history = True
    subordinate = True
    with_PCA = False
    parallel = False
    cpu = 4  ## In case of parallel processing
    Nx = 784
    classifier = True
else:
    # Fail early with a clear message: without this branch the variables above
    # would be undefined (or stale from a previous run of the cell) and the
    # problem would only surface later as a NameError or a wrong configuration.
    raise ValueError (
        "Unknown folder " + repr (which_folder) + ": expected one of 'Neumann', "
        "'Periodic', 'Neumann_non_subordinate', 'PCA_196', 'Classifier_196'."
        )

grid_type =  "training"
    
# Summary of the configuration in use.
print ("The model will perform \n", grid_type,
       "\nwith the following parameters:\n* Neumann is",
       Neumann, "\n* with_PCA is", with_PCA,
       "\n* subordinate is", subordinate,"\n* parallel is",\
       parallel)

## Computations

In [None]:
# Choose the values of Nt and the indexes of the digit pairs to train on.
if which_folder == 'PCA_196':
    nt_range  = [2]
    pairs_of_digits = [(4,9), (3, 5)]
    # With PCA the training loop uses every entry of digits_range as a position
    # in pairs_of_digits, so the range is derived from the list itself to keep
    # the two from getting out of sync.
    digits_range = list (range (len (pairs_of_digits)))
elif which_folder == 'Classifier_196':
    nt_range  = [2]
    # Entries index the table loaded from "../Grids/digits_index.p" in the
    # training cell. NOTE(review): 45 is presumably the number of distinct
    # digit pairs -- confirm against that file.
    digits_range = np.arange (0,45)
else:
    nt_range = [1,2,4]
    digits_range = [0]

> **Remark**: if you are running this model in Colab, it is better to use TPUs or GPUs to speed up training. In this case it is also convenient to break the processing into cases, handling one Nt folder at a time or, in the case of classifiers, splitting digits_range into pieces. 
In general, each batch evaluation runs pretty fast, and setting EPOCHS larger than 10 was a bit of overkill. You can change that if you want. 

# Training part

In [None]:
# Load the table that maps a grid index to its pair of variables (digits).
# NOTE(review): pickle.load can execute arbitrary code; only open pickle files
# produced by this project.
with open ("../Grids/digits_index.p", 'rb') as pickled_dic:
    grid_indexes  = pickle.load (pickled_dic)

for index in digits_range:
    
    # Select the two variables (variable_0, variable_1) used to build the training set.
    if with_PCA:
        variable_0, variable_1 = pairs_of_digits [index]
    else:
        variable_0, variable_1 = grid_indexes [index] 

    for Nt in nt_range:
        
        # Work inside the per-Nt folder. The starting folder is remembered so
        # that the working directory is restored even when training raises;
        # otherwise a failed run would leave the kernel in the wrong folder.
        model_folder = os.getcwd ()
        os.chdir (str (Nt))
        
        try:
            ### READ VARIABLES AND RETRIEVE TRAINING DATA (BOTH VARIABLES COMBINED)
            
            filename = "training_" + str (Neumann)+ "_" + str (Nt) + ".p"
            with open ("../../Grids/" + filename, 'rb') as pickled_dic:
                grid_range  = pickle.load (pickled_dic)

            cv = grid_range ["cv"]

            ############################################
            # Sanity checks: the grid file must match the current configuration.
            print ("Asserting Nt")
            assert (grid_range["Nt"] == Nt)
            print ("Asserting Neumann")
            assert (grid_range["Neumann"] == Neumann)
            print (grid_range)
            ############################################

            EPOCHS = grid_range ["EPOCHS"]
            patience = grid_range ["patience"]
            Nt = grid_range ["Nt"]  # same value as the loop variable (asserted above)
            train_dt_U = grid_range ["train_dt_U"]
            train_dt_P = grid_range ["train_dt_P"]

            print ("\n* Number of cross valications :", cv)
            print ("Variables given:\n\tvariable_0 :", variable_0,"\n\tvariable_1 :", variable_1)
            print ("\n* Parallel is", parallel, ". (If parallel is True, then use ", cpu," cores.)")
            print ("\n* Nx :", Nx, ", Neumann :", Neumann, ", Epochs : ", EPOCHS, ", Patience : ", patience)
            print ("\n* Nt :",  Nt, ", train_dt_U :", train_dt_U, ", train_dt_P :", train_dt_P)
            print ("\n* with_PCA :", with_PCA)

            ### SELECTING TRAINING SET
            # NOTE(review): S is not used below; the call is kept in case
            # select_split_pickle has side effects -- confirm.
            S = select_split_pickle (level = 2)
            X_train, Y_train, _, _, _ = prepare_train_test_set (variable_0, variable_1, level = 2)
            
            all_results = []
            
            ### RETRIEVING PRINCIPAL COMPONENTS IF NECESSARY
            if with_PCA:
                print ("Model with PCA : retrieving principal components")
                _, _, Vstar = np.linalg.svd (X_train)
            else:
                Vstar = None
            
            print ("Constructing GRIDS with best hyperparameters!!!")
            
            retrieve_best_par = BestPararameters_ptt_card_weights_k_shared_fixed (
                  Nt, variable_0, variable_1, classifier = classifier, with_PCA = with_PCA)

            # NOTE(review): `classifier = True` is hard-coded here although a
            # `classifier` variable exists (and is False for the
            # Neumann/Periodic folders) -- confirm this is intended.
            parameters_model_1, parameters_model_Nt = fill_parameters_dict (
                Nt,  retrieve_best_par,
                weight_sharing_split = True, classifier = True)

            all_parameters = {**parameters_model_1, **parameters_model_Nt}
            all_keys = list(all_parameters.keys())

            for key in all_parameters.keys():
                # The training set is rebuilt for every key, as in the original
                # code (presumably to start from untouched data -- confirm).
                X_train, Y_train, _, _, _ =\
                prepare_train_test_set (variable_0, variable_1, level = 2)

                parameters_now = all_parameters [key]

                # The key has to agree with the parameters stored under it.
                assert (key [0] == parameters_now ["layer_share_range"])
                assert (key [1] == parameters_now ["ptt_range"])

                print (
                    "Training the model for weight_k_share : ", parameters_now ["layer_share_range"],\
                     "and partition cardinality ", parameters_now ["ptt_range"]
                )
                
                # Suffix handed to fitting_several_models for naming saved files.
                if which_folder in ['PCA_196', 'Classifier_196']:
                    append_to_saved_file_name = "_var0_" + str (variable_0) + "_var1_" + str (variable_1)
                    if which_folder in ['PCA_196']:
                        append_to_saved_file_name = "_PCA_" + append_to_saved_file_name
                else:
                    append_to_saved_file_name = "_Index_"+ str (key [0]) + "_" + str (key [1]) + "_" + str (Nt)

                print ("\n Parameters in use :", parameters_now)

                # The training data is passed twice, exactly as in the original call.
                all_results = fitting_several_models(
                    cv, parallel, cpu, X_train, Y_train, X_train, Y_train, parameters_now,
                    Nx, Neumann, EPOCHS, patience, Nt, train_dt_U, train_dt_P,
                    with_PCA, Vstar, save_best_only = True,
                    append_to_saved_file_name = append_to_saved_file_name,
                    save_history = True,
                    subordinate = subordinate)

                # Stack one row of accuracies per fit. Results are ordered by their
                # index j and the accumulator is reset for every key, so results
                # from a previous key can never leak into this key's file (the
                # original relied on the first result carrying j == 0).
                Accuracies, Parameters = None, None
                for _, a, b in sorted (all_results, key = lambda result: result [0]):
                    if Accuracies is None:
                        Accuracies, Parameters = a [np.newaxis,:], b
                    else:
                        # Every fit must have used the same parameters.
                        assert (b == Parameters)
                        Accuracies = np.vstack ([Accuracies, a [np.newaxis,:]])

                if Accuracies is None:
                    raise RuntimeError (
                        "fitting_several_models returned no results for key " + str (key))

                # Create the output folder if needed; unlike a bare `except: pass`
                # this still reports real failures (e.g. permission errors).
                os.makedirs ("training", exist_ok = True)

                print ("Creating Accuracies and parameter pickled file")
                if which_folder in ['PCA_196', 'Classifier_196']:
                    file_name =\
                    "Training_accuracies_" + str (key [0]) + "_" + str (key [1]) +\
                    "_" + str (Nt)+"_classifier_"+ str (variable_0)+ "_" +\
                    str (variable_1)+".p"
                    
                    if which_folder in ['PCA_196']:
                        file_name = "PCA_" + file_name
                else:
                    file_name = "Training_accuracies_"+ str (key [0])+\
                    "_" + str (key [1]) + "_" + str (Nt)+"_vary_eps_" +\
                    str (variable_0)+ "_" + str (variable_1)+".p"
                
                file_name = "training/" + file_name
                
                with open (file_name, 'wb') as save:
                    pickle.dump ((Accuracies, Parameters), save, protocol = pickle.HIGHEST_PROTOCOL)        
                    print ("Statistics pickled to ", file_name)
                
                evaluate_model (
                    *key, Nt, variable_0, variable_1, all_parameters,
                    Neumann = Neumann, classifier = classifier,
                    with_PCA = with_PCA, subordinate = subordinate)
        finally:
            # Always return to the model folder, even after an error.
            os.chdir (model_folder)

Training the model for weight_k_share :  [1] and partition cardinality  [784]

 Parameters in use : {'eps_range': array([0.], dtype=float32), 'dt_range': array([0.2], dtype=float32), 'ptt_range': array([784], dtype=uint16), 'layer_share_range': array([1], dtype=uint16), 'lr_U_range': array([0.1], dtype=float32), 'lr_P_range': array([0.001], dtype=float32)}

RUNNING THE MODEL SERIALLY

Fixed hyperparameters

Nx : 784 	Nt : 1 	Neumann : True 	patience : 10 	train_dt_U : True 	train_dt_P : True 
	with_PCA : False


	We will fit the model 1 times



Varying parameters time 
 0
eps : 0.0 	dt : 0.2 	ptt_cardnlty : 784 	layers_K_shared : 1 	lr_U : 0.1 	lr_P : 0.001
Setting up a subordinate model with phase
Setting up a basic layer with Neumann B.C.s.
Setting up a basic layer with Neumann B.C.s.
Model: "psbc_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_layer_2 (Zero_layer)    multiple                