# ***Installation Requirements***



In [None]:
# Install a pinned TensorFlow release through an IPython shell escape.
# NOTE(review): the cells visible in this notebook import torch, not
# tensorflow -- presumably one of the project modules needs it; confirm
# before changing or removing the pin.
!pip install tensorflow==1.14

# ***Mount Google Drive***

In [None]:
# Mount Google Drive at /gdrive (Colab-specific helper), then switch the
# working directory to the project's Anomaly_Detection folder. The relative
# paths used further down ('Results/...', '../../Dataset/...') are resolved
# against this directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive/My\ Drive/wowmom22_tes-rnn/Anomaly_Detection

# ***Imports***

In [None]:
import math
import numpy as np
from torch.utils.data import DataLoader
from data_loading import create_dataset, Dataset
from config import get_config
from trainer import TESRNNTrainer
from validator import TESRNNValidator
from tester import TESRNNTester
from model import TESRNN
from loss_modules import *

# ***TES-RNN***

In [None]:
# CONFIGURATION SETTINGS
# Every name below is a notebook-level constant read by the simulation cell.

# --- What to test ----------------------------------------------------------
# Services whose traffic is evaluated, one simulation campaign per service
services = [
    'Facebook',
    'Instagram',
    'Snapchat',
    'Twitter',
    'YouTube',
]

# Base Station under test: dataset file name and results sub-folder
bs_file, bs_folder = '/(35,3).npy', '/BS_(35,3)'

# Number of clusters (1 = single cell prediction)
num_clusters = 1

# Dropout probabilities to sweep over
ps_dropout = [0.1, 0.2, 0.3, 0.4, 0.5]

# --- Uncertainty estimation -------------------------------------------------
# Number of repeated validation runs and repeated test runs
uncertainty_runs, uncertainty_test_runs = 10, 100

# --- Training ----------------------------------------------------------------
# Number of training epochs and training batch size
epochs, batch_size = 100, 288

# Number of samples in the train / validation / test splits
train_samples, val_samples, test_samples = 16128, 4032, 2016

# Input size and output size of the prediction
input_size, output_size = 6, 1

# --- Golden search algorithm -------------------------------------------------
# Golden ratio used to place the probe points inside the search interval
gratio = (1 + math.sqrt(5)) / 2

# The search stops once the interval length is no longer above this value
stop_value = 0.01

In [None]:
# SIMULATION RUNS
#
# For every (service, dropout probability) pair, `num_runs` simulations are
# executed. Each simulation:
#   1. tunes the Minimum Level Threshold tau with a golden search that
#      minimizes the averaged denormalized validation loss;
#   2. saves the selected tau to Results/<run_id>/optimal_tau.npy;
#   3. trains a fresh model with that tau and runs the tester
#      `uncertainty_test_runs` times.
num_runs = 10

# Simulations over different services
for service in services:

    # Simulations over different dropout probabilities
    for p_dropout in ps_dropout:

        # Configuration loading
        config = get_config('Traffic', epochs, num_clusters, batch_size, train_samples, val_samples, test_samples, input_size, output_size)

        # Data loading
        data = '../../Dataset/' + service + bs_file
        train, val, test = create_dataset(data, config['chop_train'], config['chop_val'], config['chop_test'])
        dataset = Dataset(train, val, test, config['device'])

        # Maximum of single cluster traffic in the training set (for normalization)
        maximum = np.max(train[0])

        # Running many simulations for a given service
        for i in range(1, num_runs + 1):

            # Identifier of this simulation; also the sub-folder of Results/
            # that is read and written below. Built before the search so that
            # it is defined even when the search loop body never executes.
            run_id = service + bs_folder + '/Dropout_' + str(p_dropout) + '/Simulation_' + str(i)
            results_dir = 'Results/' + run_id

            # Initial extremes of the interval of the Minimum Level Threshold tau
            # (expressed as fraction of maximum)
            tau_min = 0.0
            tau_max = 1.0

            # Current probe points inside the interval of tau.
            # NOTE(review): on the first pass c and d coincide with the
            # interval extremes, so that pass trains two models without
            # shrinking the interval. Kept as-is to preserve the original
            # evaluation sequence; consider starting from the golden points.
            c = tau_min
            d = tau_max

            # Cache of denormalized validation losses, keyed by rounded tau,
            # so that a probe point reused by the next pass is not retrained
            val_dict = {}

            # Golden search: stop once the interval is short enough
            while abs(tau_max - tau_min) > stop_value:

                # Evaluate the left probe first, then the right one
                probe_losses = []
                for tau in (c, d):
                    key = round(tau, 6)

                    # Denormalized validation loss not yet calculated for this tau
                    if key not in val_dict:

                        # Dataloader initialization
                        dataloader = DataLoader(dataset, batch_size=config['series_batch'], shuffle=False)

                        # Model initialization
                        model = TESRNN(tau=tau, maximum=maximum, num_clusters=num_clusters, config=config, run_id=run_id, p_dropout=p_dropout)

                        # Run model trainer
                        trainer = TESRNNTrainer(model, dataloader, run_id, config)
                        trainer.train_epochs()

                        # Run model validator with uncertainty: repeat the
                        # validation and average the resulting losses
                        uncertain_val_losses = []
                        for _ in range(uncertainty_runs):
                            validator = TESRNNValidator(model, dataloader, run_id, config)
                            validator.validating()

                            # Compute denormalized validation loss from the
                            # arrays found in the results folder after validation
                            norm_preds = np.load(results_dir + '/val_predictions.npy')
                            norm_actuals = np.load(results_dir + '/val_actuals.npy')
                            levels = np.load(results_dir + '/val_levels.npy')
                            uncertain_val_losses.append(denorm_validation_loss(norm_preds, norm_actuals, levels))

                        val_dict[key] = np.average(np.array(uncertain_val_losses))

                    probe_losses.append(val_dict[key])

                f_c, f_d = probe_losses

                # Keep the sub-interval on the side of the smaller loss
                if f_c < f_d:
                    tau_max = d
                else:
                    tau_min = c

                # Place the new probe points inside the shrunk interval
                c = tau_max - (tau_max - tau_min) / gratio
                d = tau_min + (tau_max - tau_min) / gratio

            # Get the final optimal Minimum Level Threshold tau (interval midpoint)
            tau = (tau_min + tau_max) / 2
            np.save(results_dir + '/optimal_tau.npy', tau)

            # Run the optimized model

            # Dataloader initialization
            dataloader = DataLoader(dataset, batch_size=config['series_batch'], shuffle=False)

            # Model initialization
            model = TESRNN(tau=tau, maximum=maximum, num_clusters=num_clusters, config=config, run_id=run_id, p_dropout=p_dropout)

            # Run model trainer
            trainer = TESRNNTrainer(model, dataloader, run_id, config)
            trainer.train_epochs()

            # Run model tester with uncertainty: one tester per repetition,
            # each receiving its repetition index
            for test_run in range(uncertainty_test_runs):
                tester = TESRNNTester(model, dataloader, run_id, config, test_run)
                tester.testing()