### Test Jupyter Notebook with noisy versus deterministic data 

#### Enabling the notebook to be run on any device

In [1]:
import os

# Assemble every location with os.path so the separators match the host operating system.
# NOTE: abspath() resolves against the current working directory, so this cell has to run
# before any cell that changes the working directory.

notebook_path = os.path.abspath('MSc Thesis 1.ipynb')                  # Jupyter notebook path

# Both top-level folders sit next to the notebook
TCDF_path, EBT_path = (
    os.path.join(os.path.dirname(notebook_path), folder)               # TCDF folder path, EBT folder path
    for folder in ('TCDF', 'EBT Data')
)

TCDFdata_path = os.path.join(TCDF_path, 'data')                        # TCDF/data folder path
DefaultCod_path = os.path.join(EBT_path, 'Modified Default Cod')       # EBT/Modified Default Cod folder path
Cod_path = os.path.join(DefaultCod_path, 'Modified Default.out')       # EBT/Modified Default Cod/Modified Default.out file path

#### Installing packages and dependencies

In [2]:
# Import libraries
import sys
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.lines as lines

# Install conda and pip packages for the current Jupyter kernel.
# %pip always installs into the interpreter that backs this kernel, whereas !pip3 uses
# whichever pip3 is first on the shell PATH, and an unquoted !{sys.executable} breaks
# when the interpreter path contains spaces. A single %pip call also covers torchvision,
# which was previously installed twice.
!conda install -c anaconda tensorflow --yes
%pip install torch torchvision torchaudio

# These command lines may work too - uncomment if using
#!pip install torch==1.3.1+cu100 torchvision==0.4.2+cu100 -f https://download.pytorch.org/whl/torch_stable.html
#!conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch --yes

^C
^C
^C


#### Changing the working directory to run TCDF

In [3]:
# The TCDF scripts and their data files are addressed with relative paths,
# so the kernel has to work from inside the TCDF folder
directory = TCDF_path

# Where the kernel is working right now
current_directory = os.getcwd()

# Switch only when the kernel is not already in the TCDF folder
if not (current_directory == directory):
    os.chdir(TCDF_path)

# Show the working directory now in effect
os.getcwd()

'C:\\Users\\10528504\\Documents\\GitHub\\thesis\\TCDF'

In [4]:
# Define function for visualisation of scaled data

def visualise_scaled(ebt_scaled, years):
    """Plot scaled cod, sprat and resource biomass in three stacked panels.

    Parameters
    ----------
    ebt_scaled : pandas.DataFrame
        Frame holding the columns 'Cj', 'Ca', 'Cb', 'Sj', 'Sa', 'Sb',
        'Rj', 'Rs' and 'Ra'. The frame is only read: no column is added,
        overwritten or dropped, so an existing 'Time' column survives the call.
    years : sequence of two numbers
        ``years[0]`` and ``years[1]`` are the first and last value of the time
        axis; the rows of ``ebt_scaled`` are spaced evenly between them.

    Returns
    -------
    None
        The figure is drawn through matplotlib; nothing is returned.

    Raises
    ------
    KeyError
        If one of the nine biomass columns is missing from ``ebt_scaled``.
    """
    start, stop = years[0], years[1]

    # One entry per panel: y-axis label and the columns drawn in that panel
    groups = [
        ('Cod biomass', ['Cj', 'Ca', 'Cb']),
        ('Sprat biomass', ['Sj', 'Sa', 'Sb']),
        ('Resource biomass', ['Rj', 'Rs', 'Ra']),
    ]

    # Select the data first so a missing column fails before any figure is created
    selections = [ebt_scaled[columns] for _, columns in groups]

    # Keep the time axis in a local array instead of a temporary 'Time' column:
    # the caller's frame stays untouched even if plotting raises half-way through
    time = np.linspace(start, stop, len(ebt_scaled))

    # Set subplot figure defaults
    fig, [cod_plt, sprat_plt, resource_plt] = plt.subplots(3, 1, sharex = True)
    fig.subplots_adjust(hspace = 0)
    fig.suptitle('Cod-sprat-resource dynamics {start}-{stop} years (scaled min-max)'.format(start=start, stop=stop))

    # Label, draw and annotate each panel
    for axis, (ylabel, columns), data in zip([cod_plt, sprat_plt, resource_plt], groups, selections):
        axis.set(ylabel = ylabel)
        axis.plot(time, data)
        axis.legend(columns, loc = 'center left')

    # The x-axis is shared, so only the bottom panel carries its label
    resource_plt.set_xlabel('Time (years)')

#### Run deterministic dataset 20-70 years

In [5]:
%matplotlib inline

# Read the deterministic 20-70 year EBT series from the TCDF data folder and plot it
filepath = os.path.join(TCDFdata_path, 'EBT_20_70_dataset.csv')
deterministic_data = pd.read_csv(filepath_or_buffer = filepath)

visualise_scaled(ebt_scaled = deterministic_data, years = [20,70])

Error in callback <function flush_figures at 0x00000254C949CCA0> (for post_execute):


KeyboardInterrupt: 

In [None]:
%matplotlib inline

# Run runTCDF.py on the deterministic 20-70 year EBT dataset.
# --ground_truth takes a dataset=groundtruth pair of CSV files, both given relative to
# the TCDF folder, so the working directory must already be the TCDF folder.
# -i makes %run execute the script inside this notebook's namespace.

%run -i "runTCDF.py"  --ground_truth data/EBT_20_70_dataset.csv=data/EBT_demo_groundtruth.csv --learning_rate 0.1 --hidden_layers 3 --epochs 1000 --log_interval 250 --significance 0.9 --dilation_coefficient 3 --kernel_size 3 --seed 5467 --plot


Arguments: Namespace(cuda=False, data=None, dilation_coefficient=3, early_stopping=True, epochs=1000, ground_truth={'data/EBT_20_70_dataset.csv': 'data/EBT_demo_groundtruth.csv'}, hidden_layers=3, kernel_size=3, learning_rate=0.1, log_interval=250, lr_scheduler=True, optimizer='Adam', plot=True, seed=5467, significance=0.9)

 Dataset:  EBT_20_70_dataset.csv

 Analysis started for target:  Rs
INFO: Initializing learning rate scheduler
INFO: Initializing early stopping
Epoch:  1 [0%] 	Loss: 0.097749
Epoch    17: reducing learning rate of group 0 to 1.0000e-02.
Epoch    38: reducing learning rate of group 0 to 1.0000e-03.
Epoch    49: reducing learning rate of group 0 to 1.0000e-04.
Epoch    60: reducing learning rate of group 0 to 1.0000e-05.
Epoch    71: reducing learning rate of group 0 to 1.0000e-06.


In [None]:
# Run evaluate_predictions_TCDF.py to assess predictive accuracy on the deterministic dataset.
# The hyperparameters match the runTCDF.py call for the same dataset.
# NOTE(review): --train_test_split 0.5 presumably holds out half of the series for testing -
# confirm against evaluate_predictions_TCDF.py.

%run -i "evaluate_predictions_TCDF.py" --data data/EBT_20_70_dataset.csv --learning_rate 0.1 --hidden_layers 3 --epochs 1000 --log_interval 250 --dilation_coefficient 3 --kernel_size 3 --seed 5467 --plot --train_test_split 0.5

#### Run noisy dataset 20-70 years

In [None]:
%matplotlib inline

# Read the noisy 20-70 year EBT series from the TCDF data folder and plot it
filepath = os.path.join(TCDFdata_path, 'noisy_EBT_20_70_dataset.csv')

# Stored under its own name so the deterministic frame loaded earlier is not
# silently replaced by noisy data under a misleading variable name
noisy_data = pd.read_csv(filepath)
visualise_scaled(noisy_data, [20,70])

In [None]:
%matplotlib inline

# Run runTCDF.py on the noisy 20-70 year EBT dataset, using the same ground-truth file
# as the deterministic run. -i makes %run execute the script inside this notebook's namespace.
# NOTE(review): hidden_layers (4), significance (0.85), dilation_coefficient (1),
# kernel_size (1) and seed (5678) differ from the deterministic run and from the noisy
# evaluation cell below (3 / 3 / 3 / seed 5467) - confirm this is intentional.

%run -i "runTCDF.py"  --ground_truth data/noisy_EBT_20_70_dataset.csv=data/EBT_demo_groundtruth.csv --learning_rate 0.1 --hidden_layers 4 --epochs 1000 --log_interval 250 --significance 0.85 --dilation_coefficient 1 --kernel_size 1 --seed 5678 --plot


In [None]:
# Run evaluate_predictions_TCDF.py to assess predictive accuracy on the noisy dataset.
# The hyperparameters are the same as in the deterministic evaluation.
# NOTE(review): --train_test_split 0.5 presumably holds out half of the series for testing -
# confirm against evaluate_predictions_TCDF.py.

%run -i "evaluate_predictions_TCDF.py" --data data/noisy_EBT_20_70_dataset.csv --learning_rate 0.1 --hidden_layers 3 --epochs 1000 --log_interval 250 --dilation_coefficient 3 --kernel_size 3 --seed 5467 --plot --train_test_split 0.5