In [2]:
from tqdm.auto import tqdm 
import pandas as pd
import numpy as np
import xarray as xr
import netCDF4 as nf
from netCDF4 import Dataset
%matplotlib inline
import glob
import seaborn as sns
import matplotlib.pyplot as plt
import ast,gc,pickle
from copy import deepcopy

# Custom packages
import read_config
from util.data_process import read_vars, proc_dataset, miss
from util.models import performance_scores,train_baseline,causal_settings,train_PC1



In [3]:
import pickle
import numpy as np


# A function to save models with pickle
def save_models(models, filename):
    """Pickle ``models`` to ``filename``.

    Parameters
    ----------
    models : object
        Any picklable object (e.g. a model or a collection of models).
    filename : str or path-like
        Destination file; it is opened in binary write mode, so an existing
        file at that location is overwritten.
    """
    with open(filename, mode='wb') as sink:
        pickle.Pickler(sink).dump(models)


def read_pickle(filepath=None):
    """Load and return the object stored in the pickle file at ``filepath``.

    Parameters
    ----------
    filepath : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was unpickled from the file.
    """
    with open(filepath, mode="rb") as source:
        return pickle.load(source)


def flatten(xss):
    """Flatten one level of nesting: return a list of every item of every iterable in ``xss``."""
    flat = []
    for inner in xss:
        flat.extend(inner)
    return flat

In [4]:
import pickle
import gc
from tqdm import tqdm
import numpy as np
import os

In [5]:
# Read configuration file
config_set = read_config.read_config()
#config_set = read_config.read_config('./config.ini')

# Define Target: look up the target variable name for the configured target lag.
# (Every supported pair satisfies hours = 6 * lag, so the lag is presumably
# counted in 6-hour steps -- TODO confirm.)
target_by_lag = {20: 'delv120', 16: 'delv96', 12: 'delv72', 8: 'delv48', 4: 'delv24'}
target_lag = int(config_set['target_lag'])
if target_lag not in target_by_lag:
    # Fail here instead of leaving `target` undefined (or stale from an earlier
    # run of this cell), which would only surface later as a confusing error.
    raise ValueError(
        f"Unsupported target_lag {target_lag!r}; expected one of {sorted(target_by_lag)}"
    )
target = target_by_lag[target_lag]
#seeds = np.arange(100,131,1)

In [6]:
# Display the target variable name selected from the configuration
target

'delv24'

#### Loop through the 7 splits of the SHIPSPLUS data (with causal predictors), run PC_stable in Tigramite for each given pc_alpha value, and save one results pkl per split inside results/4/shipsnew/

In [19]:
# Significance levels (pc_alpha) scanned with PC-stable for every split.
pc_alphas = [0.0001, 0.00015, 0.001, 0.0015, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,
             0.09, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]

for split in range(7):  # Assuming 0 through 6
    # Load the current split. The folder is derived from `target` so the input
    # always matches the configured target (the output folder below already
    # follows config_set['target_lag']); previously 'delv24' was hard-coded.
    split_path = f'proc/pickle/{target}/dict_split{split}.pkl'
    with open(split_path, 'rb') as f:
        TIDATA = pickle.load(f)

    train_frames = TIDATA['Xnorml']['train']
    first_train_key = list(train_frames.keys())[0]
    var_names = train_frames[first_train_key].columns

    # Link assumptions handed to the causal discovery run.
    # NOTE(review): lag=4 is hard-coded; if it is meant to track
    # config_set['target_lag'] or tau_max it should be read from the config.
    onlyships_lag = causal_settings.link_onlyships(
        numvar=TIDATA['aligned_train'][list(TIDATA['aligned_train'].keys())[0]].shape[1],
        lag=4,
        target_ind=[0],
    )

    results = []
    for pc_alpha in tqdm(pc_alphas, desc=f'split {split}'):
        # Only the training frames are passed to the pipeline, so only they are
        # converted (the unused 'valid'/'test' conversions were dropped).
        # NaNs become -999.0, presumably the missing-value flag the pipeline expects.
        # The arrays are rebuilt for every run so each run starts from fresh data;
        # NOTE(review): this could be hoisted out of the loop if Pipeline is known
        # not to modify its input.
        train_arrays = {
            ind: np.asarray(train_frames[key].replace(np.nan, -999.0))
            for ind, key in enumerate(train_frames.keys())
        }

        result = train_PC1.Pipeline(
            train_arrays,
            pc_alpha,
            pc_type='run_pcstable',
            tau_min0=int(config_set['tau_min']),
            tau_max0=int(config_set['tau_max']),
            var_name=var_names,
            link_assumptions=onlyships_lag
        ).run_tigramite()

        # Release the converted arrays before the next run to keep memory down.
        del train_arrays
        gc.collect()
        results.append(result)

    # One entry in 'PC1_results' per pc_alpha, in the order of `pc_alphas`.
    savetos = {
        'dataframes': TIDATA['Xnorml'],
        'PC1_results': results,
        'var_names': var_names
    }

    output_dir = f'results/{int(config_set["target_lag"])}/shipsnew/'
    output_path = f'{output_dir}results_fold_{split}.pkl'

    os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'wb') as handler:
        pickle.dump(savetos, handler)








00%|███████████████████████████████████████████| 24/24 [00:39<00:00,  1.63s/it]

#### Loop through the 7 splits of the SHIPS developmental data, run PC_stable in Tigramite for each given pc_alpha value, and save one results pkl per split inside results/4/shipsold/

In [7]:
# Significance levels (pc_alpha) scanned with PC-stable for every split.
pc_alphas = [0.0001, 0.00015, 0.001, 0.0015, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08,
             0.09, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]

for split in range(7):  # Assuming 0 through 6
    # Load the current split. The folder is derived from `target` so the input
    # always matches the configured target (the output folder below already
    # follows config_set['target_lag']); previously 'delv24' was hard-coded.
    split_path = f'proc/pickle/{target}/olddict_split{split}.pkl'
    with open(split_path, 'rb') as f:
        TIDATA = pickle.load(f)

    train_frames = TIDATA['Xnorml']['train']
    first_train_key = list(train_frames.keys())[0]
    var_names = train_frames[first_train_key].columns

    # Link assumptions handed to the causal discovery run.
    # NOTE(review): lag=4 is hard-coded; if it is meant to track
    # config_set['target_lag'] or tau_max it should be read from the config.
    onlyships_lag = causal_settings.link_onlyships(
        numvar=TIDATA['aligned_train'][list(TIDATA['aligned_train'].keys())[0]].shape[1],
        lag=4,
        target_ind=[0],
    )

    results = []
    for pc_alpha in tqdm(pc_alphas, desc=f'split {split}'):
        # Only the training frames are passed to the pipeline, so only they are
        # converted (the unused 'valid'/'test' conversions were dropped).
        # NaNs become -999.0, presumably the missing-value flag the pipeline expects.
        # The arrays are rebuilt for every run so each run starts from fresh data;
        # NOTE(review): this could be hoisted out of the loop if Pipeline is known
        # not to modify its input.
        train_arrays = {
            ind: np.asarray(train_frames[key].replace(np.nan, -999.0))
            for ind, key in enumerate(train_frames.keys())
        }

        result = train_PC1.Pipeline(
            train_arrays,
            pc_alpha,
            pc_type='run_pcstable',
            tau_min0=int(config_set['tau_min']),
            tau_max0=int(config_set['tau_max']),
            var_name=var_names,
            link_assumptions=onlyships_lag
        ).run_tigramite()

        # Release the converted arrays before the next run to keep memory down.
        del train_arrays
        gc.collect()
        results.append(result)

    # One entry in 'PC1_results' per pc_alpha, in the order of `pc_alphas`.
    savetos = {
        'dataframes': TIDATA['Xnorml'],
        'PC1_results': results,
        'var_names': var_names
    }

    output_dir = f'results/{int(config_set["target_lag"])}/shipsold/'
    output_path = f'{output_dir}results_fold_{split}.pkl'

    os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'wb') as handler:
        pickle.dump(savetos, handler)








00%|███████████████████████████████████████████| 24/24 [00:20<00:00,  1.18it/s]

In [8]:
# Inspect the top-level keys of the split dictionary. TIDATA here is whatever
# the most recently executed split-loading loop left behind (its last split).
TIDATA.keys()

dict_keys(['datastorer', 'X_nonorml', 'y', 'size', 'var_names', 'Xnorml', 'aligned_train'])

In [9]:
# Read the variable names back from the stored results of the 'shipsold' experiment.
var_names = performance_scores.scores_seeds(
    seed=0,
    target=target,
    lag=int(config_set['target_lag']),
    exp='shipsold',
).read_stored()['var_names']

In [10]:
# Display the variable names currently held in var_names
var_names

Index(['DELV24', 'T200', 'T250', 'LAT', 'CSST', 'PSLV', 'Z850', 'D200', 'EPOS',
       'SHDC', 'RHMD', 'TWAC', 'G200', 'TADV', 'SHGC', 'POT', 'POT2', 'LHRD',
       'VSHR', 'PER', 'VPER', 'pc20'],
      dtype='object')

In [12]:
# Read the variable names back from the stored results of the 'shipsnew' experiment.
var_names = performance_scores.scores_seeds(
    seed=0,
    target=target,
    lag=int(config_set['target_lag']),
    exp='shipsnew',
).read_stored()['var_names']

In [13]:
# Display the variable names currently held in var_names
var_names

Index(['DELV24', 'T200', 'T250', 'LAT', 'CSST', 'PSLV', 'Z850', 'D200', 'EPOS',
       'SHDC', 'RHMD', 'TWAC', 'G200', 'TADV', 'SHGC', 'POT', 'POT2', 'LHRD',
       'VSHR', 'PER', 'VPER', 'R001', 'R000', 'SHMD', 'PVOR', 'SHL1', 'SHL0',
       'pc20'],
      dtype='object')