## AUTOGLUON experiments using the formatted CSV files as inputs

### check the environment first 

In [1]:
import sys

In [2]:
print(sys.executable) 

/home/nicolasf/anaconda3/envs/ML/bin/python


In [3]:
# Parameters 

GCM = 'ECMWF'  # GCM identifier: passed to prepare_data_CSV_to_AUTOML and embedded in the output path
var_name = 'TMEAN'  # variable name: selects which CSV files are read and is embedded in the output path
target_type = 'cat3_categories'  # target flavour: combined with region_name to build the target column name
region_name = 'ESI'  # region identifier: combined with target_type to build the target column name
skpca = True  # NOTE(review): this name is re-bound to a PCA estimator further down, so the flag itself is never consulted in the cells shown
standardized = False  # if True, the 'GCMs_std_...' CSV files are read instead of the 'GCMs_...' ones
shuffle = True # whether or not to shuffle the instances (both training and test instances)

### load external modules 

In [4]:
%matplotlib inline

In [5]:
import os
import pathlib
from shutil import copytree, rmtree

In [6]:
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [7]:
HOME = pathlib.Path.home()

In [8]:
from matplotlib import pyplot as plt

In [9]:
import proplot as plot

In [10]:
import numpy as np

In [11]:
np.random.seed(42)

In [12]:
import pandas as pd

In [13]:
import sklearn 
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# private submodule, deprecated since scikit-learn 0.22: kept for any cell still referencing `pca.PCA`
from sklearn.decomposition import pca

### import autogluon, tabular prediction, see [https://autogluon.mxnet.io/tutorials/tabular_prediction/tabular-quickstart.html](https://autogluon.mxnet.io/tutorials/tabular_prediction/tabular-quickstart.html)

In [14]:
import autogluon as ag
from autogluon import TabularPrediction as task

  Optimizer.opt_registry[name].__name__))


### load local modules 

In [15]:
sys.path.append('../../../../ml4seas/')

In [16]:
from GCM import prepare_data_CSV_to_AUTOML
from evaluation import calc_accuracy_sco

In [17]:
dpath = HOME / 'research' / 'Smart_Ideas' / 'outputs' / 'CSVs'

In [18]:
list(dpath.glob("*.csv"))

[PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_std_and_targets_cat3_and_anomalies_RAIN_test_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_and_targets_cat3_and_anomalies_TMEAN_training_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_std_and_targets_cat3_and_anomalies_TMEAN_training_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_and_targets_cat3_and_anomalies_RAIN_test_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_std_and_targets_cat3_and_anomalies_RAIN_training_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_std_and_targets_cat3_and_anomalies_TMEAN_test_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_and_targets_cat3_and_anomalies_TMEAN_test_set.csv'),
 PosixPath('/home/nicolasf/research/Smart_Ideas/outputs/CSVs/GCMs_and_targets_cat3_and_anomalies_RAIN_training_set.csv')]

In [19]:
# keyword arguments shared by every read: the first column is the index, and it is parsed as dates
csv_kwargs = dict(index_col=0, parse_dates=True)

if not standardized:
    # non-standardized GCM fields
    train_data = pd.read_csv(dpath / f'GCMs_and_targets_cat3_and_anomalies_{var_name}_training_set.csv', **csv_kwargs)
    test_data = pd.read_csv(dpath / f'GCMs_and_targets_cat3_and_anomalies_{var_name}_test_set.csv', **csv_kwargs)
else:
    # standardized GCM fields ('GCMs_std' prefix)
    train_data = pd.read_csv(dpath / f'GCMs_std_and_targets_cat3_and_anomalies_{var_name}_training_set.csv', **csv_kwargs)
    test_data = pd.read_csv(dpath / f'GCMs_std_and_targets_cat3_and_anomalies_{var_name}_test_set.csv', **csv_kwargs)

### get the training data 

In [20]:
train_data, GCMs_name_train, _, _ = prepare_data_CSV_to_AUTOML(train_data, GCM=GCM, region_name=region_name, target_type=target_type, scaling=False, doPCA=False)

Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


In [21]:
train_data.shape

(286, 4930)

In [22]:
train_data.columns

Index(['(-70.0, 70.0)', '(-70.0, 72.5)', '(-70.0, 75.0)', '(-70.0, 77.5)',
       '(-70.0, 80.0)', '(-70.0, 82.5)', '(-70.0, 85.0)', '(-70.0, 87.5)',
       '(-70.0, 90.0)', '(-70.0, 92.5)',
       ...
       '(60.0, 280.0)', '(60.0, 282.5)', '(60.0, 285.0)', '(60.0, 287.5)',
       '(60.0, 290.0)', '(60.0, 292.5)', '(60.0, 295.0)', '(60.0, 297.5)',
       '(60.0, 300.0)', 'WSI_cat3_categories'],
      dtype='object', length=4930)

### get the test data  

In [23]:
test_data, GCMs_name_test, _, _ = prepare_data_CSV_to_AUTOML(test_data, GCM=GCM, region_name=region_name, target_type=target_type, scaling=False, doPCA=False)

In [24]:
test_data.shape

(33, 4930)

### stratified k-fold 

In [25]:
from sklearn.model_selection import StratifiedKFold

### the percentage of variance to keep 

In [26]:
percent_variance = 0.9

### root path for saving the parameters of all the AUTOGLUON experiments 

In [27]:
saved_models = pathlib.Path('./saved_models/AUTOGLUON_v2/')

In [28]:
opath = saved_models.joinpath(f'./autogluon_exp_SKPCA_{GCM}_pred_{region_name}_reg_{var_name}_targetvar_{target_type}_target_type')

In [29]:
# create the output directory and any missing parents; exist_ok=True makes the call
# safe to repeat and avoids the check-then-create race of a separate exists() test
opath.mkdir(parents=True, exist_ok=True)

### checks on the shape and content of the training and test data 

In [30]:
train_data.shape

(286, 4930)

In [31]:
train_data.columns

Index(['(-70.0, 70.0)', '(-70.0, 72.5)', '(-70.0, 75.0)', '(-70.0, 77.5)',
       '(-70.0, 80.0)', '(-70.0, 82.5)', '(-70.0, 85.0)', '(-70.0, 87.5)',
       '(-70.0, 90.0)', '(-70.0, 92.5)',
       ...
       '(60.0, 280.0)', '(60.0, 282.5)', '(60.0, 285.0)', '(60.0, 287.5)',
       '(60.0, 290.0)', '(60.0, 292.5)', '(60.0, 295.0)', '(60.0, 297.5)',
       '(60.0, 300.0)', 'WSI_cat3_categories'],
      dtype='object', length=4930)

In [32]:
target_col = f"{region_name}_{target_type}"; print(target_col)

WSI_cat3_categories


### if shuffle is set to True, we first completely shuffle the training and the test instances 

In [33]:
# sample(frac=1.) returns every row of the frame in a random order;
# the training frame is drawn first, then the test frame
if shuffle:
    train_data, test_data = train_data.sample(frac=1.), test_data.sample(frac=1.)

### initialise a stratified K-Fold object, which will return train and test indices 

In [34]:
# Build the stratified 10-fold splits and materialise the (train, test) index pairs as a list.
# `.split()` returns a one-shot generator: left as is, it is exhausted after a single pass,
# so re-running the cross-validation cell would silently iterate over zero folds.
kfold = list(
    StratifiedKFold(n_splits=10).split(
        train_data.drop(labels=[target_col], axis=1).values,
        train_data.loc[:, target_col].values,
    )
)

In [35]:
# containers filled by the cross-validation loop, one entry per fold
test_indices = []   # positional indices of the held-out rows
y_preds = []        # predicted classes for the held-out rows
leader_board = []   # leaderboard DataFrame returned by the predictor
perfs = []          # value returned by predictor.evaluate_predictions

# the full feature matrix and target vector are identical for every fold:
# extract them once instead of re-building them (twice each) inside the loop
X_all = train_data.drop(labels=[target_col], axis=1).values
y_all = train_data.loc[:, target_col].values

for k, (train, test) in enumerate(kfold):

    print(f"ENTERING FOLD {k} ---- ")

    # saves the test indices
    test_indices.append(test)

    # training part of the fold: initial features (grid points) and target values (y)
    X_train = X_all[train]
    y_train = y_all[train]

    # held-out part of the fold: initial features (grid points) and target values (y)
    X_test = X_all[test]
    y_test = y_all[test]

    # -----------
    # standardize

    # initialise the scaler (standard scaler)
    scaler = StandardScaler()

    # fit on the training features only, and transform to obtain standardized values
    X_train_std = scaler.fit_transform(X_train)

    # apply the (training-derived) transformation to the held-out features
    X_test_std = scaler.transform(X_test)

    # -----------------------------
    # Principal Component Analysis

    # instantiate the PCA class with the fraction of variance to keep
    # (public class, rather than the deprecated private `pca` submodule)
    skpca = PCA(n_components=percent_variance)

    # fit on the training (standardized) features array, and transform to obtain the PCs
    X_train_PC = skpca.fit_transform(X_train_std)

    # project the held-out standardized features onto the training PCs
    X_test_PC = skpca.transform(X_test_std)

    # assign the training set PCs to a DataFrame and add the target values
    df_train = pd.DataFrame(X_train_PC)
    df_train.loc[:, target_col] = y_train

    # assign the held-out PCs to a DataFrame and add the target values
    df_test = pd.DataFrame(X_test_PC)
    df_test.loc[:, target_col] = y_test

    # fit the task predictor on the training set DataFrame
    # NOTE(review): every fold writes to the same `opath`; presumably later folds overwrite the
    # models saved by earlier ones -- confirm whether per-fold artefacts are needed
    predictor = task.fit(train_data=df_train, label=target_col, auto_stack=True, output_directory=opath)

    # predict the class value for the held-out rows (dropping the target values column)
    y_pred = predictor.predict(df_test.drop(labels=[target_col], axis=1))

    # records the predicted classes on the held-out rows
    y_preds.append(y_pred)

    # get the leaderboard DataFrame
    d = predictor.leaderboard(silent=True)

    # records the leaderboard DataFrame
    leader_board.append(d)

    # records the evaluation of the predictions against the held-out target values
    perfs.append(predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True, silent=True))

    print(f"EXITING FOLD {k} ---- ")

Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v2/autogluon_exp_SKPCA_ECMWF_pred_WSI_reg_TMEAN_targetvar_cat3_categories_target_type/
Train Data Rows:    255
Train Data Columns: 34
Preprocessing data ...
Here are the first 10 unique label values in your data:  [3 1 2]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed)
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Feature Generator processed 255 data points with 33 features
Original Features:
	float features: 33
Generated Features:
	int features: 0
All Features:
	float features: 33
	int features: 0
	Data preprocessing and feature engineering runtime = 0.06s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will

ENTERING FOLD 0 ---- 


	0.5569	 = Validation accuracy score
	2.63s	 = Training runtime
	0.76s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5608	 = Validation accuracy score
	2.6s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5961	 = Validation accuracy score
	2.15s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.6078	 = Validation accuracy score
	2.18s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5804	 = Validation accuracy score
	0.18s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.6039	 = Validation accuracy score
	0.17s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.5686	 = Validation accuracy score
	1.46s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostC

EXITING FOLD 0 ---- 
ENTERING FOLD 1 ---- 


AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini_STACKER_l0 ...
	0.5391	 = Validation accuracy score
	2.69s	 = Training runtime
	0.6s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5391	 = Validation accuracy score
	2.66s	 = Training runtime
	0.6s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5312	 = Validation accuracy score
	2.14s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5547	 = Validation accuracy score
	2.2s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5195	 = Validation accuracy score
	0.1s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0

EXITING FOLD 1 ---- 
ENTERING FOLD 2 ---- 


	0.5331	 = Validation accuracy score
	2.61s	 = Training runtime
	0.76s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5253	 = Validation accuracy score
	2.62s	 = Training runtime
	0.6s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5525	 = Validation accuracy score
	2.12s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5525	 = Validation accuracy score
	2.22s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.537	 = Validation accuracy score
	0.18s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5642	 = Validation accuracy score
	0.16s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.5292	 = Validation accuracy score
	1.78s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostCl

EXITING FOLD 2 ---- 
ENTERING FOLD 3 ---- 


	0.5543	 = Validation accuracy score
	2.57s	 = Training runtime
	0.7s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5271	 = Validation accuracy score
	2.6s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5504	 = Validation accuracy score
	2.09s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5543	 = Validation accuracy score
	2.1s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.531	 = Validation accuracy score
	0.1s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5736	 = Validation accuracy score
	0.1s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.6008	 = Validation accuracy score
	1.58s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostClassi

EXITING FOLD 3 ---- 
ENTERING FOLD 4 ---- 


	0.562	 = Validation accuracy score
	2.66s	 = Training runtime
	0.97s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5271	 = Validation accuracy score
	2.69s	 = Training runtime
	0.74s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5465	 = Validation accuracy score
	2.16s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5736	 = Validation accuracy score
	2.13s	 = Training runtime
	0.69s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5078	 = Validation accuracy score
	0.13s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5504	 = Validation accuracy score
	0.13s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.5736	 = Validation accuracy score
	1.37s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostC

EXITING FOLD 4 ---- 
ENTERING FOLD 5 ---- 


Here are the first 10 unique label values in your data:  [1 2 3]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed)
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Feature Generator processed 258 data points with 32 features
Original Features:
	float features: 32
Generated Features:
	int features: 0
All Features:
	float features: 32
	int features: 0
	Data preprocessing and feature engineering runtime = 0.2s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini_STACKER_l0 ...
	0.5155	 = Validation accuracy score
	2.73s	 = Training runtime
	0.86s	 = Validation runtime
Fitting model: RandomForestClassifierEntr

EXITING FOLD 5 ---- 
ENTERING FOLD 6 ---- 


	0.5388	 = Validation accuracy score
	2.61s	 = Training runtime
	0.77s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5426	 = Validation accuracy score
	2.69s	 = Training runtime
	0.61s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5426	 = Validation accuracy score
	2.16s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5659	 = Validation accuracy score
	2.14s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5271	 = Validation accuracy score
	0.19s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5465	 = Validation accuracy score
	0.12s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.593	 = Validation accuracy score
	2.04s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostC

EXITING FOLD 6 ---- 
ENTERING FOLD 7 ---- 


	0.5543	 = Validation accuracy score
	2.65s	 = Training runtime
	0.86s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5543	 = Validation accuracy score
	2.7s	 = Training runtime
	0.63s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5891	 = Validation accuracy score
	2.13s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5814	 = Validation accuracy score
	2.1s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5388	 = Validation accuracy score
	0.16s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5504	 = Validation accuracy score
	0.17s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.5969	 = Validation accuracy score
	1.45s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostCl

EXITING FOLD 7 ---- 
ENTERING FOLD 8 ---- 


AutoGluon will early stop models using evaluation metric: accuracy
Fitting model: RandomForestClassifierGini_STACKER_l0 ...
	0.5233	 = Validation accuracy score
	2.49s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5194	 = Validation accuracy score
	2.57s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.531	 = Validation accuracy score
	2.04s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.562	 = Validation accuracy score
	1.95s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.5426	 = Validation accuracy score
	0.06s	 = Training runtime
	0.56s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5504	 = Validation accuracy score
	0.06s	 = Training runtime
	0.56s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l

EXITING FOLD 8 ---- 
ENTERING FOLD 9 ---- 


	0.5465	 = Validation accuracy score
	2.6s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: RandomForestClassifierEntr_STACKER_l0 ...
	0.5349	 = Validation accuracy score
	2.64s	 = Training runtime
	0.6s	 = Validation runtime
Fitting model: ExtraTreesClassifierGini_STACKER_l0 ...
	0.5659	 = Validation accuracy score
	2.1s	 = Training runtime
	0.55s	 = Validation runtime
Fitting model: ExtraTreesClassifierEntr_STACKER_l0 ...
	0.5271	 = Validation accuracy score
	2.14s	 = Training runtime
	0.57s	 = Validation runtime
Fitting model: KNeighborsClassifierUnif_STACKER_l0 ...
	0.531	 = Validation accuracy score
	0.09s	 = Training runtime
	0.58s	 = Validation runtime
Fitting model: KNeighborsClassifierDist_STACKER_l0 ...
	0.5543	 = Validation accuracy score
	0.13s	 = Training runtime
	0.59s	 = Validation runtime
Fitting model: LightGBMClassifier_STACKER_l0 ...
	0.5698	 = Validation accuracy score
	1.51s	 = Training runtime
	0.03s	 = Validation runtime
Fitting model: CatboostClas

EXITING FOLD 9 ---- 


### Now what are the best model(s) in the leaderboard for each fold 

In [36]:
len(leader_board)

10

In [37]:
# first row of each fold's leaderboard, kept as a one-row DataFrame
# (the list indexer [[0]] preserves the 2-D shape so the pieces can be concatenated)
top = [fold_board.iloc[[0], :] for fold_board in leader_board]

In [38]:
top = pd.concat(top, axis=0)

In [39]:
top

Unnamed: 0,model,score_val,fit_time,pred_time_val,stack_level
10,weighted_ensemble_k0_l1,0.635294,0.376105,0.000801,1
10,weighted_ensemble_k0_l1,0.597656,0.388626,0.000796,1
10,weighted_ensemble_k0_l1,0.607004,0.334271,0.000849,1
10,weighted_ensemble_k0_l1,0.616279,0.373659,0.000762,1
10,weighted_ensemble_k0_l1,0.589147,0.373688,0.000776,1
10,weighted_ensemble_k0_l1,0.620155,0.34092,0.000751,1
10,weighted_ensemble_k0_l1,0.616279,0.365451,0.000757,1
10,weighted_ensemble_k0_l1,0.624031,0.384156,0.00074,1
10,weighted_ensemble_k0_l1,0.589147,0.370661,0.000735,1
10,weighted_ensemble_k0_l1,0.589147,0.394797,0.000734,1


### Now retrain over the WHOLE training set 

In [40]:
train_data.shape

(286, 4930)

In [41]:
train_data.columns

Index(['(-70.0, 70.0)', '(-70.0, 72.5)', '(-70.0, 75.0)', '(-70.0, 77.5)',
       '(-70.0, 80.0)', '(-70.0, 82.5)', '(-70.0, 85.0)', '(-70.0, 87.5)',
       '(-70.0, 90.0)', '(-70.0, 92.5)',
       ...
       '(60.0, 280.0)', '(60.0, 282.5)', '(60.0, 285.0)', '(60.0, 287.5)',
       '(60.0, 290.0)', '(60.0, 292.5)', '(60.0, 295.0)', '(60.0, 297.5)',
       '(60.0, 300.0)', 'WSI_cat3_categories'],
      dtype='object', length=4930)

### first step: get the values, scale and PCA using the whole training data this time 

In [42]:
# get the numpy array containing the training set initial features (grid points)
X_train = train_data.drop(labels=[target_col],axis=1).values

# get the numpy array containing the training set target values (y)
y_train =  train_data.loc[:,target_col].values

# -----------
# standardize 

# initialise the scaler (standard scaler)
scaler = StandardScaler() 

# fit on the whole training set features array, and transform to obtain standardized values
X_train_std = scaler.fit_transform(X_train)

# NOTE: nothing is transformed for the test set here. At this point `X_test` still holds the
# held-out rows of the last cross-validation fold (a subset of the training data), so scaling
# it would be meaningless; the independent test set is scaled and projected in the evaluation
# section further down, using the `scaler` and `skpca` fitted in this cell.

# -----------------------------
# Principal Component Analysis 

# instantiate the PCA class with the fraction of variance to keep
# (public class, rather than the deprecated private `pca` submodule)
skpca = PCA(n_components=percent_variance)

# fit on the training (standardized) features array, and transform to obtain the PCs
X_train_PCs = skpca.fit_transform(X_train_std)

# assign the training set PCs to a DataFrame 
df_train = pd.DataFrame(X_train_PCs) 

# add the target values to the training DataFrame 
df_train.loc[:,target_col] = y_train

### whether to tune the hyperparameters or to use bagging and multi-layer stack ensembling

In [43]:
tune = False

In [44]:
# sub-directory for this run: 'tuned' for hyperparameter tuning, 'ensemble' for auto-stacking
run_kind = 'tuned' if tune else 'ensemble'

# keep the cell safe to re-run: if `opath` already ends with a sub-directory appended by a
# previous execution of this cell, step back to the experiment root before appending again
# (otherwise the path would grow to .../ensemble/ensemble, .../ensemble/tuned, ...)
if opath.name in ('tuned', 'ensemble'):
    opath = opath.parent
opath = opath.joinpath(run_kind)
opath.mkdir(parents=True, exist_ok=True)

# tuning and auto-stacking are mutually exclusive here: exactly one of them is switched on
predictor = task.fit(
    train_data=df_train,
    label=target_col,
    auto_stack=not tune,
    hyperparameter_tune=bool(tune),
    output_directory=opath,
)

Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v2/autogluon_exp_SKPCA_ECMWF_pred_WSI_reg_TMEAN_targetvar_cat3_categories_target_type/ensemble/
Train Data Rows:    286
Train Data Columns: 34
Preprocessing data ...
Here are the first 10 unique label values in your data:  [1 2 3]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed)
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Feature Generator processed 286 data points with 33 features
Original Features:
	float features: 33
Generated Features:
	int features: 0
All Features:
	float features: 33
	int features: 0
	Data preprocessing and feature engineering runtime = 0.08s ...
AutoGluon will gauge predictive performance using evaluation metric: accuracy
To change this, specify the eval_metric argument of fit()
AutoG

### print the fit summary 

In [45]:
fit_summary = predictor.fit_summary()

*** Summary of fit() ***
Estimated performance of each model:
                                    model  score_val   fit_time  pred_time_val  stack_level
10                weighted_ensemble_k0_l1   0.643357   0.358242       0.000788            1
7           CatboostClassifier_STACKER_l0   0.643357   4.687348       0.045573            0
6           LightGBMClassifier_STACKER_l0   0.611888   1.590439       0.032051            0
8          NeuralNetClassifier_STACKER_l0   0.597902  12.160850       0.529975            0
9     LightGBMClassifierCustom_STACKER_l0   0.597902   7.346374       0.031115            0
3     ExtraTreesClassifierEntr_STACKER_l0   0.597902   2.296293       0.550567            0
2     ExtraTreesClassifierGini_STACKER_l0   0.583916   2.164485       0.596634            0
0   RandomForestClassifierGini_STACKER_l0   0.576923   2.679837       0.845425            0
1   RandomForestClassifierEntr_STACKER_l0   0.576923   2.672497       0.707136            0
5     KNeighborsCl

In [46]:
fit_summary.keys()

dict_keys(['model_types', 'model_performance', 'model_best', 'model_paths', 'model_fit_times', 'model_pred_times', 'num_bagging_folds', 'stack_ensemble_levels', 'feature_prune', 'hyperparameter_tune', 'hyperparameters_userspecified', 'num_classes', 'model_hyperparams', 'leaderboard'])

In [47]:
fit_summary['model_best']

'weighted_ensemble_k0_l1'

### get the leaderboard DataFrame 

In [48]:
d = predictor.leaderboard(silent=True)

In [49]:
d.sort_values(by='score_val', ascending=False)

Unnamed: 0,model,score_val,fit_time,pred_time_val,stack_level
10,weighted_ensemble_k0_l1,0.643357,0.358242,0.000788,1
7,CatboostClassifier_STACKER_l0,0.643357,4.687348,0.045573,0
6,LightGBMClassifier_STACKER_l0,0.611888,1.590439,0.032051,0
8,NeuralNetClassifier_STACKER_l0,0.597902,12.16085,0.529975,0
9,LightGBMClassifierCustom_STACKER_l0,0.597902,7.346374,0.031115,0
3,ExtraTreesClassifierEntr_STACKER_l0,0.597902,2.296293,0.550567,0
2,ExtraTreesClassifierGini_STACKER_l0,0.583916,2.164485,0.596634,0
0,RandomForestClassifierGini_STACKER_l0,0.576923,2.679837,0.845425,0
1,RandomForestClassifierEntr_STACKER_l0,0.576923,2.672497,0.707136,0
5,KNeighborsClassifierDist_STACKER_l0,0.566434,0.106958,0.583987,0


### Now evaluate on the independent test set 

In [50]:
test_data.shape

(33, 4930)

In [51]:
test_data.columns

Index(['(-70.0, 70.0)', '(-70.0, 72.5)', '(-70.0, 75.0)', '(-70.0, 77.5)',
       '(-70.0, 80.0)', '(-70.0, 82.5)', '(-70.0, 85.0)', '(-70.0, 87.5)',
       '(-70.0, 90.0)', '(-70.0, 92.5)',
       ...
       '(60.0, 280.0)', '(60.0, 282.5)', '(60.0, 285.0)', '(60.0, 287.5)',
       '(60.0, 290.0)', '(60.0, 292.5)', '(60.0, 295.0)', '(60.0, 297.5)',
       '(60.0, 300.0)', 'WSI_cat3_categories'],
      dtype='object', length=4930)

In [52]:
X_test = test_data.iloc[:,:-1].values 

In [53]:
X_test_std = scaler.transform(X_test)

In [54]:
X_test_PCs = skpca.transform(X_test_std)

In [55]:
y_hat_proba = predictor.predict_proba(pd.DataFrame(X_test_PCs, index=test_data.index))

In [56]:
eval_test = pd.DataFrame(test_data.iloc[:,-1].values.astype(np.int32), index=test_data.index)

In [57]:
# append the predicted probabilities (scaled to percent) as columns labelled 1, 2, 3 next to the
# observed categories; only the first column of `eval_test` is re-used, so re-running this cell
# does not keep stacking duplicate probability columns
# NOTE(review): assumes predict_proba returns its columns in class order 1, 2, 3 -- confirm
eval_test = pd.concat([eval_test.iloc[:, [0]], pd.DataFrame(y_hat_proba * 100., index=test_data.index, columns=range(1,4))], axis=1)

In [58]:
eval_test.dropna().shape

(33, 4)

### calculate the "SCO" accuracy, which includes a tolerance of 5% 

In [59]:
print(f"SCO accuracy on the test data {calc_accuracy_sco(eval_test)}")

SCO accuracy on the test data 0.8181818181818182


### now evaluate using the evaluate method of the predictor

In [60]:
test_data.shape

(33, 4930)

In [61]:
test_data.columns

Index(['(-70.0, 70.0)', '(-70.0, 72.5)', '(-70.0, 75.0)', '(-70.0, 77.5)',
       '(-70.0, 80.0)', '(-70.0, 82.5)', '(-70.0, 85.0)', '(-70.0, 87.5)',
       '(-70.0, 90.0)', '(-70.0, 92.5)',
       ...
       '(60.0, 280.0)', '(60.0, 282.5)', '(60.0, 285.0)', '(60.0, 287.5)',
       '(60.0, 290.0)', '(60.0, 292.5)', '(60.0, 295.0)', '(60.0, 297.5)',
       '(60.0, 300.0)', 'WSI_cat3_categories'],
      dtype='object', length=4930)

In [62]:
predictor.evaluate_predictions(eval_test.iloc[:,0], eval_test.iloc[:,1:].idxmax(axis=1), auxiliary_metrics=False, detailed_report=False)

Evaluation: accuracy on test data: 0.7878787878787878


0.7878787878787878