In [1]:
# Parameters
# NOTE(review): presumably a papermill-style parameters cell — confirm before moving it.

# application case; used in the target input path and in the model output directory name
application = 'Apple_and_Pears'
# name of the target variable; used in the target input path, file name and output directory name
varname = 'RAIN_BC'
# number of quantile categories; selects the target csv file (`..._Q{num_quantiles}_categories.csv`)
num_quantiles = 3
# name of the target column kept from the csv file (e.g. 'cat3')
target_type = f'cat{num_quantiles}'

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
%matplotlib inline

In [5]:
import sys 
import pathlib

In [6]:
import matplotlib.pyplot as plt 

In [7]:
import numpy as np 
import pandas as pd 

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [9]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold

In [10]:
import autogluon as ag
from autogluon import TabularPrediction as task

In [11]:
# seed the global numpy random state, so that code relying on it gives the same
# results when the notebook is run top to bottom from a fresh kernel
np.random.seed(42)

In [12]:
# user's home directory and the directory the notebook is run from
HOME, CWD = pathlib.Path.home(), pathlib.Path.cwd()

### read the target variable 

In [13]:
# directory holding the seasonal target files for this application / variable
# (`joinpath` already returns a Path, no extra wrapping is needed)
ipath_target = HOME.joinpath(f"research/Smart_Ideas/outputs/targets/application_cases/{application}/SEASONAL/{varname}")

In [14]:
# csv file with the seasonal anomalies and the quantile categories
target_file = ipath_target.joinpath(f"Seasonal_{varname}_sum_anomalies_and_Q{num_quantiles}_categories.csv")

# first column is used as the index and parsed as dates
target = pd.read_csv(target_file, index_col=0, parse_dates=True)

In [15]:
# keep only the column holding the categories, as a one-column DataFrame
target = target[[target_type]]

### read the pre-computed seasonal SST anomalies 

In [16]:
# directory containing the processed ECMWF SST files
# NOTE(review): hard-coded absolute path on an external drive — the notebook only runs
# on a machine where this mount point exists
ipath_sst = pathlib.Path('/media/nicolasf/END19101/data/GCMs/processed/CDS/ECMWF/SST') 

In [17]:
# if True, the `*_seasonal_anomalies_interp_*` files are read instead of the
# `*_seasonal_anomalies_*` ones
interp = False

In [18]:
# file name pattern, depending on whether the interpolated anomalies are requested
sst_pattern = (
    "CDS_ECMWF_SST_*_seasonal_anomalies_interp_1981_2010_clim.nc"
    if interp
    else "CDS_ECMWF_SST_*_seasonal_anomalies_1981_2010_clim.nc"
)

# list of the matching files (unsorted at this stage)
lfiles_sst = list(ipath_sst.glob(sst_pattern))

In [19]:
# order the files by path, which orders them by the date embedded in the file name
lfiles_sst = sorted(lfiles_sst)

In [20]:
# first file of the sorted list
lfiles_sst[0]

PosixPath('/media/nicolasf/END19101/data/GCMs/processed/CDS/ECMWF/SST/CDS_ECMWF_SST_1981-01_seasonal_anomalies_1981_2010_clim.nc')

In [21]:
# last file of the sorted list
lfiles_sst[-1]

PosixPath('/media/nicolasf/END19101/data/GCMs/processed/CDS/ECMWF/SST/CDS_ECMWF_SST_2019-12_seasonal_anomalies_1981_2010_clim.nc')

In [22]:
import xarray as xr

In [23]:
# open only the first file, to inspect the structure of one dataset;
# `dset_sst` is re-assigned to the multi-file dataset further down
dset_sst = xr.open_dataset(lfiles_sst[0])

In [24]:
# display the dataset read from the single file
dset_sst

In [25]:
# open all the files lazily and concatenate them, in list order, along a new
# `init_time` dimension
dset_sst = xr.open_mfdataset(
    lfiles_sst,
    combine='nested',
    concat_dim='init_time',
)

In [26]:
# display the concatenated dataset (one chunk per input file)
dset_sst

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.20 GB 26.06 MB Shape (468, 4, 25, 181, 360) (1, 4, 25, 181, 360) Count 1872 Tasks 468 Chunks Type float32 numpy.ndarray",4  468  360  181  25,

Unnamed: 0,Array,Chunk
Bytes,12.20 GB,26.06 MB
Shape,"(468, 4, 25, 181, 360)","(1, 4, 25, 181, 360)"
Count,1872 Tasks,468 Chunks
Type,float32,numpy.ndarray


### rename the time dimension 

In [27]:
# `init_time` becomes `time`
dset_sst = dset_sst.rename(init_time='time')

### shift the time dimension to the END of the month 

In [28]:
# time coordinate as a pandas index, so that pandas date offsets can be applied to it
dset_sst_time = dset_sst.time.to_index()

In [29]:
# MonthEnd(0) rolls each date forward to the end of its month (dates already on a
# month end are left unchanged)
time_month_end = dset_sst_time + pd.offsets.MonthEnd(0)

# replace the time coordinate, keeping `time` as its dimension
dset_sst['time'] = ('time', time_month_end)

### choose the step 

In [30]:
# keep a single value of the `step` coordinate
# NOTE(review): presumably the forecast lead time, in months — confirm
dset_sst = dset_sst.sel({'step': 3})

### choose the domain 

In [31]:
# latitudes from -60 to 40, longitudes from 120 to 290 (i.e. 360 - 70, longitudes
# being expressed between 0 and 360)
domain = dict(lat=slice(-60, 40), lon=slice(120, 360 - 70))

dset_sst = dset_sst.sel(**domain)

In [32]:
# display the dataset after the selection of the step and of the domain
dset_sst

Unnamed: 0,Array,Chunk
Bytes,808.28 MB,1.73 MB
Shape,"(468, 25, 101, 171)","(1, 25, 101, 171)"
Count,2808 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 808.28 MB 1.73 MB Shape (468, 25, 101, 171) (1, 25, 101, 171) Count 2808 Tasks 468 Chunks Type float32 numpy.ndarray",468  1  171  101  25,

Unnamed: 0,Array,Chunk
Bytes,808.28 MB,1.73 MB
Shape,"(468, 25, 101, 171)","(1, 25, 101, 171)"
Count,2808 Tasks,468 Chunks
Type,float32,numpy.ndarray


### calculate the ensemble mean 

In [33]:
# average over the `member` dimension
dset_sst = dset_sst.mean(dim='member')

In [34]:
# flatten the (lat, lon) grid into a single dimension `s`: one column per grid point
dset_sst = dset_sst.stack({'s': ('lat', 'lon')})

In [35]:
# display the stacked dataset, now (time, s)
dset_sst

Unnamed: 0,Array,Chunk
Bytes,32.33 MB,69.08 kB
Shape,"(468, 17271)","(1, 17271)"
Count,4212 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 32.33 MB 69.08 kB Shape (468, 17271) (1, 17271) Count 4212 Tasks 468 Chunks Type float32 numpy.ndarray",17271  468,

Unnamed: 0,Array,Chunk
Bytes,32.33 MB,69.08 kB
Shape,"(468, 17271)","(1, 17271)"
Count,4212 Tasks,468 Chunks
Type,float32,numpy.ndarray


### drop the land points 

In [36]:
# remove the grid points (labels along `s`) that contain missing values
dset_sst = dset_sst.dropna(dim='s')

  x = np.divide(x1, x2, out)


In [37]:
# display the dataset once the grid points with missing values are removed
dset_sst

Unnamed: 0,Array,Chunk
Bytes,28.84 MB,61.62 kB
Shape,"(468, 15404)","(1, 15404)"
Count,4680 Tasks,468 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 28.84 MB 61.62 kB Shape (468, 15404) (1, 15404) Count 4680 Tasks 468 Chunks Type float32 numpy.ndarray",15404  468,

Unnamed: 0,Array,Chunk
Bytes,28.84 MB,61.62 kB
Shape,"(468, 15404)","(1, 15404)"
Count,4680 Tasks,468 Chunks
Type,float32,numpy.ndarray


In [38]:
# trigger the (so far lazy) computation and load the values into memory;
# `load` operates on `dset_sst` itself, so no re-assignment is needed
dset_sst.load()

  x = np.divide(x1, x2, out)


In [39]:
# array backing the `sst` variable, (time, grid point)
sst_data = dset_sst.sst.data

In [40]:
# sanity check: (number of time steps, number of grid points)
sst_data.shape

(468, 15404)

In [41]:
# one row per time step, one (integer-labelled) column per grid point
time_index = dset_sst['time'].to_index()
df_sst = pd.DataFrame(data=sst_data, index=time_index)

In [42]:
# display the SST features DataFrame
df_sst

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,15394,15395,15396,15397,15398,15399,15400,15401,15402,15403
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1981-01-31,0.420477,0.402159,0.373720,0.362539,0.345699,0.307961,0.290995,0.285870,0.273656,0.187753,...,0.903952,1.003092,1.047032,0.942453,0.805086,0.730551,-0.369667,0.997024,1.691779,1.819921
1981-02-28,0.226975,0.206600,0.173060,0.164778,0.154950,0.128402,0.138299,0.145644,0.120826,0.068853,...,0.497912,0.682878,0.737187,0.617653,0.513984,0.516849,-0.682379,0.400085,1.012216,0.989149
1981-03-31,0.489565,0.427872,0.482587,0.518652,0.438454,0.326950,0.349469,0.410430,0.353357,0.311130,...,0.396609,0.510071,0.468583,0.455429,0.355425,0.437743,-0.153569,0.324657,0.776263,0.693833
1981-04-30,0.324038,0.393320,0.477153,0.444463,0.305492,0.205404,0.227977,0.252811,0.200682,0.133969,...,0.034266,0.037054,0.039491,0.090713,0.162139,0.275909,-0.585697,-0.350706,0.273264,-0.012705
1981-05-31,0.233295,0.292978,0.300281,0.256164,0.210682,0.181671,0.179627,0.190652,0.127618,0.095004,...,0.154811,-0.025500,0.088265,-0.061710,-0.084159,0.252453,-0.243840,0.240405,0.624972,0.697301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,0.406261,0.462367,0.434161,0.396276,0.433629,0.503420,0.521224,0.506116,0.526794,0.463996,...,1.138187,1.217199,1.234264,0.910974,0.704313,0.128739,2.164932,2.025982,1.495981,1.433221
2019-09-30,0.448047,0.374718,0.281313,0.288863,0.383048,0.428772,0.417531,0.405470,0.437318,0.436709,...,1.012474,1.079373,1.165598,1.133055,0.731830,0.144130,1.796153,1.926061,1.497076,1.494471
2019-10-31,0.222869,0.191776,0.131536,0.154404,0.232343,0.265822,0.235059,0.228903,0.255634,0.238201,...,0.426517,0.596215,0.748149,0.400051,0.119670,-0.145228,1.471656,1.648478,1.423466,1.481002
2019-11-30,0.807123,0.791505,0.781172,0.790165,0.772367,0.766414,0.784490,0.847736,0.890010,0.927771,...,0.847689,0.891759,0.685061,0.329910,0.355276,0.360305,0.622526,0.864910,1.098253,1.226317


### make sure the target is at the right frequency 

In [43]:
# declare the index as month-end frequency; pandas raises if the dates of the
# index do not conform to that frequency
target.index.freq = 'M'

In [44]:
# check the index of the target (dates and frequency)
target.index

DatetimeIndex(['1979-03-31', '1979-04-30', '1979-05-31', '1979-06-30',
               '1979-07-31', '1979-08-31', '1979-09-30', '1979-10-31',
               '1979-11-30', '1979-12-31',
               ...
               '2019-03-31', '2019-04-30', '2019-05-31', '2019-06-30',
               '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',
               '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', name='time', length=490, freq='M')

In [45]:
# put the SST features and the target side by side, aligned on the time index;
# dates present in only one of the two get missing values
df = pd.concat((df_sst, target), axis='columns')

In [46]:
# display the features + target DataFrame (rows before the first SST date are NaN)
df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,15395,15396,15397,15398,15399,15400,15401,15402,15403,cat3
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1979-03-31,,,,,,,,,,,...,,,,,,,,,,3
1979-04-30,,,,,,,,,,,...,,,,,,,,,,3
1979-05-31,,,,,,,,,,,...,,,,,,,,,,3
1979-06-30,,,,,,,,,,,...,,,,,,,,,,1
1979-07-31,,,,,,,,,,,...,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,0.406261,0.462367,0.434161,0.396276,0.433629,0.503420,0.521224,0.506116,0.526794,0.463996,...,1.217199,1.234264,0.910974,0.704313,0.128739,2.164932,2.025982,1.495981,1.433221,2
2019-09-30,0.448047,0.374718,0.281313,0.288863,0.383048,0.428772,0.417531,0.405470,0.437318,0.436709,...,1.079373,1.165598,1.133055,0.731830,0.144130,1.796153,1.926061,1.497076,1.494471,2
2019-10-31,0.222869,0.191776,0.131536,0.154404,0.232343,0.265822,0.235059,0.228903,0.255634,0.238201,...,0.596215,0.748149,0.400051,0.119670,-0.145228,1.471656,1.648478,1.423466,1.481002,3
2019-11-30,0.807123,0.791505,0.781172,0.790165,0.772367,0.766414,0.784490,0.847736,0.890010,0.927771,...,0.891759,0.685061,0.329910,0.355276,0.360305,0.622526,0.864910,1.098253,1.226317,3


In [47]:
# keep only the dates for which all the features and the target are available
df = df.dropna(axis='index', how='any')

In [48]:
# display the DataFrame once the incomplete rows are removed
df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,15395,15396,15397,15398,15399,15400,15401,15402,15403,cat3
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1981-01-31,0.420477,0.402159,0.373720,0.362539,0.345699,0.307961,0.290995,0.285870,0.273656,0.187753,...,1.003092,1.047032,0.942453,0.805086,0.730551,-0.369667,0.997024,1.691779,1.819921,3
1981-02-28,0.226975,0.206600,0.173060,0.164778,0.154950,0.128402,0.138299,0.145644,0.120826,0.068853,...,0.682878,0.737187,0.617653,0.513984,0.516849,-0.682379,0.400085,1.012216,0.989149,3
1981-03-31,0.489565,0.427872,0.482587,0.518652,0.438454,0.326950,0.349469,0.410430,0.353357,0.311130,...,0.510071,0.468583,0.455429,0.355425,0.437743,-0.153569,0.324657,0.776263,0.693833,2
1981-04-30,0.324038,0.393320,0.477153,0.444463,0.305492,0.205404,0.227977,0.252811,0.200682,0.133969,...,0.037054,0.039491,0.090713,0.162139,0.275909,-0.585697,-0.350706,0.273264,-0.012705,2
1981-05-31,0.233295,0.292978,0.300281,0.256164,0.210682,0.181671,0.179627,0.190652,0.127618,0.095004,...,-0.025500,0.088265,-0.061710,-0.084159,0.252453,-0.243840,0.240405,0.624972,0.697301,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,0.406261,0.462367,0.434161,0.396276,0.433629,0.503420,0.521224,0.506116,0.526794,0.463996,...,1.217199,1.234264,0.910974,0.704313,0.128739,2.164932,2.025982,1.495981,1.433221,2
2019-09-30,0.448047,0.374718,0.281313,0.288863,0.383048,0.428772,0.417531,0.405470,0.437318,0.436709,...,1.079373,1.165598,1.133055,0.731830,0.144130,1.796153,1.926061,1.497076,1.494471,2
2019-10-31,0.222869,0.191776,0.131536,0.154404,0.232343,0.265822,0.235059,0.228903,0.255634,0.238201,...,0.596215,0.748149,0.400051,0.119670,-0.145228,1.471656,1.648478,1.423466,1.481002,3
2019-11-30,0.807123,0.791505,0.781172,0.790165,0.772367,0.766414,0.784490,0.847736,0.890010,0.927771,...,0.891759,0.685061,0.329910,0.355276,0.360305,0.622526,0.864910,1.098253,1.226317,3


In [49]:
# training period: everything up to (and including) December 2015
train_data = df.loc[:'2015-12']

In [50]:
# independent test period: from January 2016 onwards
test_data = df.loc['2016-01':]

### saved models 

In [51]:
# root directory for the saved models, relative to the current working directory
saved_models = pathlib.Path('./saved_models/AUTOGLUON_v3/')

In [52]:
# output directory for this experiment (application, variable and target type are
# encoded in the directory name)
opath = saved_models / f'./autogluon_exp_SKPCA_SSTsim_1981_2010_pred_{application}_reg_{varname}_targetvar_{target_type}_target_type'

In [53]:
# create the output directory (and any missing parent); `exist_ok=True` makes the
# call a no-op when the directory is already there, without a separate existence check
opath.mkdir(parents=True, exist_ok=True)

### initialise repeated stratified cross-validation

In [54]:
# 5 stratified folds, repeated twice (10 train / validation splits in total).
#
# - The splits are materialised in a list: `.split()` returns a one-shot generator,
#   so the cross-validation loop would silently run zero folds if its cell was
#   executed a second time without re-running this one.
# - `random_state` is passed explicitly (same value as the global numpy seed set at
#   the top of the notebook), so the folds do not depend on what has consumed the
#   global numpy random state before this cell is run.
kfold = list(
    RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42).split(
        train_data.drop(labels=[target_type], axis=1).values,
        train_data.loc[:, target_type].values,
    )
)

In [55]:
# fraction of the variance to retain; passed as `n_components` to the PCA
percent_variance = 0.8

In [56]:
# per-fold records: held-out indices, predicted classes, leaderboards and metrics
test_indices = []
y_preds = []
leader_board = []
perfs = []

# the feature matrix (grid points) and the target vector are the same for every fold:
# extract them once here rather than rebuilding them several times per fold
X_all = train_data.drop(labels=[target_type], axis=1).values
y_all = train_data.loc[:, target_type].values

for k, (train, test) in enumerate(kfold):

    print(f"ENTERING FOLD {k} ---- ")

    # saves the test indices
    test_indices.append(test)

    # features and target values of the training part of the fold
    X_train = X_all[train]
    y_train = y_all[train]

    # features and target values of the held-out part of the fold
    X_test = X_all[test]
    y_test = y_all[test]

    # -----------
    # standardize

    # the scaler is fitted on the training features only, then applied as is to the
    # held-out features
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)

    # -----------------------------
    # Principal Component Analysis

    # `n_components` is the fraction of variance to keep: the number of PCs retained
    # is determined from the training set, and can therefore differ from fold to fold
    skpca = PCA(n_components=percent_variance)

    # fit on the training (standardized) features, and transform to obtain the PCs
    X_train_PC = skpca.fit_transform(X_train_std)

    # project the held-out standardized features on the same PCs
    X_test_PC = skpca.transform(X_test_std)

    # training DataFrame: the PCs, plus the target values
    df_train = pd.DataFrame(X_train_PC)
    df_train.loc[:, target_type] = y_train

    # held-out DataFrame: the PCs, plus the target values
    df_test = pd.DataFrame(X_test_PC)
    df_test.loc[:, target_type] = y_test

    # fit the task predictor on the training set DataFrame
    # NOTE(review): every fold writes to the same `opath`, so the models saved by one
    # fold are presumably overwritten by the next — confirm whether a per-fold
    # sub-directory is wanted
    predictor = task.fit(train_data=df_train, label=target_type, auto_stack=True, output_directory=opath)

    # predict the class value from the held-out features (target column dropped)
    y_pred = predictor.predict(df_test.drop(labels=[target_type], axis=1))

    # records the predicted classes on the held-out set
    y_preds.append(y_pred)

    # get, then record, the leaderboard DataFrame
    d = predictor.leaderboard(silent=True)
    leader_board.append(d)

    # records the evaluation metrics of the predictions on the held-out set
    perfs.append(predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True, silent=True))

    print(f"EXITING FOLD {k} ---- ")

ENTERING FOLD 0 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 0 ---- 
ENTERING FOLD 1 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 1 ---- 
ENTERING FOLD 2 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 15
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 14 features
Original Features (raw dtypes):
	float32 features: 14
Original Features (inferred dtypes):
	float features: 14
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 14
Final Features:
	float features: 14
	Data preprocessing and feature engineering r

EXITING FOLD 2 ---- 
ENTERING FOLD 3 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [2, 3, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 3 ---- 
ENTERING FOLD 4 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 4 ---- 
ENTERING FOLD 5 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 5 ---- 
ENTERING FOLD 6 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 16
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 15 features
Original Features (raw dtypes):
	float32 features: 15
Original Features (inferred dtypes):
	float features: 15
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 15
Final Features:
	float features: 15
	Data preprocessing and feature engineering r

EXITING FOLD 6 ---- 
ENTERING FOLD 7 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 7 ---- 
ENTERING FOLD 8 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 8 ---- 
ENTERING FOLD 9 ---- 


Beginning AutoGluon training ...
AutoGluon will save models to saved_models/AUTOGLUON_v3/autogluon_exp_SKPCA_SSTsim_1981_2010_pred_Apple_and_Pears_reg_RAIN_BC_targetvar_cat3_target_type/
AutoGluon Version:  0.0.12
Train Data Rows:    336
Train Data Columns: 6
Preprocessing data ...
Here are the 3 unique label values in your data:  [3, 2, 1]
AutoGluon infers your prediction problem is: multiclass  (because dtype of label-column == int, but few unique label-values observed).
If this is wrong, please specify `problem_type` argument in fit() instead (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])

Train Data Class Count: 3
Feature Generator processed 336 data points with 5 features
Original Features (raw dtypes):
	float32 features: 5
Original Features (inferred dtypes):
	float features: 5
Generated Features (special dtypes):
Final Features (raw dtypes):
	float features: 5
Final Features:
	float features: 5
	Data preprocessing and feature engineering runtime

EXITING FOLD 9 ---- 


In [57]:
# one row per cross-validation fold, one column per recorded metric
df_perfs = pd.DataFrame(perfs)

In [58]:
# display the per-fold metrics
df_perfs

Unnamed: 0,accuracy,accuracy_score,balanced_accuracy_score,matthews_corrcoef,classification_report
0,0.404762,0.404762,0.40125,0.105614,"{'1': {'precision': 0.4411764705882353, 'recal..."
1,0.369048,0.369048,0.369762,0.056294,"{'1': {'precision': 0.4090909090909091, 'recal..."
2,0.52381,0.52381,0.520009,0.286932,"{'1': {'precision': 0.5405405405405406, 'recal..."
3,0.47619,0.47619,0.475324,0.213434,"{'1': {'precision': 0.4838709677419355, 'recal..."
4,0.428571,0.428571,0.424691,0.141425,"{'1': {'precision': 0.5161290322580645, 'recal..."
5,0.404762,0.404762,0.405948,0.107614,"{'1': {'precision': 0.37037037037037035, 'reca..."
6,0.488095,0.488095,0.485495,0.231641,"{'1': {'precision': 0.5333333333333333, 'recal..."
7,0.369048,0.369048,0.366767,0.052239,"{'1': {'precision': 0.375, 'recall': 0.4137931..."
8,0.380952,0.380952,0.381287,0.071444,"{'1': {'precision': 0.39285714285714285, 'reca..."
9,0.464286,0.464286,0.461728,0.194108,"{'1': {'precision': 0.4444444444444444, 'recal..."


In [59]:
# average of the metrics over the folds; `numeric_only=True` explicitly leaves out
# the `classification_report` column (dictionaries), instead of relying on it being
# silently dropped, which raises a TypeError with recent versions of pandas
df_perfs.mean(numeric_only=True)

accuracy                   0.430952
accuracy_score             0.430952
balanced_accuracy_score    0.429226
matthews_corrcoef          0.146074
dtype: float64