# This script tests the inclusion of different features for the classification scheme

In [1]:
#import the machine learning tools

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score
import joblib
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from itertools import product
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import recall_score

In [2]:
#choose classifier and test range of results
clf1 = KNeighborsClassifier(n_neighbors=1)
clf2 = RandomForestClassifier(max_depth=100)
clf3 = MLPClassifier(alpha=0.01,hidden_layer_sizes=(500, ), max_iter=1500)

clf = VotingClassifier(estimators=[('kn', clf1), ('rf', clf2), ('nn', clf3)], voting='soft')#, weights=[2, 1, 2])

In [3]:
# import build-in
import numpy as np

# import local
import utility_classification as ut_cla
import labels_create_ENSO as labels

In [4]:
# Test nino3/4/1.2 regions DJF

In [5]:
#set up path to the data
path = '/home/nicola/Documents/classification_clean/data/regions/'

#create dictionary with all input data
dict_data_struc ={
        1:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 42,
           'firstmon' :10},
        2:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        3:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 42,
           'firstmon' :10},
        4:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        5:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10},
        6:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' :42,
           'firstmon' :10},
        7:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        8:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 42,
           'firstmon' :10},
        9:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        10:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10}, 
        11:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 42,
           'firstmon' :10},
        12:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        13:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 42,
           'firstmon' :10},
        14:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        15:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10}, 
        16:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 46,
           'firstmon' :10},
        17:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 46,
           'firstmon' :10},
        18:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 46,
           'firstmon' :10},
        19:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 46,
           'firstmon' :10},
        20:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 46,
           'firstmon' :10}, 
        21:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 5,
           'firstmon' :10},
        22:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 5,
           'firstmon' :10},
        23:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 5,
           'firstmon' :10},
        24:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 5,
           'firstmon' :10},
        25:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 5,
           'firstmon' :10},       
        26:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        27:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        28:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        29:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        30:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        31:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 40,
           'firstmon' :10},
        32:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 40,
           'firstmon' :10},
        33:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 40,
           'firstmon' :10},
        34:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 40,
           'firstmon' :10},
        35:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 40,
           'firstmon' :10},
        36:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        37:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        38:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        39:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        40:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        41:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :2},
        42:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :2},
        43:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :2},
        44:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :2},
        45:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :2},
        46:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        47:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        48:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        49:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        50:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        51:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        52:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        53:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        54:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        55:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        56:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        57:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        58:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        59:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        60:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        61:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        62:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        63:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        64:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        65:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        66:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        67:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        68:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        69:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        70:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        71:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        72:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        73:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        74:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        75:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        76:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        77:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        78:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        79:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        80:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        81:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        82:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        83:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        84:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        85:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        86:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        87:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        88:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        89:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        90:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        91:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        92:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        93:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        94:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        95:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        96:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :10},
        97:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        98:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :10},
        99:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        100:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        101:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 0,
           'firstmon' :5},
        102:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :5},
        103:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 0,
           'firstmon' :5},
        104:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :5},
        105:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :5},
        106:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3.nc',
           'crop' : 26,
           'firstmon' :10},
        107:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 26,
           'firstmon' :10},
        108:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4.nc',
           'crop' : 26,
           'firstmon' :10},
        109:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 26,
           'firstmon' :10},
        110:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 26,
           'firstmon' :10}
                 }

In [6]:
#process all data
result = ut_cla.preprocess_from_dict(dict_data_struc, path, verbose=False)

# Concatenate 
def concat_features(result, keys, axis=0):
    list_= [result[key] for key in keys]
    return np.concatenate(list_,axis=axis)
    
keys=[1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101] # key to concatenate
concat_N3= concat_features(result, keys)

keys=[3,8,13,18,23,28,33,38,43,48,53,58,63,68,73,78,83,88,93,98,103] # key to concatenate
concat_N4= concat_features(result, keys)

keys=[5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105] # key to concatenate
concat_N12= concat_features(result, keys)
##

keys=[106,108,110] # key to concatenate
input_testa= concat_features(result, [106], axis=1)
input_testb= concat_features(result, [108], axis=1)
input_testc= concat_features(result, [110], axis=1)

In [7]:
#create DJF means
concat_N3_DJF=np.mean(concat_N3[:,2:5],axis=1)
concat_N4_DJF=np.mean(concat_N4[:,2:5],axis=1)
concat_N12_DJF=np.mean(concat_N12[:,2:5],axis=1)
input_test2a=np.mean(input_testa[:,2:5],axis=1)
input_test2b=np.mean(input_testb[:,2:5],axis=1)
input_test2c=np.mean(input_testc[:,2:5],axis=1)

concat2_N3_DJF=concat_N3_DJF.reshape(1325,1)
concat2_N4_DJF=concat_N4_DJF.reshape(1325,1)
concat2_N12_DJF=concat_N12_DJF.reshape(1325,1)
input_test3a=input_test2a.reshape(123,1)
input_test3b=input_test2b.reshape(123,1)
input_test3c=input_test2c.reshape(123,1)

input_train3=np.concatenate([concat2_N3_DJF,concat2_N4_DJF,concat2_N12_DJF],axis=1)
input_test3=np.concatenate([input_test3a,input_test3b,input_test3c],axis=1)

labels_train3,labels_test3=labels.create_labels()

In [8]:
#Scores for Test nino3/4/1.2 regions DJF
clf.fit(input_train3, labels_train3)
pred=clf.predict(input_test3)
a_score=accuracy_score(labels_test3, pred)
p_score_CP=precision_score(labels_test3, pred,labels=['CP'], average=None)
p_score_EL=precision_score(labels_test3, pred,labels=['EL'], average=None)
p_score_LN=precision_score(labels_test3, pred,labels=['LN'], average=None)
p_score_NE=precision_score(labels_test3, pred,labels=['NE'], average=None)
r_score_CP=recall_score(labels_test3, pred,labels=['CP'], average=None)
r_score_EL=recall_score(labels_test3, pred,labels=['EL'], average=None)
r_score_LN=recall_score(labels_test3, pred,labels=['LN'], average=None)
r_score_NE=recall_score(labels_test3, pred,labels=['NE'], average=None)
clf_score=cross_val_score(clf, input_train3, labels_train3, cv=5).mean()

#print scores    
print('clf',clf_score)
print('acc',a_score)
print('CP',p_score_CP)
print('EL',p_score_EL)
print('LN',p_score_LN)
print('NE',p_score_NE)
print('R-CP',r_score_CP)
print('R-EL',r_score_EL)
print('R-LN',r_score_LN)
print('R-NE',r_score_NE)


clf 0.7335849056603774
acc 0.7398373983739838
CP [0.4375]
EL [0.68421053]
LN [1.]
NE [0.77027027]
R-CP [0.53846154]
R-EL [0.65]
R-LN [0.53846154]
R-NE [0.890625]


In [9]:
# Test nino3/4/1.2 regions October -> March

input_train=np.concatenate([concat_N3,concat_N4,concat_N12],axis=1)

keys=[106,108,110] # key to concatenate
input_test= concat_features(result, keys, axis=1)


#create labels
labels_train,labels_test=labels.create_labels()


In [10]:
#Scores for Test nino3/4/1.2 regions October -> March

clf.fit(input_train, labels_train)
pred=clf.predict(input_test)
a_score=accuracy_score(labels_test, pred)
p_score_CP=precision_score(labels_test, pred,labels=['CP'], average=None)
p_score_EL=precision_score(labels_test, pred,labels=['EL'], average=None)
p_score_LN=precision_score(labels_test, pred,labels=['LN'], average=None)
p_score_NE=precision_score(labels_test, pred,labels=['NE'], average=None)
r_score_CP=recall_score(labels_test, pred,labels=['CP'], average=None)
r_score_EL=recall_score(labels_test, pred,labels=['EL'], average=None)
r_score_LN=recall_score(labels_test, pred,labels=['LN'], average=None)
r_score_NE=recall_score(labels_test, pred,labels=['NE'], average=None)
clf_score=cross_val_score(clf, input_train, labels_train, cv=5).mean()

#print scores    
print('clf',clf_score)
print('acc',a_score)
print('CP',p_score_CP)
print('EL',p_score_EL)
print('LN',p_score_LN)
print('NE',p_score_NE)
print('R-CP',r_score_CP)
print('R-EL',r_score_EL)
print('R-LN',r_score_LN)
print('R-NE',r_score_NE)

clf 0.9079245283018867
acc 0.9349593495934959
CP [0.92307692]
EL [1.]
LN [1.]
NE [0.9]
R-CP [0.92307692]
R-EL [1.]
R-LN [0.76923077]
R-NE [0.984375]


In [11]:
# Test addition of extra nino regions 3E/W ect. 

In [12]:
#set up path to the data
path = '/home/nicola/Documents/classification_clean/data/regions/'

#create dictionary with all input data
dict_data_struc ={
        1:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :10},
        2:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        3:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :10},
        4:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        5:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10},
        6:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' :42,
           'firstmon' :10},
        7:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        8:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :10},
        9:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        10:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10}, 
        11:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :10},
        12:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :10},
        13:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :10},
        14:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :10},
        15:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :10}, 
        16:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 46,
           'firstmon' :10},
        17:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 46,
           'firstmon' :10},
        18:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 46,
           'firstmon' :10},
        19:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 46,
           'firstmon' :10},
        20:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 46,
           'firstmon' :10}, 
        21:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 5,
           'firstmon' :10},
        22:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 5,
           'firstmon' :10},
        23:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 5,
           'firstmon' :10},
        24:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 5,
           'firstmon' :10},
        25:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 5,
           'firstmon' :10},       
        26:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        27:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        28:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        29:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        30:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        31:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 40,
           'firstmon' :10},
        32:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 40,
           'firstmon' :10},
        33:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 40,
           'firstmon' :10},
        34:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 40,
           'firstmon' :10},
        35:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 40,
           'firstmon' :10},
        36:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        37:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        38:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        39:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        40:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        41:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :2},
        42:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :2},
        43:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :2},
        44:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :2},
        45:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :2},
        46:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        47:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        48:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        49:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        50:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        51:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        52:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        53:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        54:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        55:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        56:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        57:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        58:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        59:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        60:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        61:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        62:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        63:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        64:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        65:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        66:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        67:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        68:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        69:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        70:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        71:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        72:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        73:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        74:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        75:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        76:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        77:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        78:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        79:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        80:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        81:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        82:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        83:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        84:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        85:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        86:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        87:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        88:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        89:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        90:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        91:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        92:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        93:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        94:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        95:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        96:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        97:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        98:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        99:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        100:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        101:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :5},
        102:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :5},
        103:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :5},
        104:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :5},
        105:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :5},
        106:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 26,
           'firstmon' :10},
        107:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 26,
           'firstmon' :10},
        108:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 26,
           'firstmon' :10},
        109:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 26,
           'firstmon' :10},
        110:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 26,
           'firstmon' :10}
                 }

In [13]:
# Concatenate 
def concat_features(result, keys, axis=0):
    list_= [result[key] for key in keys]
    return np.concatenate(list_,axis=axis)
    
keys=[1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101] # key to concatenate
concat_N3W= concat_features(result, keys)

keys=[2,7,12,17,22,27,32,37,42,47,52,57,62,67,72,77,82,87,92,97,102] # key to concatenate
concat_N3E= concat_features(result, keys)

keys=[3,8,13,18,23,28,33,38,43,48,53,58,63,68,73,78,83,88,93,98,103] # key to concatenate
concat_N4W= concat_features(result, keys)

keys=[4,9,14,19,24,29,34,39,44,49,54,59,64,69,74,79,84,89,94,99,104] # key to concatenate
concat_N4E= concat_features(result, keys)

keys=[5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105] # key to concatenate
concat_N12= concat_features(result, keys)
##

input_train=np.concatenate([concat_N3W,concat_N3E,concat_N4W,concat_N4E,concat_N12],axis=1)

keys=[106,107,108,109,110] # key to concatenate
input_test= concat_features(result, keys, axis=1)

#create labels
labels_train,labels_test=labels.create_labels()

In [14]:
#Scores for Test addition of extra nino regions 3E/W ect. 
clf.fit(input_train, labels_train)
pred=clf.predict(input_test)
a_score=accuracy_score(labels_test, pred)
p_score_CP=precision_score(labels_test, pred,labels=['CP'], average=None)
p_score_EL=precision_score(labels_test, pred,labels=['EL'], average=None)
p_score_LN=precision_score(labels_test, pred,labels=['LN'], average=None)
p_score_NE=precision_score(labels_test, pred,labels=['NE'], average=None)
r_score_CP=recall_score(labels_test, pred,labels=['CP'], average=None)
r_score_EL=recall_score(labels_test, pred,labels=['EL'], average=None)
r_score_LN=recall_score(labels_test, pred,labels=['LN'], average=None)
r_score_NE=recall_score(labels_test, pred,labels=['NE'], average=None)
clf_score=cross_val_score(clf, input_train, labels_train, cv=5).mean()

#print scores    
print('clf',clf_score)
print('acc',a_score)
print('CP',p_score_CP)
print('EL',p_score_EL)
print('LN',p_score_LN)
print('NE',p_score_NE)
print('R-CP',r_score_CP)
print('R-EL',r_score_EL)
print('R-LN',r_score_LN)
print('R-NE',r_score_NE)

clf 0.9064150943396226
acc 0.9186991869918699
CP [0.92307692]
EL [1.]
LN [1.]
NE [0.875]
R-CP [0.92307692]
R-EL [1.]
R-LN [0.69230769]
R-NE [0.984375]


In [15]:
# Test addition of more months - June -> March

In [16]:
#import new labels
import labels_create_ENSO2 as labels

In [17]:
#set up path to the data
path = '/home/nicola/Documents/classification_clean/data/regions/'

#create dictionary with all input data
dict_data_struc ={
        1:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :6},
        2:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        3:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        4:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        5:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6},
        6:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' :42,
           'firstmon' :6},
        7:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        8:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        9:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        10:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6}, 
        11:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :6},
        12:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        13:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        14:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        15:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6}, 
        16:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 46,
           'firstmon' :6},
        17:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 46,
           'firstmon' :6},
        18:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 46,
           'firstmon' :6},
        19:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 46,
           'firstmon' :6},
        20:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 46,
           'firstmon' :6}, 
        21:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 5,
           'firstmon' :6},
        22:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 5,
           'firstmon' :6},
        23:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 5,
           'firstmon' :6},
        24:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 5,
           'firstmon' :6},
        25:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 5,
           'firstmon' :6},       
        26:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        27:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        28:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        29:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        30:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        31:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 40,
           'firstmon' :6},
        32:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 40,
           'firstmon' :6},
        33:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 40,
           'firstmon' :6},
        34:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 40,
           'firstmon' :6},
        35:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 40,
           'firstmon' :6},
        36:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        37:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        38:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        39:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        40:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        41:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        42:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        43:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        44:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        45:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        46:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        47:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        48:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        49:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        50:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        51:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        52:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        53:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        54:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        55:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        56:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        57:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        58:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        59:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        60:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        61:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        62:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        63:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        64:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        65:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        66:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        67:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        68:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        69:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        70:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        71:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        72:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        73:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        74:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        75:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        76:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        77:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        78:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        79:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        80:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        81:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        82:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        83:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        84:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        85:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        86:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        87:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        88:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        89:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        90:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        91:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        92:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        93:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        94:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        95:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        96:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        97:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        98:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        99:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        100:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        101:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :1},
        102:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :1},
        103:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :1},
        104:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :1},
        105:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :1},
        106:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 26,
           'firstmon' :6},
        107:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 26,
           'firstmon' :6},
        108:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 26,
           'firstmon' :6},
        109:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 26,
           'firstmon' :6},
        110:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 26,
           'firstmon' :6}
                 }

In [18]:
#process all data
result = ut_cla.preprocess_from_dict2(dict_data_struc, path, verbose=False) 


In [19]:
# Concatenate 
def concat_features(result, keys, axis=0):
    list_= [result[key] for key in keys]
    return np.concatenate(list_,axis=axis)
    
keys=[1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101] # key to concatenate
concat_N3W= concat_features(result, keys)

keys=[2,7,12,17,22,27,32,37,42,47,52,57,62,67,72,77,82,87,92,97,102] # key to concatenate
concat_N3E= concat_features(result, keys)

keys=[3,8,13,18,23,28,33,38,43,48,53,58,63,68,73,78,83,88,93,98,103] # key to concatenate
concat_N4W= concat_features(result, keys)

keys=[4,9,14,19,24,29,34,39,44,49,54,59,64,69,74,79,84,89,94,99,104] # key to concatenate
concat_N4E= concat_features(result, keys)

keys=[5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105] # key to concatenate
concat_N12= concat_features(result, keys)
##

input_train=np.concatenate([concat_N3W,concat_N3E,concat_N4W,concat_N4E,concat_N12],axis=1)

keys=[106,107,108,109,110] # key to concatenate
input_test= concat_features(result, keys, axis=1)

In [20]:
#create labels
labels_train,labels_test=labels.create_labels()

In [21]:
#Scores for  Test addition of more months - June -> March
clf.fit(input_train, labels_train)
pred=clf.predict(input_test)
a_score=accuracy_score(labels_test, pred)
p_score_CP=precision_score(labels_test, pred,labels=['CP'], average=None)
p_score_EL=precision_score(labels_test, pred,labels=['EL'], average=None)
p_score_LN=precision_score(labels_test, pred,labels=['LN'], average=None)
p_score_NE=precision_score(labels_test, pred,labels=['NE'], average=None)
r_score_CP=recall_score(labels_test, pred,labels=['CP'], average=None)
r_score_EL=recall_score(labels_test, pred,labels=['EL'], average=None)
r_score_LN=recall_score(labels_test, pred,labels=['LN'], average=None)
r_score_NE=recall_score(labels_test, pred,labels=['NE'], average=None)
clf_score=cross_val_score(clf, input_train, labels_train, cv=5).mean()

#print scores    
print('clf',clf_score)
print('acc',a_score)
print('CP',p_score_CP)
print('EL',p_score_EL)
print('LN',p_score_LN)
print('NE',p_score_NE)
print('R-CP',r_score_CP)
print('R-EL',r_score_EL)
print('R-LN',r_score_LN)
print('R-NE',r_score_NE)

clf 0.9554716981132074
acc 0.959349593495935
CP [1.]
EL [1.]
LN [1.]
NE [0.92753623]
R-CP [0.92307692]
R-EL [1.]
R-LN [0.84615385]
R-NE [1.]


In [22]:
# Test addition of NSubtropical region

In [23]:
#set up path to the data
path = '/home/nicola/Documents/classification_clean/data/regions/'

#create dictionary with all input data
dict_data_struc ={
        1:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :6},
        2:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        3:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        4:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        5:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6},
        6:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' :42,
           'firstmon' :6},
        7:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        8:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        9:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        10:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6}, 
        11:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 42,
           'firstmon' :6},
        12:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 42,
           'firstmon' :6},
        13:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 42,
           'firstmon' :6},
        14:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 42,
           'firstmon' :6},
        15:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 42,
           'firstmon' :6}, 
        16:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 46,
           'firstmon' :6},
        17:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 46,
           'firstmon' :6},
        18:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 46,
           'firstmon' :6},
        19:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 46,
           'firstmon' :6},
        20:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 46,
           'firstmon' :6}, 
        21:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 5,
           'firstmon' :6},
        22:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 5,
           'firstmon' :6},
        23:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 5,
           'firstmon' :6},
        24:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 5,
           'firstmon' :6},
        25:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 5,
           'firstmon' :6},       
        26:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        27:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        28:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        29:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        30:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        31:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 40,
           'firstmon' :6},
        32:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 40,
           'firstmon' :6},
        33:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 40,
           'firstmon' :6},
        34:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 40,
           'firstmon' :6},
        35:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 40,
           'firstmon' :6},
        36:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        37:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        38:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        39:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        40:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        41:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :10},
        42:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :10},
        43:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :10},
        44:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :10},
        45:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :10},
        46:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        47:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        48:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        49:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        50:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        51:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        52:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        53:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        54:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        55:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        56:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        57:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        58:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        59:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        60:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        61:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        62:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        63:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        64:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        65:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        66:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        67:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        68:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        69:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        70:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        71:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        72:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        73:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        74:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        75:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        76:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        77:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        78:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        79:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        80:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        81:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        82:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        83:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        84:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        85:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        86:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        87:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        88:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        89:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        90:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        91:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        92:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        93:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        94:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        95:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        96:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :6},
        97:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :6},
        98:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :6},
        99:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :6},
        100:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :6},
        101:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 0,
           'firstmon' :1},
        102:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 0,
           'firstmon' :1},
        103:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 0,
           'firstmon' :1},
        104:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 0,
           'firstmon' :1},
        105:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 0,
           'firstmon' :1},
        106:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3W.nc',
           'crop' : 26,
           'firstmon' :6},
        107:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino3E.nc',
           'crop' : 26,
           'firstmon' :6},
        108:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4W.nc',
           'crop' : 26,
           'firstmon' :6},
        109:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino4E.nc',
           'crop' : 26,
           'firstmon' :6},
        110:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_nino12.nc',
           'crop' : 26,
           'firstmon' :6},
        111:{'filename' : 'ERSSTv3b.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 42,
           'firstmon' :10},        
        112:{'filename' : 'ERSSTv4.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' :42,
           'firstmon' :10},
        113:{'filename' : 'ERSSTv5.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 42,
           'firstmon' :10},
        114:{'filename' : 'COBESST2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 46,
           'firstmon' :10},
        115:{'filename' : 'COBEv1.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 5,
           'firstmon' :10},
        116:{'filename' : 'GODAS_1980_2020_processed.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        117:{'filename' : 'kaplan_sst.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 40,
           'firstmon' :10},
        118:{'filename' : 'GECCO2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        119:{'filename' : 'oisst-avhrr-v02r01__sst__monthly__UHAM-ICDC_198109_202004.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :2},
        120:{'filename' : 'ORAS4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        121:{'filename' : 'ORAs5_opa0_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        122:{'filename' : 'ORAs5_opa1_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        123:{'filename' : 'ORAs5_opa2_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        124:{'filename' : 'ORAs5_opa3_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        125:{'filename' : 'ORAs5_opa4_processed2.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        126:{'filename' : 'soda3.11.2_1980_2015_processed3.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        127:{'filename' : 'soda3.12.2_1980_2017_processed3.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        128:{'filename' : 'soda3.4.2_1980_2018_processed3.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        129:{'filename' : 'soda3.6.1_1980_2009_processed3.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        130:{'filename' : 'soda3.7.2_1980_2016_processed3.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :10},
        131:{'filename' : 'AMSRE_v07.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 0,
           'firstmon' :5},
        132:{'filename' : 'HadISST_sst.nc_regrid.nc_detrended.nc_deseason.nc_NStop.nc',
           'crop' : 26,
           'firstmon' :10}
                 }

In [24]:
#process all data
result = ut_cla.preprocess_from_dict2(dict_data_struc, path, verbose=False) 


#create labels
labels_train,labels_test=labels.create_labels()

In [25]:
# Concatenate 
def concat_features(result, keys, axis=0):
    list_= [result[key] for key in keys]
    return np.concatenate(list_,axis=axis)
    
keys=[1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101] # key to concatenate
concat_N3W= concat_features(result, keys)

keys=[2,7,12,17,22,27,32,37,42,47,52,57,62,67,72,77,82,87,92,97,102] # key to concatenate
concat_N3E= concat_features(result, keys)

keys=[3,8,13,18,23,28,33,38,43,48,53,58,63,68,73,78,83,88,93,98,103] # key to concatenate
concat_N4W= concat_features(result, keys)

keys=[4,9,14,19,24,29,34,39,44,49,54,59,64,69,74,79,84,89,94,99,104] # key to concatenate
concat_N4E= concat_features(result, keys)

keys=[5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105] # key to concatenate
concat_N12= concat_features(result, keys)

keys=[111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131]
concat_NStop= concat_features(result, keys)
##

input_train=np.concatenate([concat_N3W,concat_N3E,concat_N4W,concat_N4E,concat_N12,concat_NStop],axis=1)

keys=[106,107,108,109,110,132] # key to concatenate
input_test= concat_features(result, keys, axis=1)

In [27]:

a_score=[0 for i in range(10)]
p_score_CP=[0 for i in range(10)]
p_score_EL=[0 for i in range(10)]
p_score_LN=[0 for i in range(10)]
p_score_NE=[0 for i in range(10)]
r_score_CP=[0 for i in range(10)]
r_score_EL=[0 for i in range(10)]
r_score_LN=[0 for i in range(10)]
r_score_NE=[0 for i in range(10)]
clf_score=[0 for i in range(10)]


clf1 = KNeighborsClassifier(n_neighbors=1)
clf3 = RandomForestClassifier(max_depth=100)
clf4 = MLPClassifier(alpha=0.01,hidden_layer_sizes=(500, ), max_iter=1500)


clf = VotingClassifier(estimators=[('kNN', clf1),('RF', clf3), ('NN', clf4)], voting='soft')#, weights=[2, 1, 2


for x in range(0, 10):
    print(x)

    #choose classifier to be 
    clf.fit(input_train, labels_train)
    pred=clf.predict(input_test)
    a_score[x]=accuracy_score(labels_test, pred)
    p_score_CP[x]=precision_score(labels_test, pred,labels=['CP'], average=None)
    p_score_EL[x]=precision_score(labels_test, pred,labels=['EL'], average=None)
    p_score_LN[x]=precision_score(labels_test, pred,labels=['LN'], average=None)
    p_score_NE[x]=precision_score(labels_test, pred,labels=['NE'], average=None)
    r_score_CP[x]=recall_score(labels_test, pred,labels=['CP'], average=None)
    r_score_EL[x]=recall_score(labels_test, pred,labels=['EL'], average=None)
    r_score_LN[x]=recall_score(labels_test, pred,labels=['LN'], average=None)
    r_score_NE[x]=recall_score(labels_test, pred,labels=['NE'], average=None)
    clf_score[x]=cross_val_score(clf, input_train, labels_train, cv=5).mean()

    
print('neural net')
print('clf mean',np.mean(clf_score))
print('clf max',np.max(clf_score))
print('clf min',np.min(clf_score))

print('acc mean',np.mean(a_score))
print('acc max',np.max(a_score))
print('acc min',np.min(a_score))

print('CP mean',np.mean(p_score_CP))
print('CP max',np.max(p_score_CP))
print('CP min',np.min(p_score_CP))

print('EL mean',np.mean(p_score_EL))
print('EL max',np.max(p_score_EL))
print('EL min',np.min(p_score_EL))

print('LN mean',np.mean(p_score_LN))
print('LN max',np.max(p_score_LN))
print('LN min',np.min(p_score_LN))

print('NE mean',np.mean(p_score_NE))
print('NE max',np.max(p_score_NE))
print('NE min',np.min(p_score_NE))

print('R-NE mean',np.mean(r_score_NE))
print('R-NE max',np.max(r_score_NE))
print('R-NE min',np.min(r_score_NE))

print('R-CP mean',np.mean(r_score_CP))
print('R-CP max',np.max(r_score_CP))
print('R-CP min',np.min(r_score_CP))

print('R-EL mean',np.mean(r_score_EL))
print('R-EL max',np.max(r_score_EL))
print('R-EL min',np.min(r_score_EL))

print('R-LN mean',np.mean(r_score_LN))
print('R-LN max',np.max(r_score_LN))
print('R-LN min',np.min(r_score_LN))

print('R-NE mean',np.mean(r_score_NE))
print('R-NE max',np.max(r_score_NE))
print('R-NE min',np.min(r_score_NE))

0
1
2
3
4
5
6
7
8
9
neural net
clf mean 0.960679245283019
clf max 0.9630188679245283
clf min 0.9577358490566038
acc mean 0.9512195121951219
acc max 0.9512195121951219
acc min 0.9512195121951219
CP mean 1.0
CP max 1.0
CP min 1.0
EL mean 1.0
EL max 1.0
EL min 1.0
LN mean 1.0
LN max 1.0
LN min 1.0
NE mean 0.9142857142857143
NE max 0.9142857142857143
NE min 0.9142857142857143
R-NE mean 1.0
R-NE max 1.0
R-NE min 1.0
R-CP mean 0.9230769230769231
R-CP max 0.9230769230769231
R-CP min 0.9230769230769231
R-EL mean 1.0
R-EL max 1.0
R-EL min 1.0
R-LN mean 0.8076923076923077
R-LN max 0.8076923076923077
R-LN min 0.8076923076923077
R-NE mean 1.0
R-NE max 1.0
R-NE min 1.0
