## Create Final Classifier
Train the final classifiers that will be used to process the rasters. Each fitted classifier is saved to disk as a joblib file.

In [6]:
from ErrorML.ErrorML import *
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold
import joblib

In [7]:
def run_and_get_cm(X_train, y_train, X_test, y_test):
    """Fit a fresh 'gnb_pca_default' pipeline and evaluate it on a test set.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed to the pipeline's ``fit``.
    X_test, y_test
        Held-out features and labels used for evaluation.

    Returns
    -------
    tuple
        ``(balanced_acc, cm)``: the balanced accuracy of the predictions on
        the test set and the corresponding confusion matrix.
    """
    classifier = create_pipeline(kind='gnb_pca_default')
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Only the two returned metrics are computed; plain accuracy and the
    # per-class precision/recall were previously calculated and discarded.
    balanced_acc = balanced_accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    return balanced_acc, cm

In [8]:
def get_classifier(filename, output_filename, classes=None, categorised=True, focal=False,
                   scale=False, exclude=None, absolute=False, random_state=None):
    """Train a 'gnb_pca_default' pipeline on all rows of a file and save it with joblib.

    The data is loaded with ``load_data``, the ``Type`` column is recoded
    ('Exposed' -> 0, 'Submerged' -> 1), features and labels are produced by
    ``get_processed_data``, the samples are rebalanced with
    ``RandomOverSampler`` and the fitted pipeline is written to
    ``output_filename``.

    Parameters
    ----------
    filename
        Input file handed to ``load_data``.
    output_filename
        Destination passed to ``joblib.dump``. If it is a path, its parent
        directory is created when missing.
    classes
        Forwarded to ``get_processed_data``. Defaults to
        ``[-2, -0.5, 0.5, 6.5]`` when ``None``.
    categorised, focal, scale, exclude, absolute
        Forwarded unchanged to ``get_processed_data``.
    random_state
        Seed forwarded to ``RandomOverSampler``. The default ``None`` keeps
        the previous unseeded behaviour; pass an int to make the resampling
        (and therefore the saved classifier) reproducible.

    Returns
    -------
    None
        The fitted pipeline is only persisted to disk.
    """
    import os

    if classes is None:
        # Built per call so the default list is never shared between calls.
        classes = [-2, -0.5, 0.5, 6.5]

    df = load_data(filename)

    # Recode the textual surface type as integers.
    df.loc[df['Type'] == 'Exposed', 'Type'] = 0
    df.loc[df['Type'] == 'Submerged', 'Type'] = 1

    X, y = get_processed_data(df, classes=classes, categorised=categorised, focal=focal,
                              scale=scale, exclude=exclude, absolute=absolute)
    X = X.values
    print(X.shape)

    # Open question from the original author: should the data be split into
    # train and test again here? For now all rows are used for training.
    ros = RandomOverSampler(random_state=random_state)
    X_train, y_train = ros.fit_resample(X, y)

    classifier = create_pipeline(kind='gnb_pca_default')
    classifier.fit(X_train, y_train)

    # joblib.dump does not create missing directories; do it here so a fresh
    # checkout does not fail at the very last step. File-like targets are
    # left alone.
    if isinstance(output_filename, (str, os.PathLike)):
        out_dir = os.path.dirname(output_filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    joblib.dump(classifier, output_filename)
    

In [9]:
# Boundary lists passed as `classes` to get_classifier.
# Presumably bin edges for the error classes (11 values -> 10 classes,
# 4 values -> 3 classes); units are not shown here -- TODO confirm.
classes_10 = [-2, -0.5, -0.2, -0.1, -0.05, 0, 0.05, 0.1, 0.2, 0.5, 6.5]
classes_3 = [-2, -0.2, 0.2, 6.5]

In [10]:
# Train and save the 2016 classifier using the classes_3 boundaries.
# NOTE(review): `exclude` is a list here but a bare string in the later calls;
# the recorded output shapes are the same either way -- confirm both forms are
# intended. `focal=None` also differs from the function default (False).
get_classifier('2016_ValidationPts_ALL_Updated17May2019_ROBIN.csv',
               'results_newMay2017/classifiers/2016_3class_GNBPCA.jbl',
               classes=classes_3, focal=None, exclude=['Blur'])

['Blur']
(1522, 16)
(1522, 15)
Before BT
(1522, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
After BT
(1522, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
At return
(1522, 15)
(1522, 15)


In [11]:
# Train and save the 2016 classifier using the classes_10 boundaries.
# NOTE(review): `exclude` is a bare string here but a list in the first call --
# confirm get_processed_data accepts both. `focal=None` differs from the
# function default (False).
get_classifier('2016_ValidationPts_ALL_Updated17May2019_ROBIN.csv',
               'results_newMay2017/classifiers/2016_10class_GNBPCA.jbl',
               classes=classes_10, focal=None, exclude='Blur')

Blur
(1522, 16)
(1522, 15)
Before BT
(1522, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
After BT
(1522, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
At return
(1522, 15)
(1522, 15)


In [12]:
# Train and save the 2017 classifier using the classes_3 boundaries.
# NOTE(review): `focal=None` differs from the function default (False) --
# confirm the two are treated the same by get_processed_data.
get_classifier('2017_ValidationPts_ALL_Update17May2019_ROBIN.csv',
               'results_newMay2017/classifiers/2017_3class_GNBPCA.jbl',
               classes=classes_3, focal=None, exclude='Blur')

Blur
(2091, 16)
(2091, 15)
Before BT
(2091, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
After BT
(2091, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
At return
(2091, 15)
(2091, 15)


In [13]:
# Train and save the 2017 classifier using the classes_10 boundaries.
# NOTE(review): `focal=None` differs from the function default (False) --
# confirm the two are treated the same by get_processed_data.
get_classifier('2017_ValidationPts_ALL_Update17May2019_ROBIN.csv',
               'results_newMay2017/classifiers/2017_10class_GNBPCA.jbl',
               classes=classes_10, focal=None, exclude='Blur')

Blur
(2091, 16)
(2091, 15)
Before BT
(2091, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
After BT
(2091, 15)
Index(['MaxSl_Foc', 'MinSl_Foc', 'StdSl_Foc', 'CQ_Mean_Foc', 'Rough40_Foc',
       'Slope', 'Rough40', 'CQ_Mean', 'Pt_Density', 'VEG_TREES', 'DepthRC_JD',
       'Shadow', 'Reflection', 'Type', 'Precsn_m'],
      dtype='object')
At return
(2091, 15)
(2091, 15)
