In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler

from joblib import dump, load
import geopandas as gpd
import pandas as pd
import numpy as np

In [2]:
# Load the training and test sets from their GeoJSON files.
# Paths are relative to the notebook's working directory.
train = gpd.read_file("./data/train_data_final.geojson")
test = gpd.read_file("./data/test_data_final.geojson")

In [3]:
# Features: every column from 'NDVI_2000' through 'NDVI_2019'. Label-based
# .loc slices include both endpoints and follow the frame's column order.
# Target: the 'label_0' column.
# NOTE(review): presumably one NDVI column per year -- confirm against the data.
X_train, y_train = train.loc[:, 'NDVI_2000':'NDVI_2019'], train['label_0']
X_test, y_test = test.loc[:, 'NDVI_2000':'NDVI_2019'], test['label_0']

In [4]:
# Model pipeline: standardise the features, then classify with a random forest.
# The step names ('preproc_scaling', 'rf') are the prefixes used to address
# hyper-parameters in the search grid (e.g. 'rf__max_depth'), so they must not
# be renamed without updating the grid.
pipe = Pipeline(
    [
        ('preproc_scaling', StandardScaler()),
        # Seed the forest so the grid search and the refit model are
        # reproducible across runs. An unseeded forest draws fresh bootstrap
        # samples and feature subsets on every fit, which makes the CV scores
        # and the selected hyper-parameters vary from run to run. 123 matches
        # the seed used for the KFold splitter.
        ('rf', RandomForestClassifier(random_state=123)),
    ]
)

In [5]:
# Hyper-parameter grid for the 'rf' pipeline step (keys use the
# '<step>__<param>' convention). max_features sweeps every integer from 1 up
# to the number of feature columns in X_train.
parameters = dict(
    rf__n_estimators=[100, 200, 300, 400, 500],
    rf__max_features=np.arange(1, X_train.shape[1] + 1),
    rf__max_depth=[1, 10, 25, 30, 50, 75],
)

In [6]:
# Exhaustive search over `parameters`, scored by macro-averaged F1 on a
# shuffled, seeded 5-fold split. `return_train_score` and `refit` are left at
# their defaults, so the best candidate is refit and used by `.predict`.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=parameters,
    scoring='f1_macro',
    cv=KFold(n_splits=5, shuffle=True, random_state=123),
    n_jobs=16,
    verbose=1,
)


In [7]:
# Run the full cross-validated search on the training data. This is the
# expensive cell: one pipeline fit per (candidate, fold) pair.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 570 candidates, totalling 2850 fits


GridSearchCV(cv=KFold(n_splits=5, random_state=123, shuffle=True),
             estimator=Pipeline(steps=[('preproc_scaling', StandardScaler()),
                                       ('rf', RandomForestClassifier())]),
             n_jobs=16,
             param_grid={'rf__max_depth': [1, 10, 25, 30, 50, 75],
                         'rf__max_features': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19]),
                         'rf__n_estimators': [100, 200, 300, 400, 500]},
             scoring='f1_macro', verbose=1)

In [8]:
# Predict the held-out test set with the fitted search object.
y_pred = grid_search.predict(X=X_test)

In [9]:
# Per-class precision / recall / F1 on the test set. print() is needed here
# because the report is a pre-formatted multi-line string.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.72      0.60      0.65       877
           1       0.88      0.91      0.89       900
           2       0.71      0.80      0.75       859

    accuracy                           0.77      2636
   macro avg       0.77      0.77      0.76      2636
weighted avg       0.77      0.77      0.77      2636



In [10]:
# Persist the whole fitted search object for later reuse.
# The ./models directory must already exist.
dump(value=grid_search, filename='./models/cv_rf.joblib')

['./models/cv_rf.joblib']