In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from joblib import dump, load
import geopandas as gpd
import xgboost as xgb
import pandas as pd

In [2]:
# Read the prepared training and test tables from their GeoJSON files.
train = gpd.read_file("./data/train_data_final.geojson")
test = gpd.read_file("./data/test_data_final.geojson")

In [3]:
# Features: every column from 'NDVI_2000' through 'NDVI_2019' (a label-based
# .loc slice, so both end columns are included). Target: the 'label_0' column.
X_train, y_train = train.loc[:, 'NDVI_2000':'NDVI_2019'], train['label_0']
X_test, y_test = test.loc[:, 'NDVI_2000':'NDVI_2019'], test['label_0']

In [13]:
# Two-step model: standardise the features, then fit an XGBoost classifier.
pipe = Pipeline(
    [
        ('preproc_scaling', StandardScaler()),
        # n_jobs=1 here because the surrounding GridSearchCV already runs its
        # fits in parallel; giving every fit its own 16 threads on top of that
        # oversubscribes the CPU and slows both levels down.
        #
        # early_stopping_rounds is deliberately not set: XGBoost early stopping
        # needs a validation set passed as `eval_set` at fit time, and nothing
        # in this pipeline / grid search supplies one, so the setting could not
        # work (recent XGBoost releases raise an error in that situation).
        ('xgboost', xgb.XGBClassifier(n_jobs=1)),
    ]
)

In [14]:
# Hyper-parameter grid for the 'xgboost' pipeline step (the "xgboost__" prefix
# routes each entry to that step): 4 learning rates x 3 tree depths, with the
# objective and the number of trees held fixed -> 12 candidates in total.
params = {
    "xgboost__objective": ["multi:softprob"],
    "xgboost__learning_rate": [0.0001, 0.001, 0.01, 0.1],
    "xgboost__max_depth": [2, 5, 10],
    "xgboost__n_estimators": [10000],
}

In [15]:
# Exhaustive search over `params`, evaluated with 5-fold shuffled
# cross-validation (fixed random_state so the fold assignment is repeatable).
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    cv=KFold(n_splits=5, shuffle=True, random_state=123),
    n_jobs=16,
    verbose=2,
)

In [16]:
%%time
# Run the cross-validated grid search on the training data:
# 12 parameter candidates x 5 folds = 60 fits. %%time reports how long it took.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits



KeyboardInterrupt



In [None]:
# Predict labels for the held-out test features with the fitted search object.
y_pred = grid_search.predict(X=X_test)

In [None]:
# Text summary comparing the test predictions with the true labels.
# classification_report returns a plain string, hence the explicit print.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Persist the whole fitted GridSearchCV object to disk with joblib.
dump(value=grid_search, filename='./models/cv_xgb.joblib')