In [1]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import xgboost as xgb
from joblib import dump, load
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import Pipeline

In [2]:
# Restore the three persisted search objects (random forest, logistic
# regression, XGBoost) from ./models; later cells read `.best_estimator_`
# from each of them.
# NOTE: joblib files are pickle-based, so loading them can execute arbitrary
# code -- only load artefacts that come from a trusted source.
pipe_rf, pipe_lr, pipe_xgb = (
    load(model_path)
    for model_path in (
        './models/cv_rf.joblib',
        './models/cv_lr.joblib',
        './models/cv_xgb.joblib',
    )
)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [3]:
# Read the train and test splits from their GeoJSON files with geopandas.
train, test = (
    gpd.read_file(geojson_path)
    for geojson_path in (
        "./data/train_data_final.geojson",
        "./data/test_data_final.geojson",
    )
)

In [4]:
# Display the test set's column names to see which feature and label columns
# are available before selecting them below.
test.columns

Index(['NDVI_2000', 'NDVI_2001', 'NDVI_2002', 'NDVI_2003', 'NDVI_2004',
       'NDVI_2005', 'NDVI_2006', 'NDVI_2007', 'NDVI_2008', 'NDVI_2009',
       'NDVI_2010', 'NDVI_2011', 'NDVI_2012', 'NDVI_2013', 'NDVI_2014',
       'NDVI_2015', 'NDVI_2016', 'NDVI_2017', 'NDVI_2018', 'NDVI_2019',
       'NDVI_2020', 'id', 'label', 'geometry'],
      dtype='object')

In [5]:
# Feature matrices: every column from 'NDVI_2000' through 'NDVI_2019'.
# `.loc` label slices include both endpoints, so 'NDVI_2019' is kept and the
# later 'NDVI_2020' column of the test set is left out.
X, X_test = (
    frame.loc[:, 'NDVI_2000':'NDVI_2019']
    for frame in (train, test)
)
# Training target.
y = train['label']

In [6]:
# Pull the best estimator out of each loaded search object, in the same
# RF / logistic regression / XGBoost order used throughout the notebook.
best_model_rf, best_model_lr, best_model_xgb = (
    search.best_estimator_
    for search in (pipe_rf, pipe_lr, pipe_xgb)
)

In [7]:
# Hold-out evaluation: print one classification report per evaluated model.
#
# XGBoost is intentionally not listed. Its report call was already disabled in
# this cell (and pointed at a 'label.x' column that the test set does not
# have), so printing its header only produced an empty section in the output.
# TODO: add ("XGBoost", best_model_xgb) below once that model is evaluated.
y_true = test['label']
for titulo, modelo_evaluado in (
    ("RF", best_model_rf),
    ("reg logística", best_model_lr),
):
    print(f"Classification report de {titulo}")
    print(classification_report(y_true, modelo_evaluado.predict(X_test)))
    print("\n")

Classification report de RF
              precision    recall  f1-score   support

           0       0.72      0.59      0.65       877
           1       0.88      0.91      0.89       900
           2       0.70      0.81      0.75       859

    accuracy                           0.77      2636
   macro avg       0.77      0.77      0.76      2636
weighted avg       0.77      0.77      0.76      2636



Classification report de reg logística
              precision    recall  f1-score   support

           0       0.56      0.49      0.52       877
           1       0.77      0.77      0.77       900
           2       0.67      0.75      0.70       859

    accuracy                           0.67      2636
   macro avg       0.67      0.67      0.67      2636
weighted avg       0.67      0.67      0.67      2636



Classification report de XGBoost


In [9]:
# Attach each model's test-set predictions to `test` as a new column.
# XGBoost predictions are currently not written (no 'predictions_xgb' column).
for columna, estimador in (
    ('predictions_rf', best_model_rf),
    ('predictions_lr', best_model_lr),
):
    test[columna] = estimador.predict(X_test)

In [10]:
# Write the test set to disk as GeoJSON.
test.to_file(
    filename="data/test_predictions.geojson",
    driver="GeoJSON",
)
# For GEE (presumably Google Earth Engine), as a shapefile (currently disabled):
#test.to_file("data/preds/test_predictions.shp")


In [8]:
def exporto_clasif(modelo, nombre, out_dir='data/preds'):
    """Export a model's classification report on the test set to a CSV file.

    The report is computed with ``classification_report(..., output_dict=True)``
    using the notebook-level globals ``test['label']`` (true labels) and
    ``X_test`` (features passed to ``modelo.predict``), so both must be defined
    before this function is called. The exported table has one column per
    top-level entry of the report dict, plus a ``metric`` column holding the
    former index.

    Parameters
    ----------
    modelo : estimator
        Fitted model exposing ``predict``.
    nombre : str
        Suffix used in the output file name,
        ``classification_report_<nombre>.csv``.
    out_dir : str, optional
        Directory that receives the CSV; it is created when missing.
        Defaults to ``'data/preds'``, the location that used to be hard-coded.

    Returns
    -------
    None
    """
    report_data = classification_report(
        test['label'], modelo.predict(X_test), output_dict=True
    )
    dataframe = (
        pd.DataFrame.from_dict(report_data)
        .reset_index()
        .rename(columns={"index": "metric"})
    )

    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing.
    os.makedirs(out_dir, exist_ok=True)
    dataframe.to_csv(
        os.path.join(out_dir, 'classification_report_' + nombre + '.csv'),
        index=False,
    )

In [9]:
# Write one CSV classification report per evaluated model.
# The XGBoost export is currently disabled (no 'xgb' report is written).
for modelo_final, sufijo in (
    (best_model_rf, 'random_forest'),
    (best_model_lr, 'reg_log'),
):
    exporto_clasif(modelo_final, sufijo)