In [2]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import classification_report

from joblib import load

import geopandas as gpd

import xgboost as xgb

import numpy as np
import pandas as pd

In [3]:
# Restore the three previously fitted classifiers from disk.
# NOTE(review): joblib files are pickle-based; only load artifacts from a trusted source.
rf, lr, pipe_xgb = [
    load(model_path)
    for model_path in (
        './models/random_forest.joblib',
        './models/logistic_reg.joblib',
        './models/xgb.joblib',
    )
]

In [4]:
# Read the train and test GeoJSON files into GeoDataFrames.
train, test = (
    gpd.read_file("./data/train_data_final.geojson"),
    gpd.read_file("./data/test_data_final.geojson"),
)

In [5]:
# Names for the 20 yearly columns: year_0, year_1, ..., year_19.
years_cols = [f'year_{idx}' for idx in range(20)]

In [6]:
# Relabel the leading columns of each frame with the names in years_cols;
# every column from position 20 onward keeps its current name.
cols_to_update_train = [*years_cols, *train.columns[20:].tolist()]
cols_to_update_test = [*years_cols, *test.columns[20:].tolist()]

train.columns = cols_to_update_train
test.columns = cols_to_update_test

In [7]:
# Model inputs are the columns from 'year_0' through 'year_19' (label-based,
# inclusive slice); the training target is the 'label' column.
feature_span = slice('year_0', 'year_19')
X, X_test = train.loc[:, feature_span], test.loc[:, feature_span]
y = train['label']

In [13]:
# Print one classification report per model, each evaluated against the
# test labels and followed by a blank-line separator.
for titulo, clasificador in (
    ("Classification report de RF", rf),
    ("Classification report de reg logística", lr),
    ("Classification report de XGBoost", pipe_xgb),
):
    print(titulo)
    print(classification_report(test['label'], clasificador.predict(X_test)))
    print("\n")

Classification report de RF
              precision    recall  f1-score   support

           0       0.72      0.59      0.65       877
           1       0.88      0.91      0.89       900
           2       0.70      0.81      0.75       859

    accuracy                           0.77      2636
   macro avg       0.77      0.77      0.76      2636
weighted avg       0.77      0.77      0.76      2636



Classification report de reg logística
              precision    recall  f1-score   support

           0       0.56      0.49      0.52       877
           1       0.77      0.77      0.77       900
           2       0.67      0.75      0.70       859

    accuracy                           0.67      2636
   macro avg       0.67      0.67      0.67      2636
weighted avg       0.67      0.67      0.67      2636



Classification report de XGBoost
              precision    recall  f1-score   support

           0       0.67      0.60      0.63       877
           1       0.86  

In [14]:
# Store each model's predictions for X_test as a new column on the test frame.
for columna, clasificador in (
    ('predictions_rf', rf),
    ('predictions_lr', lr),
    ('predictions_xgb', pipe_xgb),
):
    test[columna] = clasificador.predict(X_test)

In [15]:
def exporto_clasif(modelo, nombre):
    """Export a model's classification report on the test set to a CSV file.

    The report is computed by comparing ``modelo.predict(X_test)`` against
    ``test['label']``; both ``test`` and ``X_test`` are read from the
    notebook's global namespace.

    Parameters
    ----------
    modelo : fitted estimator
        Any object exposing ``predict`` that accepts ``X_test``.
    nombre : str
        Suffix used in the output file name; the report is written to
        ``data/preds/classification_report_<nombre>.csv``.

    Returns
    -------
    None
        The function is called for its side effect of writing the CSV.
    """
    # Local import so this cell works on its own, without editing the imports cell.
    import os

    report_data = classification_report(test['label'], modelo.predict(X_test), output_dict=True)

    # reset_index() turns the frame's index into a regular column, which is
    # then renamed from "index" to "metric" so it is kept in the CSV.
    dataframe = pd.DataFrame.from_dict(report_data).reset_index()
    dataframe = dataframe.rename(columns={"index": "metric"})

    # to_csv does not create missing directories; make sure the target exists
    # so the export does not fail with OSError on a fresh checkout.
    os.makedirs('data/preds', exist_ok=True)
    dataframe.to_csv('data/preds/classification_report_'+nombre+'.csv', index = False)

In [17]:
exporto_clasif(rf, 'random_forest')
exporto_clasif(lr, 'reg_log')
exporto_clasif(pipe_xgb, 'xgb')

In [16]:
# Persist the test frame, including the prediction columns added in the
# earlier cell, as GeoJSON.
predictions_path = "data/test_predictions.geojson"
test.to_file(predictions_path, driver='GeoJSON')