# Evaluación sobre verdad de campo original

In [None]:
import os
import json
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
from glob import glob
from sqlite3 import connect
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

from utilities import *

In [None]:
# Collect every SQLite database found under ../data/selection.
sqlite_files = glob('../data/selection/*.sqlite')

# Read the `output` table of each database into its own frame.
# The connection is closed explicitly: a sqlite3 connection used as a
# context manager only manages transactions, it does not close the handle.
frames = []
for sf in sqlite_files:
    cnx = connect(sf)
    try:
        frames.append(pd.read_sql_query("SELECT * FROM output", cnx))
    finally:
        cnx.close()

# Concatenate once instead of growing the frame inside the loop (which
# re-copies all accumulated rows on every iteration). With no input files
# the result is an empty DataFrame, as before.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Preview the first rows of the combined table (notebook display only).
data.head()

In [None]:
# Load the serialized classifier from disk.
# NOTE(review): joblib artifacts are pickle-based, so loading one can run
# arbitrary code — only load model files that come from a trusted source.
model = joblib.load('../model/randomforest_iter_01.joblib')

In [None]:
# Feature matrix: every column whose name matches the regex 'band_'.
band_features = data.filter(regex='band_')
X = band_features.to_numpy()

# Reference labels, taken from the 'id' column.
y = data['id'].to_numpy()

In [None]:
# Class-probability predictions for every row of X. The label lookup done
# afterwards indexes model.classes_ with the argmax column, so it assumes the
# columns here are ordered like model.classes_.
y_hat = model.predict_proba(X)

In [None]:
# Persist the probability matrix in NumPy's .npy format.
probas_path = '../predictions/randomforest_probas_train.npy'
with open(probas_path, 'wb') as out_file:
    np.save(out_file, y_hat)

In [None]:
# Winning (highest) probability for each row.
max_probas = y_hat.max(axis=1)

# Map each row's argmax column back to its class label with a single
# vectorized lookup instead of indexing model.classes_ once per row in a
# Python loop. list(...) keeps `classes` a list of NumPy scalars, exactly
# what the per-row comprehension produced.
top_columns = y_hat.argmax(axis=1)
classes = list(model.classes_[top_columns])

In [None]:
# Attach the predicted label and its winning probability as new columns.
prediction_columns = {'pred_class': classes, 'pred_score': max_probas}
data = data.assign(**prediction_columns)

In [None]:
# Rows whose predicted label equals the reference label.
is_hit = data['id'] == data['pred_class']
true_preds = data[is_hit]

# Lowest winning probability among the correctly predicted rows.
min_score = true_preds['pred_score'].min()

In [None]:
# Persist the minimum score of the correct predictions in .npy format.
minscore_path = '../predictions/randomforest_minscore_train.npy'
with open(minscore_path, 'wb') as out_file:
    np.save(out_file, min_score)

In [None]:
# Report how many rows were predicted correctly out of the total, and the
# lowest winning score among them. The messages are user-facing output and
# are kept in Spanish.
print(f'{len(true_preds)}/{len(data)} pixeles fueron correctamente predichos.')
print(f'El mínimo score de predicción fue: {min_score}')

In [None]:
# For 20 evenly spaced cut-off values in [0, 1], count how many correctly
# predicted rows have a winning score strictly above (1 - cut-off).
cutt_off = []
samples = []

# Loop-invariant: which rows were predicted correctly.
is_correct = data['id'] == data['pred_class']

for cut in np.linspace(0, 1, 20):
    # Vectorized comparison instead of a per-row apply/lambda. The column is
    # still written to `data`, so the frame ends in the same state as before
    # (holding the mask for the last cut-off).
    data['mask'] = data['pred_score'] > 1 - cut
    cutt_off.append(cut)
    samples.append(int((is_correct & data['mask']).sum()))


# Plot the number of retained correct samples against the cut-off and save
# the figure next to the other prediction outputs.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(6, 4))
sns.lineplot(x=cutt_off, y=samples, ax=ax)
ax.set_xlabel('Cut-off')
ax.set_ylabel('Samples')
ax.set_title('Cut-off vs. Samples', fontsize=18)
fig.savefig('../predictions/randomforest_train_cutoff.png')