# Modelo con sklearn

In [None]:
import os
import json
import joblib
import numpy as np
import pandas as pd
from glob import glob
from sqlite3 import connect
from tqdm.notebook import tqdm
from sklearn.ensemble import RandomForestClassifier

from utilities import *

## Iteración #01

### Armado de dataset de entrenamiento

Levantamos la tabla `output` de los archivos `.sqlite` de ambos _tiles_ y las concatenamos en un único dataframe.

In [None]:
# Collect every SQLite export under ../data/selection. glob() returns paths
# in an OS-dependent order, so they are sorted to keep the row order of the
# resulting table stable between runs and machines.
sqlite_files = sorted(glob('../data/selection/*.sqlite'))

# Read the `output` table of each file. Frames are gathered in a list and
# concatenated once, instead of re-copying a growing DataFrame per file.
frames = []
for sf in sqlite_files:
    cnx = connect(sf)
    try:
        df = pd.read_sql_query("SELECT * FROM output", cnx)
        frames.append(df)
    finally:
        # sqlite3 connections are not closed automatically; release the
        # file handle even when the query fails.
        cnx.close()

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no .sqlite file was found.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Preview the first five rows of the combined table.
data.head(n=5)

In [None]:
# Number of pixels per crop (`cultivo`) for the first training run,
# displayed as a single-row table labelled 'pixeles'.
# The counts Series is named explicitly before to_frame(): pandas < 2.0 names
# it after the source column ('cultivo') while pandas >= 2.0 names it 'count',
# so renaming a 'cultivo' column afterwards silently does nothing on newer
# versions.
(
    data['cultivo']
    .value_counts()
    .rename('pixeles')
    .to_frame()
    .T
)

### Entrenamiento del modelo

In [None]:
# Features: every column whose name matches 'band_'; labels: the 'id' column.
X = data.filter(regex='band_').to_numpy()
y = data['id'].to_numpy()

# random_state is fixed so repeated fits on the same rows are reproducible.
model = RandomForestClassifier(
    random_state=20220707,
    n_estimators=500,
    max_depth=10,
    n_jobs=-1,
    verbose=1,
)

# Persist the full parameter set next to the model artefacts before fitting.
os.makedirs('../model', exist_ok=True)
with open('../model/randomforest_parameters.json', 'w') as f:
    json.dump(model.get_params(), f, ensure_ascii=False, indent=4)

model.fit(X, y)

# One feature importance per line, in the column order of X.
# NOTE(review): 'impotances' looks like a typo for 'importances'; the file
# name is kept unchanged because other code may read this exact path.
with open('../model/randomforest_feature_impotances.txt', 'w') as f:
    f.writelines(f'{i}\n' for i in model.feature_importances_.tolist())

# Single source of truth for the model path (previously the literal was
# duplicated and this variable was never used).
output_file = '../model/randomforest_iter_01.joblib'
joblib.dump(model, output_file)

### Predicciones

In [None]:
# Predict class probabilities for every tile, window by window, and save one
# .npy array per tile under ../predictions/.
os.makedirs('../predictions/', exist_ok=True)
tif_files = glob('../data/concat/*')

for tile in tif_files:
    width, height, transform = metadata_from_tile(tile)
    # Materialised so tqdm can show a total.
    # NOTE(review): presumably 100x100-pixel windows; confirm against
    # `sliding_windows` in utilities.
    windows = list(sliding_windows(100, 100, width, height))

    # Per-window probability blocks are collected and stacked once at the
    # end; np.append inside the loop re-copied the whole array every time.
    chunks = []
    for window, _ in tqdm(windows, total=len(windows)):
        img_df = create_windowed_dataset(tile, window)
        res = model.predict_proba(img_df).astype(np.float64)
        chunks.append(res)

    # NOTE(review): the previous version seeded the array with
    # np.empty((10000, 7)), so every saved file started with 10000 rows of
    # uninitialised memory. Those rows are no longer written; any consumer
    # that skipped them must be adjusted.
    if chunks:
        preds = np.concatenate(chunks, axis=0)
    else:
        # No windows: keep a well-formed (0, n_classes) array.
        preds = np.empty((0, len(model.classes_)))

    # `tile` is a full path from glob; interpolating it directly produced
    # '../predictions/randomforest_../data/concat/...', which points into a
    # directory that does not exist. Use only the file stem.
    tile_name = os.path.splitext(os.path.basename(tile))[0]
    with open(f'../predictions/randomforest_{tile_name}.npy', 'wb') as f:
        np.save(f, preds)