# Model Training

In [None]:
import pandas as pd
import pycaret.classification as pc

In [None]:
# Read alunos_final.csv into a DataFrame; the trailing expression makes the
# notebook display its first row as the cell output.
DATASET_CSV = '../../src/output/alunos_final.csv'
df = pd.read_csv(DATASET_CSV)
df.head(n=1)

In [None]:
# Configure the PyCaret classification experiment on `df`, with `status` as
# the target column. The result is kept in `s`.
s = pc.setup(
    df,
    target='status',
    # Explicit column typing.
    categorical_features=['ccr', 'nome_docente', 'turno'],
    numeric_features=['freq_turma'],
    # Train/hold-out split: 99% of the rows go to training.
    # NOTE(review): this leaves only ~1% as hold-out -- confirm it is intended.
    train_size=0.99,
    # Preprocessing.
    # NOTE(review): per the PyCaret docs, iterative_imputation_iters is only
    # used with imputation_type='iterative', which is not set here -- confirm.
    iterative_imputation_iters=10,
    remove_outliers=True,
    outliers_method='iforest',
    # Cross-validation: 10 shuffled, stratified folds.
    fold_strategy='stratifiedkfold',
    fold=10,
    fold_shuffle=True,
    # Execution: fixed session id and 4 parallel jobs.
    session_id=123,
    n_jobs=4,
    # use_gpu=True is currently not passed (it was left commented out).
)

In [None]:
# Create a LightGBM model within the experiment configured by pc.setup; the
# result is kept in `lightgbm` for the evaluation, saving and plotting cells.
lightgbm = pc.create_model('lightgbm')

In [None]:
# Show PyCaret's evaluation output for the trained model.
pc.evaluate_model(lightgbm)
# Bare expression: the notebook renders the estimator as the cell output.
lightgbm

In [None]:
# Persist the trained model under src/models; the prediction section loads it
# back with load_model using this same path.
pc.save_model(lightgbm, '../../src/models/LGBMClassifier')

In [None]:
# Save the 'feature' and 'confusion_matrix' plots to disk (save=True), both
# rendered with the same scale.
plot_options = {'scale': 10, 'save': True}
pc.plot_model(lightgbm, plot='feature', **plot_options)
pc.plot_model(
    lightgbm,
    plot='confusion_matrix',
    plot_kwargs={'percent': True},
    **plot_options,
)

# Run the model without passing explicit data and keep the returned
# predictions in `values` (exported to CSV by the next cell).
values = pc.predict_model(lightgbm)

# pc.pull() fetches the most recent grid PyCaret displayed -- presumably the
# metrics printed by predict_model above; confirm against the PyCaret docs.
test = pc.pull()

# `test` is a pandas DataFrame; write it out as an HTML table.
test.to_html('../../src/output/values.html')

In [None]:
# Order the predictions by `ccr`, then `nome_docente`, in ascending order
# ('ano' is not currently used as a sort key).
sort_keys = ['ccr', 'nome_docente']
values = values.sort_values(by=sort_keys)

# Export the sorted predictions to CSV without the DataFrame index.
values.to_csv('../../src/output/values.csv', index=False)

# Predição

Caso já tenha executado os passos anteriores, basta executar as células abaixo para realizar a predição.

In [None]:
import ipywidgets as widgets
from IPython.display import HTML
import pandas as pd
from pycaret.classification import predict_model, load_model

In [None]:
# Reload the dataset so this section can run without the training cells.
df = pd.read_csv('../../src/output/alunos_final.csv')


def _sorted_options(column):
    """Return the distinct values of ``df[column]``, taken after sorting."""
    return df[column].sort_values().unique().tolist()


# One input widget per model feature, populated from the values in the data.
ccr = widgets.Dropdown(
    options=_sorted_options('ccr'),
    description='CCR:',
)
nome_docente = widgets.Dropdown(
    options=_sorted_options('nome_docente'),
    description='Docente:',
)
frequencia = widgets.SelectionSlider(
    options=_sorted_options('freq_turma'),
    description='Frequência:',
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
)
turno = widgets.Dropdown(
    options=_sorted_options('turno'),
    description='Turno:',
)

# Trailing expression: the notebook renders the widgets side by side.
widgets.HBox([ccr, nome_docente, turno, frequencia])

In [None]:
# Load the classifier that the training section saved under src/models.
prediction_model = load_model('../../src/models/LGBMClassifier')

# Build a single-row DataFrame from the values currently selected in the
# widgets; the keys match the feature column names used for training.
input_data = pd.DataFrame([{
    'ccr': ccr.value,
    'nome_docente': nome_docente.value,
    'freq_turma': frequencia.value,
    'turno': turno.value,
}])

new_prediction = predict_model(prediction_model, data=input_data)

# Move the 'prediction_label' column to position 0 under the name 'status'.
predicted_status = new_prediction.pop('prediction_label')
new_prediction.insert(0, 'status', predicted_status)

In [None]:
# Render the prediction as an HTML table (row index omitted) in the cell output.
HTML(new_prediction.to_html(index=False))