# Question Generator - Implantação

Utiliza um transformer T5 pré-treinado em português e disponibilizado pela [Hugging Face](https://huggingface.co/).<br>

### **Em caso de dúvidas, consulte os [tutoriais da PlatIAgro](https://platiagro.github.io/tutorials/).**

## Declaração de Classe para Predições em Tempo Real

A tarefa de implantação cria um serviço REST para predições em tempo real.<br>
Para isso você deve criar uma classe `Model` que implementa o método `predict`.

In [2]:
%%writefile Model.py
import joblib
import pandas as pd
import numpy as np
from typing import List
from expander import DocExpander
from aux_functions import build_df_result


class Model:
    """Serve question generation for real-time predictions.

    The deployment task instantiates this class and calls ``predict``.
    Artifacts are loaded lazily, on the first ``predict`` call, from
    ``ARTIFACTS_PATH``.
    """

    # Location of the joblib file read by ``load``.
    ARTIFACTS_PATH = "/tmp/data/qgenerator.joblib"

    def __init__(self):
        # Flipped to True by ``load`` once every artifact is in place.
        self.loaded = False

    def load(self):
        """Read the serialized artifacts and cache them on the instance.

        NOTE(review): joblib files are pickle-based, so this file must come
        from a trusted source (presumably the training step of the same
        pipeline -- confirm).

        Raises:
            KeyError: if an expected key is missing from the artifacts dict.
        """
        artifacts = joblib.load(self.ARTIFACTS_PATH)
        self.model = artifacts["model"]
        self.expand_context = artifacts["expand_context"]
        self.infer_num_gen_sentences = artifacts["infer_num_gen_sentences"]
        self.column_context = artifacts["column_context"]
        self.column_question = artifacts["column_question"]
        self.column_doc_id = artifacts["column_doc_id"]
        self.loaded = True

    def expand(self, df):
        """Optionally expand ``df`` with ``DocExpander``.

        Args:
            df: DataFrame holding the generated questions.

        Returns:
            The result of ``DocExpander.expand_sql`` when
            ``self.expand_context`` is truthy; otherwise ``df`` itself,
            unchanged.
        """
        # Guard clause: without it the result variable was never bound when
        # expansion is disabled, and the method raised UnboundLocalError.
        if not self.expand_context:
            return df

        expander = DocExpander()
        return expander.expand_sql(
            df,
            context_column_name=self.column_context,
            questions_column_name=self.column_question,
        )

    def predict(self, X, feature_names, meta=None):
        """Generate questions for the contexts contained in ``X``.

        Args:
            X: Row-oriented input data; it is wrapped in a DataFrame whose
                columns are ``feature_names``.
            feature_names: Column names of ``X``. Must equal
                ``[column_doc_id, column_context]`` from the artifacts.
            meta: Optional request metadata; not used here.

        Returns:
            The DataFrame produced by ``build_df_result``, passed through
            ``expand``. A DataFrame is returned, not a NumPy array.

        Raises:
            ValueError: if ``feature_names`` differs from the expected names.
        """
        if not self.loaded:
            self.load()

        feature_names_pipeline = [self.column_doc_id, self.column_context]
        if feature_names != feature_names_pipeline:
            raise ValueError(f'feature_names deve ser {feature_names_pipeline}')

        df_input = pd.DataFrame(X, columns=feature_names)
        contexts = df_input[self.column_context].to_numpy()
        gen_questions_dict = self.model.forward(
            contexts=contexts,
            num_gen_sentences=self.infer_num_gen_sentences,
        )
        df_result = build_df_result(
            df_input=df_input,
            gen_questions_dict=gen_questions_dict,
            column_doc_id=self.column_doc_id,
            column_question=self.column_question,
        )

        return self.expand(df_result)

Overwriting Model.py


In [1]:
# import pandas as pd
# df = pd.read_csv("/tmp/data/fabc_reports-3.csv")
# n_lines = 10
# contexts = df['context'][:n_lines]
# indexes = df.index[:n_lines]

# df_small = pd.DataFrame({'doc_id':indexes,'context':contexts})
# X = df_small.to_numpy()

In [4]:
# from Model import Model
# model = Model()
# result = model.predict(X,['doc_id','context'])
# result