In [2]:
import re
import numpy as np
import pandas as pd
from transformers import BertTokenizerFast
import sys
sys.path.append("../../src/model_DNN")
from deep_neural_net import DeepNeuralNetwork

# Load the custom tokenizer from the project's data_processor directory.
# The path is relative, so it resolves against the current working directory.
tokenizer = BertTokenizerFast.from_pretrained("../../src/data_processor/./custom_tokenizer")

# Text-cleaning helper
def clean_text(text: str) -> str:
    """Return *text* lowercased with everything except a-z and whitespace removed.

    Digits, punctuation and non-ASCII letters (including accented ones) are
    dropped; whitespace characters are kept as they are.
    """
    return re.sub(r'[^a-z\s]', '', text.lower())

# Text-tokenization helper
def process_texts(texts):
    """Tokenize *texts* with the module-level tokenizer and return the input ids.

    Every sequence is padded to, and truncated at, 128 tokens. Only the
    ``"input_ids"`` entry of the tokenizer output is returned.
    """
    encoded = tokenizer(
        texts,
        max_length=128,
        truncation=True,
        padding="max_length",
    )
    return encoded["input_ids"]

# Load the trained model: build an instance, then keep whatever ``load``
# returns for the saved file.
# NOTE(review): the ".pkl" extension suggests pickle-based loading - only load
# model files from a trusted source; confirm in DeepNeuralNetwork.load.
dnn = DeepNeuralNetwork().load("../../models/modelo_dnn.pkl")

# Mock class for compatibility with the model
class MockDataset:
    """Minimal dataset stand-in that exposes its data through an ``X`` attribute.

    In this file an instance wrapping the tokenized inputs is what gets
    passed to ``dnn.predict``.
    """
    def __init__(self, X):
        # Keep the given data as-is; no copy or validation is performed.
        self.X = X

def _read_id_text_pairs(file_path):
    """Parse ``ID;Text`` rows from *file_path* into parallel id and text lists."""
    ids, texts = [], []
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, so a
    # BOM-prefixed header line is still recognized as the header.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            # Skip blank lines and the (case-insensitive) header row.
            if not line or line.lower() == "id;text":
                continue
            # Split on the first ";" only: the text itself may contain ";".
            row_id, sep, text = line.partition(";")
            if not sep:
                # Rows without a delimiter are ignored (best-effort parsing).
                continue
            ids.append(row_id)
            texts.append(text)
    return ids, texts


def predict_from_txt(file_path, output_file="results-s1.csv"):
    """Classify every ``ID;Text`` row of a file and save the results as CSV.

    Each text is cleaned with ``clean_text``, tokenized with
    ``process_texts`` and scored by the module-level ``dnn`` model. A row is
    labelled ``"AI"`` when the first value of its prediction is >= 0.5 and
    ``"Human"`` otherwise; that same value is written to the ``Confiança``
    column.

    Args:
        file_path: Path to a UTF-8 text file with one ``ID;Text`` row per
            line. Blank lines, an ``ID;Text`` header and rows without a
            ``;`` are skipped.
        output_file: Destination of the ``;``-separated results file.

    Raises:
        ValueError: If the file contains no usable ``ID;Text`` row.
    """
    ids, texts = _read_id_text_pairs(file_path)
    if not ids:
        # Fail early with a clear message instead of an unpacking error.
        raise ValueError(f"No 'ID;Text' rows found in {file_path!r}")

    processed_texts = process_texts([clean_text(t) for t in texts])
    mock_dataset = MockDataset(np.array(processed_texts))
    predictions = dnn.predict(mock_dataset)

    # Extract the per-row score once; it feeds both output columns.
    scores = [p[0] for p in predictions]
    df = pd.DataFrame(
        {
            "ID": ids,
            "Label": ["AI" if score >= 0.5 else "Human" for score in scores],
            "Confiança": scores,
        }
    )

    df.to_csv(output_file, index=False, sep=";")
    print(f"Previsões salvas em {output_file}")

# Run the prediction on the dataset2 inputs file; the path is relative to the
# current working directory.
input_file = "../../datasets/dataset2_inputs.csv"
predict_from_txt(input_file)



Modelo carregado de ../../models/modelo_dnn.pkl
Previsões salvas em results-s1.csv
