In [None]:
import os
import time
import json
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI
import csv
from concurrent.futures import ThreadPoolExecutor

# Load the variables declared in the .env file into the process environment.
load_dotenv()

# The API key is mandatory: abort early with a clear message when it is absent
# or empty instead of failing later on the first request.
deepseek_api = os.environ.get("DEEPSEEK_API_KEY")
if deepseek_api is None or deepseek_api == "":
    raise ValueError("Chave da API DEEPSEEK_API_KEY não encontrada no arquivo .env.")

# The OpenAI SDK client is pointed at the DeepSeek endpoint via base_url.
client = OpenAI(base_url="https://api.deepseek.com", api_key=deepseek_api)

def _extrair_notas(resposta_texto):
    """Parse the model reply and return the validated ``(content, wording)`` pair.

    Args:
        resposta_texto: Raw text returned by the model; expected to be a JSON
            object with the keys ``content`` and ``wording``.

    Returns:
        A ``(content, wording)`` tuple of ``int``/``float`` scores.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON.
        ValueError: If the JSON is not an object, a key is missing/null, or a
            score is not numeric.
    """
    notas = json.loads(resposta_texto)
    # Valid JSON may still be a list/number/string, which has no ``.get``.
    if not isinstance(notas, dict):
        raise ValueError("Resposta JSON não é um objeto")

    content = notas.get("content")
    wording = notas.get("wording")
    if content is None or wording is None:
        raise ValueError("Resposta não contém 'content' ou 'wording'")

    for nota in (content, wording):
        # bool is a subclass of int, so JSON true/false must be rejected
        # explicitly or it would be accepted as a score of 1/0.
        if isinstance(nota, bool) or not isinstance(nota, (int, float)):
            raise ValueError("Notas não são numéricas")

    return content, wording


def avaliar_resumo_com_prompt(prompt_text, summary_text):
    """Score a student summary against its prompt using the DeepSeek chat API.

    The model is asked to answer with a JSON object holding the keys
    ``content`` and ``wording``. Up to three attempts are made; between failed
    attempts the function sleeps ``2 ** attempt`` seconds (1 s, then 2 s).

    Args:
        prompt_text: The task prompt that was given to the students.
        summary_text: The summary written by the student.

    Returns:
        ``(content, wording)`` with the numeric scores, or ``(None, None)``
        when every attempt failed. Errors are printed, never raised, so one
        bad record does not abort a batch run.
    """
    max_retries = 3

    # The prompt does not change between attempts, so build it once.
    full_prompt = (
        "Considere o seguinte prompt que foi dado aos alunos:\n"
        f"{prompt_text}\n\n"
        "Agora, considere o seguinte resumo escrito pelo aluno:\n"
        f"{summary_text}\n\n"
        "Por favor, avalie o resumo com base na tarefa definida no prompt, e retorne uma resposta em JSON com as chaves 'content' e 'wording'.\n"
        "A nota para 'content' deve refletir a qualidade de representação da ideia principal e dos detalhes, e a nota para 'wording' deve refletir a clareza, precisão e fluência do texto.\n\n"
        "Resposta APENAS no formato JSON, sem nenhum texto adicional. Exemplo: {\"content\": 5, \"wording\": 5}"
    )

    for attempt in range(max_retries):
        # Reset per attempt so the JSON error handler never reads an unbound
        # name or a stale reply left over from a previous attempt.
        resposta_texto = None
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[{"role": "user", "content": full_prompt}],
                temperature=0,
                max_tokens=200,
                response_format={"type": "json_object"}
            )
            # The message content may be None; treat it as an empty reply so
            # it is reported as a JSON decoding failure and retried.
            resposta_texto = (response.choices[0].message.content or "").strip()
            return _extrair_notas(resposta_texto)
        except json.JSONDecodeError as e:
            print(f"Tentativa {attempt + 1}: Erro ao decodificar JSON: {e}")
            print("Resposta recebida:", resposta_texto)
        except Exception as e:
            # Deliberately broad: network/API/validation errors are all
            # retried and finally mapped to (None, None).
            print(f"Tentativa {attempt + 1}: Erro ao avaliar resumo: {e}")

        # Exponential back-off, skipped after the final attempt.
        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)

    return None, None

# Load the input tables. NOTE: the variables are named "*_test_*" but the files
# read here are prompts_train.csv and summaries_train.csv.
summaries_test_df = pd.read_csv("summaries_train.csv", quoting=csv.QUOTE_MINIMAL)
prompts_test_df = pd.read_csv("prompts_train.csv")

# Attach the prompt columns to every summary row (left join on prompt_id).
merged_test_df = pd.merge(
    summaries_test_df,
    prompts_test_df,
    on="prompt_id",
    how="left",
)

# Plain Python lists of the columns needed for scoring and for the output file.
student_ids = merged_test_df['student_id'].tolist()
prompt_ids = merged_test_df['prompt_id'].tolist()
prompt_texts = merged_test_df['prompt_text'].tolist()
summary_texts = merged_test_df['text'].tolist()

content_scores, wording_scores = [], []

print("Avaliando todos registros do conjunto de teste (prompt + resumo):")

# Score the rows with a pool of 5 worker threads. Executor.map pairs the two
# lists element-wise and yields results in input order, so the scores stay
# aligned with student_ids / prompt_ids.
with ThreadPoolExecutor(max_workers=5) as executor:
    for nota_content, nota_wording in executor.map(
        avaliar_resumo_com_prompt, prompt_texts, summary_texts
    ):
        content_scores.append(nota_content)
        wording_scores.append(nota_wording)
        print(f"Processado - Content: {nota_content}, Wording: {nota_wording}")

# Assemble the submission table and write it to disk without the index column.
submission = pd.DataFrame(
    dict(
        student_id=student_ids,
        prompt_id=prompt_ids,
        content=content_scores,
        wording=wording_scores,
    )
)
submission.to_csv("submission_test.csv", index=False)

print("\nArquivo submission_test.csv gerado com sucesso!")


Avaliando todos registros do conjunto de teste (prompt + resumo):
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 4
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 2
Processado - Content: 4, Wording: 3
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 3
Processado - Content: 3, Wording: 2
Processado - Content: 3, Wording: 4
Processado - Content: 4, Wording: 3
Processado - Content: 1, Wording: 1
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 4
Processado - Content: 4, Wording: 5
Processado - Content: 4, Wording: 4
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 2
Processado - Content: 4, Wording: 3
Processado - Content: 4, Wording: 3
Processado - Content: 4, Wording: 3
Processado - Content: 3, Wording: 2
Processado - Content: 3, Wording: 2
Processado - Content: 3, Wording: 2
Processado - Content: 4, Wording: 4
Processado - Content: 4, Wording: 3
Processado - Content: 4, Wording: 