In [2]:
import os
import time
import json
import pandas as pd
from dotenv import load_dotenv
import google.generativeai as genai
import csv

# Pull the variables declared in the local .env file into the process environment
load_dotenv()

# Fail fast when the key is absent or blank, before any API call is attempted
gemini_api = os.environ.get("GEMINI_API_KEY")
if gemini_api is None or gemini_api == "":
    raise ValueError("Chave da API GEMINI_API_KEY não encontrada no arquivo .env.")

# Register the key with the google.generativeai client
genai.configure(api_key=gemini_api)

def extract_json_from_response(response_text):
    """Parse the JSON payload out of a model reply.

    The reply is first parsed as a whole after Markdown code-fence markers
    (``` and ```json) are removed. If that fails, the raw text is scanned for
    the first ``{`` from which a complete JSON value can be decoded, so prose
    before or after the object (even prose that itself contains braces) is
    ignored.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value (a dict when the reply holds a JSON object).

    Raises:
        json.JSONDecodeError: If no JSON value can be decoded from the text.
    """
    clean_text = response_text.strip().replace('```json', '').replace('```', '')
    try:
        return json.loads(clean_text)
    except json.JSONDecodeError:
        decoder = json.JSONDecoder()
        start = response_text.find('{')
        while start != -1:
            try:
                # raw_decode stops at the end of the first complete value, so
                # trailing text after the object cannot break the parse.
                parsed, _ = decoder.raw_decode(response_text, start)
                return parsed
            except json.JSONDecodeError:
                # This brace did not open a valid value; try the next one.
                start = response_text.find('{', start + 1)
        # Nothing decodable was found: re-raise the original parse error.
        raise

def avaliar_resumo_com_prompt(prompt_text, summary_text, max_retries=3, model_name='gemma-3-27b-it'):
    """Score a student summary through the Gemini API, retrying on failure.

    The model is asked to return a JSON object with the numeric keys
    'content' and 'wording' (grades from 0 to 10). Any error during a request
    or while validating the reply is printed and the request is retried with
    exponential backoff (1s, 2s, ...) between attempts.

    Args:
        prompt_text: The task prompt that was given to the students.
        summary_text: The summary written by the student.
        max_retries: Maximum number of attempts before giving up.
        model_name: Name of the model passed to genai.GenerativeModel.

    Returns:
        A tuple (content, wording) of floats, or (None, None) when every
        attempt failed.
    """
    # The prompt is identical for every attempt, so it is built once. The JSON
    # example lives in a plain (non-f) string literal and therefore uses single
    # braces; doubled braces would be sent to the model verbatim.
    full_prompt = (
        f"Consider the following prompt that was given to the students:\n"
        f"{prompt_text}\n\n"
        f"Now, consider the following summary written by the student:\n"
        f"{summary_text}\n\n"
        "Please evaluate the summary based on the task defined in the prompt and return a JSON response "
        "with the keys 'content' and 'wording'. The score for 'content' should reflect the quality of "
        "representing the main idea and details, while the score for 'wording' should reflect the clarity, "
        "accuracy, and fluency of the text. The grades go from 0 to 10.\n"
        "Response MUST be in JSON format only, with no additional text. Example: {\"content\": 5, \"wording\": 5}"
    )

    for attempt in range(max_retries):
        # Reset per attempt so an error log never shows a previous attempt's reply.
        resposta_texto = None
        try:
            model = genai.GenerativeModel(model_name)
            response = model.generate_content(
                full_prompt,
                generation_config=genai.types.GenerationConfig(
                    temperature=0,
                    max_output_tokens=200
                )
            )

            resposta_texto = response.text.strip()
            notas = extract_json_from_response(resposta_texto)

            # Valid JSON that is not an object (e.g. a bare number or list)
            # cannot carry the two expected keys.
            if not isinstance(notas, dict):
                raise ValueError(f"Resposta não é um objeto JSON: {resposta_texto}")

            content = notas.get("content")
            wording = notas.get("wording")

            if content is None or wording is None:
                raise ValueError(f"Resposta incompleta: {resposta_texto}")
            for score in (content, wording):
                # bool is a subclass of int, so JSON true/false must be rejected explicitly.
                if isinstance(score, bool) or not isinstance(score, (int, float)):
                    raise ValueError(f"Formato numérico inválido: {resposta_texto}")

            return float(content), float(wording)

        except json.JSONDecodeError as e:
            print(f"Tentativa {attempt + 1}: Erro ao decodificar JSON: {e}")
            print("Resposta recebida:", resposta_texto)
        except ValueError as e:
            print(f"Tentativa {attempt + 1}: {e}")
        except Exception as e:
            print(f"Tentativa {attempt + 1}: Erro inesperado: {str(e)}")

        # Back off only when another attempt follows; sleeping after the last
        # failure would just delay the caller.
        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)

    # str() keeps the log line from raising when the summary is not a string (e.g. NaN).
    print(f"Falha após {max_retries} tentativas para: {str(summary_text)[:50]}...")
    return None, None

# Read the prompts and the student summaries, then attach each summary's prompt
prompts_test_df = pd.read_csv("prompts_train.csv")
summaries_test_df = pd.read_csv("summaries_train.csv", quoting=csv.QUOTE_MINIMAL)
merged_test_df = summaries_test_df.merge(prompts_test_df, on="prompt_id", how="left")

content_scores = []
wording_scores = []
total_rows = len(merged_test_df)

print("Avaliando todos registros do conjunto de teste (prompt + resumo):")

# Synchronous processing: one API request per (prompt, summary) pair
prompt_summary_pairs = zip(merged_test_df['prompt_text'], merged_test_df['text'])
for position, (row_prompt, row_summary) in enumerate(prompt_summary_pairs, start=1):
    content, wording = avaliar_resumo_com_prompt(row_prompt, row_summary)
    content_scores.append(content)
    wording_scores.append(wording)
    print(f"Processado {position}/{total_rows} - Content: {content}, Wording: {wording}")
    # Pause between requests
    time.sleep(1)

# Assemble the submission table and write it to disk
submission_columns = {
    'student_id': merged_test_df['student_id'],
    'prompt_id': merged_test_df['prompt_id'],
    'content': content_scores,
    'wording': wording_scores
}
submission = pd.DataFrame(submission_columns)

submission.to_csv("submission_test.csv", index=False)
print("\nArquivo submission_test.csv gerado com sucesso!")

Avaliando todos registros do conjunto de teste (prompt + resumo):
Processado 1/994 - Content: 6.0, Wording: 5.0
Processado 2/994 - Content: 4.0, Wording: 3.0
Processado 3/994 - Content: 8.0, Wording: 6.0
Processado 4/994 - Content: 3.0, Wording: 2.0
Processado 5/994 - Content: 7.0, Wording: 6.0
Processado 6/994 - Content: 8.0, Wording: 7.0
Processado 7/994 - Content: 6.0, Wording: 4.0
Processado 8/994 - Content: 4.0, Wording: 3.0
Processado 9/994 - Content: 6.0, Wording: 7.0
Processado 10/994 - Content: 6.0, Wording: 4.0
Processado 11/994 - Content: 1.0, Wording: 2.0
Processado 12/994 - Content: 8.0, Wording: 6.0
Processado 13/994 - Content: 6.0, Wording: 4.0
Processado 14/994 - Content: 8.0, Wording: 7.0
Processado 15/994 - Content: 7.0, Wording: 6.0
Processado 16/994 - Content: 7.0, Wording: 6.0
Processado 17/994 - Content: 6.0, Wording: 4.0
Processado 18/994 - Content: 7.0, Wording: 6.0
Processado 19/994 - Content: 7.0, Wording: 4.0
Processado 20/994 - Content: 7.0, Wording: 6.0
Pro

In [10]:
import pprint

# Pretty-print the metadata record of every model returned by the API
available_models = genai.list_models()
for model in available_models:
    pprint.pprint(model)

Model(name='models/chat-bison-001',
      base_model_id='',
      version='001',
      display_name='PaLM 2 Chat (Legacy)',
      description='A legacy text-only model optimized for chat conversations',
      input_token_limit=4096,
      output_token_limit=1024,
      supported_generation_methods=['generateMessage', 'countMessageTokens'],
      temperature=0.25,
      max_temperature=None,
      top_p=0.95,
      top_k=40)
Model(name='models/text-bison-001',
      base_model_id='',
      version='001',
      display_name='PaLM 2 (Legacy)',
      description='A legacy model that understands text and generates text as an output',
      input_token_limit=8196,
      output_token_limit=1024,
      supported_generation_methods=['generateText', 'countTextTokens', 'createTunedTextModel'],
      temperature=0.7,
      max_temperature=None,
      top_p=0.95,
      top_k=40)
Model(name='models/embedding-gecko-001',
      base_model_id='',
      version='001',
      display_name='Embedding Gecko