In [2]:
import json

# Load the task data. The encoding is given explicitly so the non-ASCII
# characters in the drafts (e.g. en dashes) decode the same way on every
# platform instead of depending on the locale's default encoding.
with open('./task3.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Keep only the text of each draft
test_texts = [draft['content'] for draft in data['drafts']]

# Matching draft IDs, in the same order as test_texts
draft_ids = [draft['id'] for draft in data['drafts']]

# Print a short summary to verify the load
print(f"Número de drafts cargados: {len(test_texts)}")
if test_texts:  # with an empty drafts list the [0] lookups would raise IndexError
    print("\nPrimer draft ID:", draft_ids[0])
    print("Primer texto:", test_texts[0][:100], "...")  # show the first 100 characters

Número de drafts cargados: 10

Primer draft ID: S/2018/72
Primer texto: United Nations S/2018/72 – Security Council, Distr.: General – 30 January 2018 – Original: English – ...


In [3]:
# Together API client setup
import os

from together import Together

your_model_name = 'meta-llama/Llama-3.3-70B-Instruct-Turbo-Free'

# The API key is read from the environment so that it is never stored in the
# notebook (cell sources and outputs are shared and committed verbatim).
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and rotated.
your_api_key = os.environ.get('TOGETHER_API_KEY')
if not your_api_key:
    raise RuntimeError(
        "Set the TOGETHER_API_KEY environment variable before running this cell."
    )
client = Together(api_key=your_api_key)

In [4]:
from tqdm import tqdm
import pandas as pd
import random

# Characters stripped from both ends of a model reply before it is matched.
_ANSWER_PADDING = " \t\r\n'\"`()[]{}*.:,;!"


def _parse_answer(raw):
    """Map a raw model reply to 1 (yes), 0 (no) or None (unrecognised).

    Surrounding whitespace, quotes, brackets and punctuation are ignored, so
    replies such as "'yes'", "(1)" or "No." are recognised. Anything that
    starts with "yes"/"no" after clean-up, or whose first word is "1"/"0",
    is accepted.
    """
    cleaned = raw.strip().lower().strip(_ANSWER_PADDING)
    words = cleaned.split()
    first_word = words[0].strip(_ANSWER_PADDING) if words else ""
    if cleaned.startswith("yes") or first_word == "1":
        return 1
    if cleaned.startswith("no") or first_word == "0":
        return 0
    return None


def classify_texts(texts, max_words=7000, seed=None):
    """Ask the chat model whether each draft will be adopted.

    Relies on the module-level `client` and `your_model_name` defined in the
    API setup cell.

    Args:
        texts: Iterable of draft texts (strings).
        max_words: Maximum number of whitespace-separated words sent per
            text; longer texts are cut and suffixed with '...'. This is a
            word count, not a model-token count, and leaves room for the
            prompt and the reply.
        seed: Optional seed for the random fallback label. With the default
            (None) the module-level `random` generator is used.

    Returns:
        A list with one int per input text: 1 for "yes", 0 for "no". When
        the reply cannot be parsed, or the request raises, the label is a
        random choice between 0 and 1.
    """
    # A dedicated generator makes the fallback reproducible when a seed is given
    rng = random if seed is None else random.Random(seed)
    results = []

    for text in tqdm(texts):
        # Truncate the text if it is too long (split once, reuse the word list)
        words = text.split()
        if len(words) > max_words:
            text = ' '.join(words[:max_words]) + '...'

        user_prompt = f"""
        The provided document is a United Nations Security Council's draft resolution. Predict whether the draft resolution will be adopted or not. Answer with 'yes' (1) or 'no' (0) without any explanation.

        Text: "{text}"
        Answer:
        """

        try:
            response = client.chat.completions.create(
                model=your_model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": user_prompt}
                ],
                max_tokens=5,
                temperature=0.0
            )
            label = _parse_answer(response.choices[0].message.content)
        except Exception as e:
            # Best-effort: report the failure and fall through to the random fallback
            print(f"Error procesando texto: {e}")
            label = None

        # Unparseable replies and failed requests both get a random guess
        results.append(rng.choice([0, 1]) if label is None else label)

    return results

# Classify every loaded draft text; pred gets one 0/1 label per entry of test_texts
pred = classify_texts(test_texts)

100%|███████████████████████████████████████████| 10/10 [01:16<00:00,  7.65s/it]


In [5]:
# calculate metrics
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score, average_precision_score, matthews_corrcoef
from imblearn.metrics import geometric_mean_score

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score, balanced_accuracy_score, precision_recall_curve, auc

def calculate_metrics(pred, labels):
    """Print binary-classification metrics for 0/1 predictions against 0/1 labels.

    Both sequences are flipped (0 <-> 1) before scoring, so the
    positive-class metrics (precision, recall, F1, PR AUC) are reported for
    the class encoded as 0 in the inputs.

    Args:
        pred: Predicted labels, each 0 or 1.
        labels: Ground-truth labels, each 0 or 1, in the same order as pred.

    Returns:
        None. The metrics are printed one per line, followed by a header row
        and a single space-separated row rounded to four decimals.
    """
    # swap 0 and 1
    pred = [1 - x for x in pred]
    labels = [1 - x for x in labels]
    acc = accuracy_score(labels, pred)
    try:
        roc_auc = roc_auc_score(labels, pred)
    except ValueError:
        # Reported as 0 when roc_auc_score rejects the inputs
        roc_auc = 0
    balanced_acc = balanced_accuracy_score(labels, pred)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, pred, average='binary')
    # PR AUC is the trapezoidal area under the precision-recall curve
    precision, recall, _ = precision_recall_curve(labels, pred)
    pr_auc = auc(recall, precision)
    mcc = matthews_corrcoef(labels, pred)
    g_mean = geometric_mean_score(labels, pred)
    # Pin the label set so the matrix is always 2x2; without it, inputs that
    # contain a single class yield a 1x1 matrix and the unpacking fails.
    tn, fp, fn, tp = confusion_matrix(labels, pred, labels=[0, 1]).ravel()
    # Specificity is undefined without negatives; report 0.0 like the AUC fallback
    specificity = tn / (tn + fp) if (tn + fp) else 0.0

    print(f'Accuracy: {acc}')
    print(f'AUC: {roc_auc}')
    print(f'Balanced Accuracy: {balanced_acc}')
    print(f'Precision: {prec}')
    print(f'Recall: {rec}')
    print(f'F1: {f1}')
    print(f'PR AUC: {pr_auc}')
    print(f'MCC: {mcc}')
    print(f'G-Mean: {g_mean}')
    print(f'Specificity: {specificity}')

    print('Accuracy AUC Balanced_Acc Precision Recall F1 PR_AUC MCC G-Mean Specificity')
    print(f'{acc:.4f} {roc_auc:.4f} {balanced_acc:.4f} {prec:.4f} {rec:.4f} {f1:.4f} {pr_auc:.4f} {mcc:.4f} {g_mean:.4f} {specificity:.4f}')



In [8]:
# Compute TF-IDF cosine similarity between consecutive drafts
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tf_cosine_scores = []
for i in range(len(test_texts)-1):  # compare each text with the next one
    # The vectorizer is refit on every pair, so only those two texts feed the fit
    tfidf = TfidfVectorizer().fit_transform([test_texts[i], test_texts[i+1]])
    score = cosine_similarity(tfidf[0], tfidf[1])[0][0]
    tf_cosine_scores.append(score)

# With fewer than two texts there are no pairs: report NaN instead of dividing by zero
average_tf_cosine = (
    sum(tf_cosine_scores) / len(tf_cosine_scores) if tf_cosine_scores else float('nan')
)
print('Average Cosine Similarity (TF-IDF):', average_tf_cosine)

Average Cosine Similarity (TF-IDF): 0.5529949286626793
