In [1]:
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from transformers import pipeline
import os
import constants
from data_extraction import get_raw_dataset

def save_predictions(ids, predictions, output_file='./content/Result_llm.jsonl'):
    """Write predictions to ``output_file`` as JSON Lines.

    Each output line is a JSON object of the form ``{"id": ..., "label": ...}``.

    Args:
        ids: Iterable of sample identifiers; each must be JSON-serializable.
        predictions: Iterable of predicted labels; each is coerced with ``int()``.
        output_file: Destination path. Its parent directory is created when it
            does not exist yet.

    Note:
        ``ids`` and ``predictions`` are paired with ``zip``, so surplus items
        in the longer of the two are silently dropped.
    """
    # A bare filename has an empty dirname and os.makedirs('') raises, so the
    # parent directory is only created when the path actually has one.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Build every record before opening the file so a bad label cannot leave a
    # truncated output file behind.
    records = [{"id": id_, "label": int(pred)} for id_, pred in zip(ids, predictions)]

    with open(output_file, 'w', encoding='utf-8') as f:
        for record in records:
            f.write(json.dumps(record) + '\n')

    print(f"Results saved to {output_file}")

def zero_shot_classification(device=0, max_chars=500):
    """Classify the test split as human- or machine-written and save the result.

    Loads the data via ``get_raw_dataset(mode='test')``, scores every text
    with a ``facebook/bart-large-mnli`` zero-shot pipeline, and hands the
    predictions to ``save_predictions`` (which writes to its default path).

    Args:
        device: Device index forwarded to the ``transformers`` pipeline
            (``0`` for the first GPU, ``-1`` for CPU).
        max_chars: Each text is cut to this many characters before being
            classified. This is a character cut, not a token cut.

    Returns:
        None. The predictions are only written to disk.
    """
    print("Loading test data...")
    # NOTE(review): assumes mode='test' yields (texts, ids) in matching order;
    # confirm against data_extraction.get_raw_dataset.
    X_test, ids_test = get_raw_dataset(mode='test')

    print("Initializing zero-shot classifier...")
    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        device=device,
    )

    machine_label = "machine-generated"
    predictions = []

    print("Processing test data...")
    for text in tqdm(X_test):
        output = classifier(
            text[:max_chars],
            candidate_labels=["human-written", machine_label],
        )
        # The first entry of "labels" is treated as the winning class
        # (0 = human, 1 = machine).
        predictions.append(1 if output["labels"][0] == machine_label else 0)

    save_predictions(ids_test, predictions)
    

if __name__ == "__main__":
    # Announce the run, then execute the full test-set pipeline.
    banner = "Starting Zero-Shot Classification approach for LLM text detection"
    print(banner)
    zero_shot_classification()

Starting Zero-Shot Classification approach for LLM text detection
Loading test data...
Initializing zero-shot classifier...



Device set to use cpu


Processing test data...


100%|██████████| 34272/34272 [24:25:56<00:00,  2.57s/it]        


Results saved to ./content/Result_llm.jsonl


In [17]:
import json
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from transformers import pipeline
import os
from data_extraction import get_raw_dataset

def save_predictions(ids, predictions, output_file='./content/Result_llm.jsonl'):
    """Save predictions to a JSONL file.

    Each output line is a JSON object of the form ``{"id": ..., "label": ...}``.

    Args:
        ids: Iterable of sample identifiers; each must be JSON-serializable.
        predictions: Iterable of predicted labels; each is coerced with ``int()``.
        output_file: Destination path. Its parent directory is created when it
            does not exist yet.

    Note:
        ``ids`` and ``predictions`` are paired with ``zip``, so surplus items
        in the longer of the two are silently dropped.
    """
    # os.makedirs('') raises, which is what a bare filename would produce, so
    # the directory is only created when the path has a parent component.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Materialise all records first so a failing int() does not leave a
    # half-written file.
    records = [{"id": id_, "label": int(pred)} for id_, pred in zip(ids, predictions)]

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(record) + '\n' for record in records)

    print(f"Results saved to {output_file}")

def save_numpy_arrays(predictions, labels=None, prefix="LLM", output_dir="predictions"):
    """Save predictions (and optionally labels) as ``.npy`` files.

    Args:
        predictions: Sequence of predicted labels; stored as
            ``<output_dir>/<prefix>_y_test_pred.npy``.
        labels: Optional sequence of ground-truth labels; when given, stored
            as ``<output_dir>/<prefix>_y_test.npy``. Nothing is written for
            labels when this is ``None``.
        prefix: Filename prefix shared by both files.
        output_dir: Directory the arrays are written to; created if missing.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(output_dir, exist_ok=True)

    pred_file = f'{output_dir}/{prefix}_y_test_pred.npy'
    np.save(pred_file, np.array(predictions))
    print(f"Predictions saved to {pred_file}")

    if labels is not None:
        label_file = f'{output_dir}/{prefix}_y_test.npy'
        np.save(label_file, np.array(labels))
        print(f"Ground truth labels saved to {label_file}")

def zero_shot_classify(texts, device=0, max_chars=500):
    """Classify texts as human- or machine-written with a zero-shot pipeline.

    A ``facebook/bart-large-mnli`` zero-shot pipeline is built on every call
    and applied to the texts one at a time.

    Args:
        texts: Iterable of strings to classify.
        device: Device index forwarded to the ``transformers`` pipeline
            (``0`` for the first GPU, ``-1`` for CPU).
        max_chars: Each text is cut to this many characters before being
            classified. This is a character cut, not a token cut.

    Returns:
        A list with one int per input text: ``1`` when the first entry of the
        pipeline's ``"labels"`` output is ``"machine-generated"``, else ``0``.
    """
    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        device=device,
    )

    machine_label = "machine-generated"
    predictions = []
    for text in tqdm(texts):
        output = classifier(
            text[:max_chars],
            candidate_labels=["human-written", machine_label],
        )
        # The first entry of "labels" is treated as the winning class.
        predictions.append(1 if output["labels"][0] == machine_label else 0)

    return predictions

def evaluate_performance(predictions, labels):
    """Compute accuracy, macro F1 and micro F1, print them, and return them.

    Args:
        predictions: Predicted labels.
        labels: Ground-truth labels, passed to the metric functions as the
            first (reference) argument.

    Returns:
        Tuple ``(accuracy, macro_f1, micro_f1)``.
    """
    scores = (
        accuracy_score(labels, predictions),
        f1_score(labels, predictions, average='macro'),
        f1_score(labels, predictions, average='micro'),
    )

    print("Evaluation Results:")
    # One report line per metric, each formatted to four decimal places.
    for title, value in zip(("Accuracy", "Macro F1", "Micro F1"), scores):
        print(f"{title}: {value:.4f}")

    return scores

def _read_jsonl_field(path, field):
    """Return ``record[field]`` for every non-blank line of the JSONL file at ``path``."""
    values = []
    # Read as UTF-8 explicitly: the prediction file is written as UTF-8, and
    # the platform default encoding is not guaranteed to match.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank/trailing lines instead of crashing in json.loads
            values.append(json.loads(line)[field])
    return values


def main():
    """Run (or reuse) test-set predictions, evaluate them, then evaluate the dev split.

    Steps:
        1. Load cached test predictions from ``./content/Result_llm.jsonl`` if
           that file exists; otherwise classify the test split and write it.
        2. If ``./content/subtaskA_monolingual.jsonl`` exists, read its
           ``label`` field as ground truth and evaluate when the counts match.
        3. Save test predictions (and labels, if any) as ``.npy`` files.
        4. Classify the dev split on CPU, evaluate it, and save predictions
           and labels under ``predictions/``.
    """
    result_file = './content/Result_llm.jsonl'
    if os.path.exists(result_file):
        # Reuse the cached predictions rather than re-running the model.
        print(f"Predictions file {result_file} already exists.")
        test_predictions = _read_jsonl_field(result_file, 'label')
        print(f"Loaded {len(test_predictions)} predictions for test set")
    else:
        print("Processing test data...")
        X_test, ids_test = get_raw_dataset(mode='test')
        test_predictions = zero_shot_classify(X_test)
        save_predictions(ids_test, test_predictions, result_file)

    # Ground truth is optional; evaluation is skipped when the file is absent.
    gold_file = './content/subtaskA_monolingual.jsonl'
    true_labels = None
    if os.path.exists(gold_file):
        true_labels = _read_jsonl_field(gold_file, 'label')
        print(f"Loaded {len(true_labels)} ground truth labels")

        # NOTE(review): labels and predictions are matched by position only;
        # this assumes both files list samples in the same order - confirm.
        if len(true_labels) == len(test_predictions):
            print("\nTest Set Evaluation:")
            evaluate_performance(test_predictions, true_labels)
        else:
            # Say why evaluation was skipped instead of skipping silently.
            print(
                f"Skipping test evaluation: {len(true_labels)} ground truth labels "
                f"vs {len(test_predictions)} predictions"
            )

    save_numpy_arrays(test_predictions, true_labels)

    print("\nProcessing validation data...")
    X_dev, y_dev = get_raw_dataset(mode='dev')
    print(f"Loaded {len(X_dev)} validation samples")

    print("Making predictions on validation set...")
    dev_predictions = zero_shot_classify(X_dev, device=-1)  # Use CPU for validation

    print("\nValidation Set Evaluation:")
    evaluate_performance(dev_predictions, y_dev)

    # exist_ok=True avoids the check-then-create race of a separate exists() test.
    os.makedirs('predictions', exist_ok=True)
    np.save('predictions/LLM_y_dev_pred.npy', np.array(dev_predictions))
    # np.asarray accepts a pandas Series as well as a plain sequence.
    np.save('predictions/LLM_y_dev.npy', np.asarray(y_dev))
    print("Saved validation predictions to predictions/LLM_y_dev_pred.npy")

if __name__ == "__main__":
    # Announce the run, then hand control to the evaluation driver.
    banner = "Starting Zero-Shot Classification approach for LLM text detection"
    print(banner)
    main()

Starting Zero-Shot Classification approach for LLM text detection
Predictions file ./content/Result_llm.jsonl already exists.
Loaded 34272 predictions for test set
Loaded 34272 ground truth labels

Test Set Evaluation:
Evaluation Results:
Accuracy: 0.5085
Macro F1: 0.4358
Micro F1: 0.5085
Predictions saved to predictions/LLM_y_test_pred.npy
Ground truth labels saved to predictions/LLM_y_test.npy

Processing validation data...
Loaded 5000 validation samples
Making predictions on validation set...


Device set to use cpu
100%|██████████| 5000/5000 [1:03:03<00:00,  1.32it/s]


Validation Set Evaluation:
Evaluation Results:
Accuracy: 0.5206
Macro F1: 0.4245
Micro F1: 0.5206
Saved validation predictions to predictions/LLM_y_dev_pred.npy



