# Review Classification Inference

Use a LoRA fine-tuned Longformer model to classify ICLR reviews from 2021, 2022, and 2023.

- Model: Longformer (`allenai/longformer-base-4096`) fine-tuned with a LoRA adapter
- Task: Binary classification (Real reviews vs AI-generated reviews)
- Data: ICLR 2021, 2022, 2023 review data


In [1]:
# Install/upgrade the libraries into the kernel's environment.
# NOTE(review): no versions are pinned and `-U` upgrades to the newest releases,
# so a later session may run with different library versions — consider pinning.
%pip install transformers datasets accelerate peft pandas tqdm -U
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')


Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading datasets-4.4.1-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m140.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7

In [2]:
# All imports live in one block (stdlib first, then third-party) so the
# notebook's dependencies are visible at a glance; `torch` is imported once.
import os
from datetime import datetime

import pandas as pd
import torch
from peft import PeftModel
from tqdm import tqdm
from transformers import LongformerTokenizer, LongformerForSequenceClassification

# Report the runtime environment so the saved output records what the run used.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


PyTorch version: 2.8.0+cu126
CUDA available: True
GPU: NVIDIA L4


In [3]:
# ---- Model -----------------------------------------------------------------
# Directory of the trained LoRA adapter (edit to point at your own adapter)
# and the base checkpoint the adapter is applied to.
MODEL_PATH = '/content/drive/MyDrive/Notebooks/AI_review/finetuned_longformer_lora1'
BASE_MODEL_NAME = 'allenai/longformer-base-4096'

# ---- Inference settings ----------------------------------------------------
MAX_LENGTH = 2048  # forwarded to the tokenizer as `max_length`
BATCH_SIZE = 8     # number of reviews per forward pass

# ---- Input CSVs, keyed by year ---------------------------------------------
DATA_PATHS = {
    year: f'/content/drive/MyDrive/Notebooks/AI_review/iclr_{year}_data/iclr_{year}_reviews.csv'
    for year in ('2021', '2022', '2023')
}

# ---- Output location (created when missing) --------------------------------
OUTPUT_DIR = './inference_results'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Echo the effective configuration in a single banner.
print(
    "="*60,
    "Review Classification Inference Script (LoRA Model)",
    "="*60,
    f"Base model: {BASE_MODEL_NAME}",
    f"LoRA adapter path: {MODEL_PATH}",
    f"Max length: {MAX_LENGTH}",
    f"Batch size: {BATCH_SIZE}",
    f"Output directory: {OUTPUT_DIR}",
    "="*60,
    sep="\n",
)


Review Classification Inference Script (LoRA Model)
Base model: allenai/longformer-base-4096
LoRA adapter path: /content/drive/MyDrive/Notebooks/AI_review/finetuned_longformer_lora1
Max length: 2048
Batch size: 8
Output directory: ./inference_results


In [4]:
def load_model_and_tokenizer(lora_model_path, base_model_name):
    """Build the LoRA-adapted Longformer classifier together with its tokenizer.

    Args:
        lora_model_path: Location of the LoRA adapter; the tokenizer is loaded
            from this same location.
        base_model_name: Name or path of the base Longformer checkpoint that
            the adapter is applied to.

    Returns:
        Tuple ``(model, tokenizer, device)``. The model has been moved to
        ``device`` (CUDA when available, otherwise CPU) and put in eval mode.
    """
    print(f"\nLoading LoRA model from: {lora_model_path}")
    print(f"Base model: {base_model_name}")

    # Prefer the GPU whenever PyTorch can see one.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f"Using device: {device}")

    print("\nLoading tokenizer...")
    tokenizer = LongformerTokenizer.from_pretrained(lora_model_path)

    # The base checkpoint is instantiated with a 2-class classification head.
    # NOTE(review): the recorded run warns that the classifier weights are newly
    # initialized at this point; predictions are presumably only meaningful if
    # the adapter checkpoint also carries the trained head — TODO confirm.
    print("Loading base model...")
    backbone = LongformerForSequenceClassification.from_pretrained(
        base_model_name, num_labels=2
    )

    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(backbone, lora_model_path)

    # Inference setup: place on the chosen device, switch to eval mode.
    model.to(device)
    model.eval()

    print("✓ LoRA model loaded successfully!")
    return model, tokenizer, device


def load_reviews_from_csv(csv_path):
    """Read a review CSV and keep only rows with non-empty ``review_text``.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A new DataFrame (original index preserved) containing the rows whose
        ``review_text`` is neither missing nor whitespace-only, or ``None``
        when ``csv_path`` does not exist.

    Raises:
        KeyError: If the CSV has no ``review_text`` column.
    """
    print(f"\nLoading data: {csv_path}")

    if not os.path.exists(csv_path):
        print(f"File does not exist, skipping: {csv_path}")
        return None

    df = pd.read_csv(csv_path)

    if 'review_text' not in df.columns:
        raise KeyError(f"'review_text' column not found in {csv_path}")

    text = df['review_text']
    # Cast to str before using the .str accessor: a column that is entirely
    # empty is parsed as float64, and .str raises AttributeError on it.
    has_text = text.notna() & text.astype(str).str.strip().ne('')

    # Return an independent copy so callers can add columns to the result
    # without triggering pandas' SettingWithCopyWarning.
    df = df[has_text].copy()

    print(f"  ✓ Loaded {len(df)} valid reviews")
    return df


def predict_reviews(model, tokenizer, device, reviews, batch_size=8, max_length=2048):
    """Classify a list of review texts in mini-batches.

    Args:
        model: Callable that accepts the tokenizer's tensors as keyword
            arguments and returns an object exposing ``.logits``.
        tokenizer: Callable that turns a list of texts into a mapping of tensors.
        device: Device the input tensors are moved to before the forward pass.
        reviews: List of review texts; it is sliced into consecutive batches.
        batch_size: Number of texts per forward pass.
        max_length: Passed to the tokenizer as ``max_length`` (with truncation on).

    Returns:
        Tuple ``(predictions, probabilities)`` in input order: ``predictions``
        holds the argmax class index per review, ``probabilities`` the
        per-class softmax probabilities per review (a list of lists).
    """
    predictions, probabilities = [], []

    total = len(reviews)
    print(f"\nStarting inference on {total} reviews...")

    # Gradients are never needed at inference time.
    with torch.no_grad():
        for start in tqdm(range(0, total, batch_size), desc="Inference progress"):
            chunk = reviews[start:start + batch_size]

            # Pad to the longest text in the batch, truncating at max_length.
            encoded = tokenizer(
                chunk,
                return_tensors='pt',
                truncation=True,
                padding=True,
                max_length=max_length
            )
            on_device = {name: tensor.to(device) for name, tensor in encoded.items()}

            logits = model(**on_device).logits
            class_probs = torch.softmax(logits, dim=-1)
            class_ids = class_probs.argmax(dim=-1)

            # Bring results back to the host as plain Python lists.
            predictions += class_ids.cpu().numpy().tolist()
            probabilities += class_probs.cpu().numpy().tolist()

    return predictions, probabilities


def process_year_data(year, csv_path, model, tokenizer, device):
    """Load one year's reviews, classify them, and attach the predictions.

    Args:
        year: Label used in the progress banner.
        csv_path: CSV file to load via ``load_reviews_from_csv``.
        model, tokenizer, device: Forwarded unchanged to ``predict_reviews``.

    Returns:
        A DataFrame with the added columns ``predicted_label``,
        ``predicted_class``, ``prob_real`` and ``prob_ai``, or ``None`` when
        the loader returned ``None``. A file with zero valid reviews yields an
        empty DataFrame that still carries those columns.
    """
    print("\n" + "="*60)
    print(f"Processing {year} data")
    print("="*60)

    # Load data
    df = load_reviews_from_csv(csv_path)
    if df is None:
        return None

    # Work on an independent copy: the columns added below must neither mutate
    # the loader's frame nor trigger SettingWithCopyWarning on a filtered frame.
    df = df.copy()

    # Extract review texts
    reviews = df['review_text'].astype(str).tolist()

    # Make predictions
    predictions, probabilities = predict_reviews(
        model, tokenizer, device, reviews,
        batch_size=BATCH_SIZE,
        max_length=MAX_LENGTH
    )

    # Add prediction results to DataFrame.
    # NOTE(review): assumes the model was trained with 0 = real and
    # 1 = AI-generated — confirm against the training setup.
    df['predicted_label'] = predictions
    df['predicted_class'] = ['AI-generated' if p == 1 else 'Real' for p in predictions]
    df['prob_real'] = [prob[0] for prob in probabilities]
    df['prob_ai'] = [prob[1] for prob in probabilities]

    # Statistics — guard the percentages so a file with no valid reviews
    # reports 0.0% instead of raising ZeroDivisionError.
    total = len(predictions)
    real_count = predictions.count(0)
    ai_count = predictions.count(1)
    real_pct = real_count / total * 100 if total else 0.0
    ai_pct = ai_count / total * 100 if total else 0.0

    print("\nPrediction statistics:")
    print(f"  Real reviews (label 0): {real_count} ({real_pct:.1f}%)")
    print(f"  AI-generated (label 1): {ai_count} ({ai_pct:.1f}%)")

    return df


In [5]:
# Load the model, tokenizer, and device using the paths set in the configuration cell.
model, tokenizer, device = load_model_and_tokenizer(MODEL_PATH, BASE_MODEL_NAME)



Loading LoRA model from: /content/drive/MyDrive/Notebooks/AI_review/finetuned_longformer_lora1
Base model: allenai/longformer-base-4096
Using device: cuda

Loading tokenizer...
Loading base model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/597M [00:00<?, ?B/s]

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading LoRA adapter...


model.safetensors:   0%|          | 0.00/597M [00:00<?, ?B/s]

✓ LoRA model loaded successfully!


In [6]:
# Run inference year by year. A year whose processing returns None
# (e.g. its CSV file is missing) is left out of the results.
all_results = {}

for year in DATA_PATHS:
    csv_path = DATA_PATHS[year]
    result_df = process_year_data(year, csv_path, model, tokenizer, device)
    if result_df is None:
        continue
    all_results[year] = result_df

print(f"\nProcessed {len(all_results)} years of data successfully!")



Processing 2021 data

Loading data: /content/drive/MyDrive/Notebooks/AI_review/iclr_2021_data/iclr_2021_reviews.csv
  ✓ Loaded 388 valid reviews

Starting inference on 388 reviews...


Inference progress:   0%|          | 0/49 [00:00<?, ?it/s]Initializing global attention on CLS token...
Input ids are automatically padded to be a multiple of `config.attention_window`: 512
Inference progress: 100%|██████████| 49/49 [01:02<00:00,  1.28s/it]



Prediction statistics:
  Real reviews (label 0): 124 (32.0%)
  AI-generated (label 1): 264 (68.0%)

Processing 2022 data

Loading data: /content/drive/MyDrive/Notebooks/AI_review/iclr_2022_data/iclr_2022_reviews.csv
  ✓ Loaded 386 valid reviews

Starting inference on 386 reviews...


Inference progress: 100%|██████████| 49/49 [00:55<00:00,  1.14s/it]



Prediction statistics:
  Real reviews (label 0): 206 (53.4%)
  AI-generated (label 1): 180 (46.6%)

Processing 2023 data

Loading data: /content/drive/MyDrive/Notebooks/AI_review/iclr_2023_data/iclr_2023_reviews.csv
  ✓ Loaded 378 valid reviews

Starting inference on 378 reviews...


Inference progress: 100%|██████████| 48/48 [00:40<00:00,  1.17it/s]


Prediction statistics:
  Real reviews (label 0): 276 (73.0%)
  AI-generated (label 1): 102 (27.0%)

Processed 3 years of data successfully!





In [7]:
print("\n" + "="*60)
print("Saving inference results")
print("="*60)

# One timestamp is shared by every file written in this run.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

for year, df in all_results.items():
    # Full per-review results for the year.
    output_path = os.path.join(OUTPUT_DIR, f'inference_results_{year}_{timestamp}.csv')
    df.to_csv(output_path, index=False)
    print(f"✓ {year} results saved: {output_path}")

    # Count each predicted label once and derive the percentages from the counts.
    labels = df['predicted_label']
    total = len(df)
    real_count = (labels == 0).sum()
    ai_count = (labels == 1).sum()
    summary = {
        'year': year,
        'total': total,
        'real_count': real_count,
        'ai_count': ai_count,
        'real_percentage': f"{real_count / total * 100:.2f}%",
        'ai_percentage': f"{ai_count / total * 100:.2f}%",
    }

    # One-row summary file per year.
    summary_path = os.path.join(OUTPUT_DIR, f'summary_{year}_{timestamp}.csv')
    summary_df = pd.DataFrame([summary])
    summary_df.to_csv(summary_path, index=False)

print("\nAll results saved successfully!")



Saving inference results
✓ 2021 results saved: ./inference_results/inference_results_2021_20251106_035947.csv
✓ 2022 results saved: ./inference_results/inference_results_2022_20251106_035947.csv
✓ 2023 results saved: ./inference_results/inference_results_2023_20251106_035947.csv

All results saved successfully!


In [8]:
# Cross-year roll-up: share of reviews predicted as AI-generated (label 1) per year.
# `timestamp` is the value created when the per-year files were saved.
if all_results:
    print("\n" + "="*60)
    print("Overall Summary Statistics")
    print("="*60)

    summary_data = []
    for year, df in all_results.items():
        is_ai = df['predicted_label'] == 1
        ai_percentage = is_ai.sum() / len(df) * 100
        row = {
            'year': year,
            'total': len(df),
            'ai_count': is_ai.sum(),
            'ai_percentage': f"{ai_percentage:.2f}%",
        }
        summary_data.append(row)

    # Build the table once from the collected rows, then persist and display it.
    overall_summary_path = os.path.join(OUTPUT_DIR, f'overall_summary_{timestamp}.csv')
    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(overall_summary_path, index=False)

    print("\n" + summary_df.to_string(index=False))
    print(f"\n✓ Overall summary saved: {overall_summary_path}")



Overall Summary Statistics

year  total  ai_count ai_percentage
2021    388       264        68.04%
2022    386       180        46.63%
2023    378       102        26.98%

✓ Overall summary saved: ./inference_results/overall_summary_20251106_035947.csv
