In [None]:
# Install required libraries
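# Dataset size notes
# HF abstracts dataset, rows per split:
#   by_polishing            2,851
#   from_title              2,963
#   from_title_and_content  2,574
#   Total HF rows           8,388
# Each HF row is flattened into up to 5 samples (1 human + 4 machine-generated): 8,388 * 5 = 41,940

# News ground truth: 4,800 samples
# Combined: 41,940 + 4,800 = 46,740 samples
# Train (70%)
# 46,740 * 0.70 = ~32,718 ✅
# Val (10%)
# 46,740 * 0.10 = ~4,674
# Test (20%)
# 46,740 * 0.20 = ~9,348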

!pip install -q transformers datasets scikit-learn accelerate pandas openpyxl huggingface_hub

In [None]:
# Handle imports
import pandas as pd
import wandb
from google.colab import drive
from datasets import Dataset, DatasetDict
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from transformers import EarlyStoppingCallback
from huggingface_hub import login
from typing import Literal

EDITOR = "COLAB"  # runtime environment: COLAB OR KAGGLE
TRAINING_MODEL = "MODERN_ARABERT"  # key into __MODEL_CONFIG_MAP: ARABERT OR MODERN_ARABERT
# Hugging Face Hub repo id that later cells push the fine-tuned model to and load it from.
FINE_TUNED_MODEL_PATH = "dahoozi/arabic-human-vs-machine-aramodernbert"


# Root data directory for each supported runtime environment.
__EDITOR_TO_STORAGE_PATH_MAP = dict(
    COLAB="/content/drive/MyDrive/ml_task_3",
    KAGGLE="/kaggle/input/ml-task-3",
)


# CSV locations of the full dataset and of its train / validation / test splits,
# all resolved under the data directory of the active EDITOR.
__FULL_TRAIN_DATA_PATH = __EDITOR_TO_STORAGE_PATH_MAP[EDITOR] + "/ground_truth.csv"
__TRAIN_DATA_PATH = __EDITOR_TO_STORAGE_PATH_MAP[EDITOR] + "/train_70percent.csv"
__VAL_DATA_PATH = __EDITOR_TO_STORAGE_PATH_MAP[EDITOR] + "/val_10percent.csv"
__TEST_DATA_PATH = __EDITOR_TO_STORAGE_PATH_MAP[EDITOR] + "/test_20percent.csv"


# Base checkpoint name and tokenizer max length for each trainable model key.
__MODEL_CONFIG_MAP = {
    "ARABERT": dict(model_name="aubmindlab/bert-base-arabertv2", max_length=512),
    "MODERN_BERT": dict(model_name="answerdotai/ModernBERT-base", max_length=8192),
    "MODERN_ARABERT": dict(
        model_name="NAMAA-Space/AraModernBert-Base-V1.0",
        max_length=512,  # max 8192
    ),
}

# Hub repo ids of fine-tuned checkpoints, keyed by training model.
__FINE_TUNED_MODEL_NAME_TO_HF_PATH_MAP = dict(
    ARABERT="dahoozi/arabic-news-human-vs-machine-arabertv2",
    MODERN_ARABERT="dahoozi/arabic-news-human-vs-machine-modernarabertv2",
)

def read_file(
    extension: Literal["csv", "excel"],
    split_type: Literal["full", "train70", "val10", "test20"]
) -> pd.DataFrame:
    """
    Reads a dataset file based on the extension and split type.

    Both arguments are validated before anything else happens, so an invalid
    call fails immediately instead of first mounting Google Drive.

    Args:
        extension (str): One of ['csv', 'excel']
        split_type (str): One of ['full', 'train70', 'val10', 'test20']

    Returns:
        pd.DataFrame: Loaded DataFrame

    Raises:
        ValueError: If split_type or extension is not one of the supported values.
    """
    # Fail fast on bad arguments (split_type is checked first, then extension).
    valid_split_types = ["full", "train70", "val10", "test20"]
    if split_type not in valid_split_types:
        raise ValueError(f"Invalid split_type: {split_type}. Must be one of {valid_split_types}.")
    if extension not in ("csv", "excel"):
        raise ValueError(f"Unsupported extension: {extension}")

    # Auto-mount Drive if in COLAB
    if EDITOR == "COLAB":
        # Imported lazily: this branch is the only place that needs them.
        from google.colab import drive
        import os

        # Mount only if not already mounted
        if not os.path.ismount("/content/drive"):
            drive.mount("/content/drive")

    split_to_path = {
        "full": __FULL_TRAIN_DATA_PATH,
        "train70": __TRAIN_DATA_PATH,
        "val10": __VAL_DATA_PATH,
        "test20": __TEST_DATA_PATH
    }
    full_path = split_to_path[split_type]

    if extension == 'csv':
        return pd.read_csv(full_path)
    # Only 'excel' can reach this point thanks to the validation above.
    return pd.read_excel(full_path)

In [None]:
# Explore data
import pandas as pd
import matplotlib.pyplot as plt

# Read the full ground-truth CSV
df = read_file("csv", "full")

# Preview the first rows
print(df.head())

# Number of samples per class
class_counts = df["Class"].value_counts()
print("\nClass Distribution:\n", class_counts)

# Bar chart of the class balance
print("--------------- \n")
class_axes = class_counts.plot(kind="bar", title="Class Distribution")
class_axes.set_xlabel("Class")
class_axes.set_ylabel("Count")
plt.show()

print("--------------- \n")
# Distinct class values, then the number of missing values per column
print("Class mappings", df["Class"].unique())
print("Null values: \n", df.isna().sum())


print("--------------- \n")
# First 10 rows of every class, shown as the cell output
df_10_each = df.groupby("Class").head(10).reset_index(drop=True)

df_10_each

In [None]:
# Data cleansing + split
# 30% of data: 10% for validation and 20% for testing

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load HF data
# Maps each split name to its parquet file, relative to `hf_base`.
splits = {
    'by_polishing': 'data/by_polishing-00000-of-00001.parquet',
    'from_title': 'data/from_title-00000-of-00001.parquet',
    'from_title_and_content': 'data/from_title_and_content-00000-of-00001.parquet'
}

# URL prefix that is prepended to the paths in `splits` when reading the parquet files.
hf_base = "hf://datasets/KFUPM-JRCAI/arabic-generated-abstracts/"

def load_and_flatten_hf(split_name):
    """
    Load one split of the HF abstracts dataset and flatten it into (text, label) samples.

    For every source row, a non-blank `original_abstract` becomes a sample with
    label 1 (human-written), and each non-blank generated abstract (allam, jais,
    llama, openai) becomes a sample with label 0 (machine-generated). Values that
    are not strings or contain only whitespace are skipped.

    Args:
        split_name: Key of the module-level `splits` dict selecting the parquet file.

    Returns:
        pd.DataFrame built from the kept samples, each with a 'text' and a 'label' entry.
    """
    generated_columns = (
        'allam_generated_abstract',
        'jais_generated_abstract',
        'llama_generated_abstract',
        'openai_generated_abstract',
    )

    def _is_non_blank(value):
        # True only for strings that still have content after stripping whitespace
        return isinstance(value, str) and bool(value.strip())

    raw = pd.read_parquet(hf_base + splits[split_name])

    flattened = []
    for record in raw.to_dict("records"):
        # Human-written sample first, then the machine-generated ones, row by row
        human_text = record['original_abstract']
        if _is_non_blank(human_text):
            flattened.append({'text': human_text, 'label': 1})
        flattened.extend(
            {'text': record[column], 'label': 0}
            for column in generated_columns
            if _is_non_blank(record.get(column))
        )
    return pd.DataFrame(flattened)

# Load and flatten all HF splits
df_hf = pd.concat([
    load_and_flatten_hf("by_polishing"),
    load_and_flatten_hf("from_title"),
    load_and_flatten_hf("from_title_and_content")
], ignore_index=True)

print("HF dataset size:", len(df_hf))

# Load news ground truth data
df_news = read_file("csv", "full")

# Map the class labels to binary (classes missing from the map become NaN)
label_map = {'human': 1, 'machine': 0}
df_news["label"] = df_news["Class"].map(label_map)

# Rename content → text, keep only the needed columns
df_news = df_news[["content", "label"]].rename(columns={"content": "text"})

df_all = pd.concat([df_hf, df_news], ignore_index=True)
df_all = df_all.dropna(subset=["text", "label"]).reset_index(drop=True)

# Unmapped classes turn the label column into floats; cast back to int so the
# saved CSVs always carry integer class ids for single-label classification.
df_all["label"] = df_all["label"].astype(int)

# NOTE(review): the same text may appear in more than one HF split (e.g. a shared
# original_abstract). If so, identical rows can land in both train and test —
# consider `df_all.drop_duplicates(subset="text")` before splitting. TODO confirm.

# First split: 70% training, 30% temp (val + test)
df_train, df_temp = train_test_split(
    df_all,
    test_size=0.30,
    stratify=df_all['label'],
    random_state=42,
    shuffle=True
)

# Step 2: Split ⅓ val (10%), ⅔ test (20%) from the temp set
df_val, df_test = train_test_split(
    df_temp,
    test_size=2/3,  # 2/3 of 30% = 20% of total
    stratify=df_temp['label'],
    random_state=42,
    shuffle=True
)

# The three splits must partition the full dataset
assert len(df_train) + len(df_val) + len(df_test) == len(df_all)

# Confirm sizes
print(f"Train size: {len(df_train)}")
print(f"Validation size: {len(df_val)}")
print(f"Test size: {len(df_test)}")

# Confirm class ratios are preserved
print("\nTrain class balance:\n", df_train['label'].value_counts(normalize=True))
print("\nValidation class balance:\n", df_val['label'].value_counts(normalize=True))
print("\nTest class balance:\n", df_test['label'].value_counts(normalize=True))

# Write to the same locations read_file() loads the splits from.
# NOTE(review): with EDITOR = "KAGGLE" these paths are under /kaggle/input — confirm it is writable.
df_train.to_csv(__TRAIN_DATA_PATH, index=False)
df_val.to_csv(__VAL_DATA_PATH, index=False)
df_test.to_csv(__TEST_DATA_PATH, index=False)

In [None]:
# Prepare training and validation data
import pandas as pd

# Load the train / validation CSV splits
train_df = read_file("csv", "train70")
val_df = read_file("csv", "val10")

# Wrap each split as a Dataset (with a fresh 0..n-1 index) and bundle them in a DatasetDict
dataset = DatasetDict({
    split_name: Dataset.from_pandas(frame.reset_index(drop=True))
    for split_name, frame in (("train", train_df), ("validation", val_df))
})

In [None]:
# Tokenize data


# Tokenizer of the base checkpoint selected by TRAINING_MODEL
tokenizer = AutoTokenizer.from_pretrained(__MODEL_CONFIG_MAP[TRAINING_MODEL]['model_name'])

def tokenize_function(example):
    """Tokenize the "text" field, padding and truncating to the configured max length."""
    tokenizer_options = dict(
        padding="max_length",
        truncation=True,
        max_length=__MODEL_CONFIG_MAP[TRAINING_MODEL]['max_length'],
    )
    return tokenizer(example["text"], **tokenizer_options)

# Applied in batches over every split of the DatasetDict
tokenized_dataset = dataset.map(tokenize_function, batched=True)

In [None]:
# Prepare training arguments
training_args = TrainingArguments(
    # NOTE(review): checkpoints go to a Kaggle path and logs to a Colab path
    # regardless of EDITOR — confirm both locations are intended.
    output_dir="/kaggle/working/arabic-news-model",
    # Evaluate and checkpoint on the same step schedule, as required by load_best_model_at_end
    eval_strategy="steps",
    eval_steps=4000,
    save_steps=4000,
    save_strategy="steps",
    logging_dir="/content/logs",
    logging_strategy="steps",
    logging_steps=1000,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    warmup_ratio=0.05,
    learning_rate=2e-5,
    lr_scheduler_type="linear", # default is linear
    weight_decay=0.01,
    save_total_limit=5,
    # Reload the checkpoint with the best "f1" (as returned by compute_metrics) when training ends
    load_best_model_at_end=True,
    fp16=True,
    metric_for_best_model="f1",
    report_to="wandb",
    # Name the run after the checkpoint actually being fine-tuned instead of a fixed AraBERT name
    run_name=__MODEL_CONFIG_MAP[TRAINING_MODEL]['model_name']
)

In [None]:
# Prepare evaluation metrics
def compute_metrics(eval_pred):
    """
    Compute accuracy plus weighted F1 / precision / recall for an evaluation pass.

    Args:
        eval_pred: Pair of (logits, labels); the predicted class is the argmax
            of the logits over the last axis.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall".
    """
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)

    metrics = {"accuracy": accuracy_score(labels, predictions)}
    metrics["f1"] = f1_score(labels, predictions, average="weighted")
    # zero_division=0 reports 0 for a class without predictions / samples
    metrics["precision"] = precision_score(labels, predictions, average="weighted", zero_division=0)
    metrics["recall"] = recall_score(labels, predictions, average="weighted", zero_division=0)
    return metrics

In [None]:
# Prepare trainer

num_classes = 2
model = AutoModelForSequenceClassification.from_pretrained(__MODEL_CONFIG_MAP[TRAINING_MODEL]['model_name'], num_labels=num_classes)

# `report_to` and `run_name` are TrainingArguments options (already set on
# `training_args`), not Trainer constructor arguments, so they are not passed here.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Credentials must never be hard-coded in the notebook: without an explicit key,
# wandb.login() uses the WANDB_API_KEY environment variable or prompts for the key.
# NOTE(review): the API key that used to be committed here should be revoked.
wandb.login()
# The run is named after the checkpoint that is actually being fine-tuned.
wandb.init(project="arabic-ai-detection", name=__MODEL_CONFIG_MAP[TRAINING_MODEL]['model_name'])

In [None]:
# Train data
# NOTE: never paste the wandb API key into the notebook; supply it through the
# WANDB_API_KEY environment variable or an interactive login prompt.

trainer.train()


Model path: dahoozi/arabic-human-vs-machine-arabertv2
Metrics for aubmindlab/bert-base-arabertv2 with the following training arguments:
- 2e-5 LR
- Linear LR scheduler
- token max length 512
- save total limit 5
- total epochs 3
- warmup over 5% of total steps
- batch size 4
- eval + save strategy = steps
- eval every 2000 steps (first eval after 2000 steps)

| Step  | Training Loss | Validation Loss | Accuracy | F1       | Precision | Recall   |
|-------|----------------|------------------|----------|----------|-----------|----------|
| 2000  | 0.270600       | 0.134141         | 0.991001 | 0.991030 | 0.991109  | 0.991001 |
| 4000  | 0.131100       | 0.329885         | 0.980287 | 0.980049 | 0.980454  | 0.980287 |
| 6000  | 0.095700       | 0.112277         | 0.992715 | 0.992705 | 0.992704  | 0.992715 |
| 8000  | 0.087100       | 0.111725         | 0.992715 | 0.992708 | 0.992705  | 0.992715 |
| 10000 | 0.044300       | 0.090220         | 0.994000 | 0.994000 | 0.994000  | 0.994000 |
| 12000 | 0.023700       | 0.096035         | 0.994215 | 0.994210 | 0.994208  | 0.994215 |

train output:

TrainOutput(global_step=12252, training_loss=0.11042025534498742, metrics={'train_runtime': 7092.1076, 'train_samples_per_second': 13.82, 'train_steps_per_second': 1.728, 'total_flos': 2.578830386899968e+16, 'train_loss': 0.11042025534498742, 'epoch': 3.0})

macro f1_score on held-out 10%: 0.9932

--------

Model path: dahoozi/arabic-human-vs-machine-aramodernbert
Metrics for NAMAA-Space/AraModernBert-Base-V1.0 with the following training arguments:
- 2e-5 LR
- Linear LR scheduler
- token max length 512
- save total limit 5
- total epochs 3
- warmup over 5% of total steps
- batch size 4
- eval + save strategy = steps
- eval every 2000 steps (first eval after 2000 steps)

| Step   | Training Loss | Validation Loss | Accuracy | F1       | Precision | Recall   |
|--------|----------------|------------------|----------|----------|-----------|----------|
| 2000   | 0.302200       | 0.262904         | 0.983930 | 0.983785 | 0.984006  | 0.983930 |
| 4000   | 0.130700       | 0.122283         | 0.991001 | 0.990989 | 0.990985  | 0.991001 |
| 6000   | 0.089200       | 0.093752         | 0.994429 | 0.994434 | 0.994444  | 0.994429 |
| 8000   | 0.073300       | 0.091986         | 0.993786 | 0.993789 | 0.993793  | 0.993786 |
| 10000  | 0.048700       | 0.090222         | 0.994643 | 0.994655 | 0.994687  | 0.994643 |
| 12000  | 0.055000       | 0.063524         | 0.995929 | 0.995935 | 0.995951  | 0.995929 |

train output:

TrainOutput(global_step=12252, training_loss=0.14420269082816956, metrics={'train_runtime': 8485.7311, 'train_samples_per_second': 11.55, 'train_steps_per_second': 1.444, 'total_flos': 3.339869876156621e+16, 'train_loss': 0.14420269082816956, 'epoch': 3.0})

macro f1_score on held-out 10%: 0.9923

In [None]:
# Save model + tokenizer

# Authenticate with the Hugging Face Hub. login() is called without a token so
# that it prompts for one; a placeholder string would be rejected and a real
# token must never be committed to the notebook.
login()

# Publish the fine-tuned weights and the matching tokenizer to the same Hub repo
model.push_to_hub(FINE_TUNED_MODEL_PATH)
tokenizer.push_to_hub(FINE_TUNED_MODEL_PATH)

print("DONE")

In [None]:
# t-SNE Visualization

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch
from sklearn.manifold import TSNE
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder


# Load the fine-tuned checkpoint through AutoModel to read hidden states.
# It lives in its own variable so the trained classifier in `model` is not overwritten.
embedding_model = AutoModel.from_pretrained(FINE_TUNED_MODEL_PATH)
embedding_model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embedding_model.to(device)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_PATH)

# Prepare your dataset again (small sample works best for t-SNE).
# The sample is seeded so the plot is reproducible, and capped at the dataset size.
tsne_source_df = read_file("csv", "train70")
sample_df = tsne_source_df.sample(n=min(1000, len(tsne_source_df)), random_state=42)
texts = sample_df["text"].tolist()

# Show class names in the legend; ids follow the encoding used when the splits
# were built (human = 1, machine = 0).
class_names = sample_df["label"].map({1: "human", 0: "machine"}).tolist()

embeddings = []

for text in texts:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    # Hidden state of the first token of the single sequence in the batch
    embeddings.append(outputs.last_hidden_state[0, 0, :].cpu())

embeddings = torch.stack(embeddings).numpy()


tsne = TSNE(n_components=2, random_state=42, perplexity=30)
reduced = tsne.fit_transform(embeddings)


plt.figure(figsize=(12, 8))
sns.scatterplot(x=reduced[:, 0], y=reduced[:, 1], hue=class_names, palette="tab20", s=50)
plt.title("Human vs. Machine Clustering Based on Fine-tuned Model CLS Embeddings")
plt.xlabel("t-SNE Dimension 1")
plt.ylabel("t-SNE Dimension 2")
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.show()

In [None]:
# Sample Arabic paragraph used as the input text for single-text inference
paragraph = """
مقدمة الطبعة الأولى

بقلم الدكتور محمد حسين هيكل

(1) كانت مصر إلى حين قدوم الحملة الفرنسوية إليها في سنة 1798 م بعيدةً عن الاحتكاك

بدول أوروبا، خلا ما كان من مرور بعض التُّجار والمَتاجر بأرضها في ذهابهم وعودتهم بين

الغرب والشرق.

وكانت بحكم خضوعها لاستبداد المماليك-تحت سيادة تركيا-تسُود فيها

الدسائس، ويعمل كلٌّ من أمرائها لما يجرُّ إليه النفع، وكانت الحركة العلمية والأدبية

خامدةً فيها خمودَها في سائر بلاد الدولة العثمانية، وبلغ من ذلك أن تدلَّى علماء الفقه

الإسلامي، الذين كانوا في مختلف العصور فخرَ مصر وزينتها، وفتر نشاطهم وفسد نتاجهم في

ذلك العصر، فأما الأدب من شعر ونثر فلم تقُم له إلى ذلك العصر قائمةٌ منذ امتدَّ سلطان

الأتراك على مصر، وإنك لتعجب حين تقرأ كاتبًا كالجبرتي، أو ابن إياس؛ لضعف تأليفه

ولغته، ولسقم ما فيه من آثار الأدب، شعرًا كانت هذه الآثار أم نثرًا.

فلما جاء الفرنسيون إلى مصر، وتغلغلوا فيها، وسارت مع حملة الجنود حملة العلماء، رأى

المصريون مظهرًا جديدًا من مظاهر الحياة لم يكن لهم في تاريخهم الأخير به عهد.

ولما جاء

محمد علي في سنة 1806 م، وقام بما قام به من الإصلاح في مصر بأن بعث البعوث من أبنائها

إلى أوروبا، وبعث إلى جوانب الحياة من صور النشاط ما حرَّك النفوس وأثار طلعتها، هبَّ

على البلاد نسيمٌ صالح لعله أول بشائر البعث لأمم الشرق العربي كافة.
"""

In [None]:
# Inference
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_PATH)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Class id -> class name, the inverse of the encoding used when the training
# data was built ({'human': 1, 'machine': 0}).
id_to_class = {1: "human", 0: "machine"}

def predict_author(text):
    """
    Classify a single text with the fine-tuned model.

    Args:
        text: The text to classify; it is truncated to 512 tokens.

    Returns:
        Tuple of (predicted class id, flat array with one softmax probability per class).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_idx = torch.argmax(probs, dim=-1).item()
    return pred_idx, probs.cpu().numpy().flatten()

predicted_author, probabilities = predict_author(paragraph)
print("📝 Arabert fine tuned results:\n")
print("📄 Paragraph:\n", paragraph)
print("🧠 Predicted Class:", id_to_class.get(predicted_author, predicted_author))
print("🔢 Probabilities:\n", probabilities)

# Format the class *name*: a numeric class id cannot be rendered with the `30s` format spec.
for class_id, prob in enumerate(probabilities):
    print(f"{str(id_to_class.get(class_id, class_id)):30s}: {prob:.4f}")

In [None]:
# Heldout inference + macro f1-score
import torch
# The torch Dataset is aliased so it does not shadow `datasets.Dataset`, which
# the data-preparation cell needs when it is re-run.
from torch.utils.data import Dataset as TorchDataset, DataLoader
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder
# --- CONFIG ---

batch_size = 16
max_len = 512

# --- LOAD DATA ---
# Held-out test split (the 20% file); train/validation data is not needed here
test_df = read_file("csv", "test20")

# Load tokenizer and model from fine-tuned directory
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL_PATH)
model.eval()

# Optional: Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# --- DATASET CLASS ---
class AuthorDataset(TorchDataset):
    """Torch dataset that tokenizes one text per item and pairs it with its label."""

    def __init__(self, texts, labels, tokenizer, max_len=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Every item is padded to max_len so the default collate can stack them
        inputs = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }

# --- CREATE DATALOADER ---
# Named `test_dataset` so the DatasetDict stored in `dataset` is not overwritten
test_dataset = AuthorDataset(
    texts=test_df["text"].tolist(),
    labels=test_df["label"].tolist(),
    tokenizer=tokenizer,
    max_len=max_len
)
loader = DataLoader(test_dataset, batch_size=batch_size)

# --- RUN INFERENCE ---
predictions, targets = [], []

with torch.no_grad():
    for batch in loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=1)

        predictions.extend(preds.cpu().tolist())
        targets.extend(batch["labels"].tolist())

# --- CALCULATE MACRO F1 ---
macro_f1 = f1_score(targets, predictions, average="macro")

# The evaluated file is the 20% test split, so the message says so
print(f"macro f1_score on held-out 20% test split: {macro_f1:.4f}")

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import numpy as np
from google.colab import drive
import os

drive.mount("/content/drive")
# === Load model and tokenizer ===
model_path = "dahoozi/arabic-human-vs-machine-aramodernbert"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# === Prepare label mapping ===
# Class ids follow the encoding used for training (human = 1, machine = 0).
# A LabelEncoder fit on the class names would sort them alphabetically
# (human = 0, machine = 1) and invert every prediction, so an explicit map is used.
label_map = {'human': 1, 'machine': 0}
inverse_label_map = {v: k for k, v in label_map.items()}

# === Load input data ===
# UPDATE THIS TO DEV SAMPLES
val_df = pd.read_csv("/content/drive/MyDrive/ml_task_3/ground_truth.csv")

# === Batch prediction function ===
def batch_predict(texts, batch_size=16, max_length=8000):
    """
    Predict the class name of every text, running the model in batches.

    Args:
        texts: Sequence (e.g. a pandas Series) of texts; missing values are
            treated as empty strings so that every row gets a prediction.
        batch_size: Number of texts per forward pass.
        max_length: Token limit used for truncation.
            NOTE(review): training truncated at 512 tokens — confirm 8000 is intended here.

    Returns:
        list of class names ('human' / 'machine'), one per input text.
    """
    text_list = pd.Series(texts).fillna("").astype(str).tolist()
    predicted_labels = []
    for i in tqdm(range(0, len(text_list), batch_size)):
        batch_texts = text_list[i:i+batch_size]
        encodings = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
        encodings = {k: v.to(device) for k, v in encodings.items()}

        with torch.no_grad():
            outputs = model(**encodings)
        # argmax over the logits selects the same class as argmax over the softmax
        preds = torch.argmax(outputs.logits, dim=-1).cpu().tolist()
        predicted_labels.extend(inverse_label_map[pred] for pred in preds)
    return predicted_labels

# === Perform batch inference ===
val_df['label'] = batch_predict(val_df['content'])

# === Output to CSV ===
output_df = val_df[['label']]
output_df.to_csv("/content/drive/MyDrive/ml_task_3/predicted_class.csv", index=False, encoding='utf-8-sig')

In [None]:
# EVAL submission

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import numpy as np
from google.colab import drive

drive.mount("/content/drive")
# === Load model and tokenizer ===
model_path = "dahoozi/arabic-human-vs-machine-aramodernbert"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# === Prepare label mapping ===
# Predicted class ids are translated back to class names with the inverse of
# the mapping used to encode the labels (human = 1, machine = 0).
label_map = {'human': 1, 'machine': 0}
inverse_label_map = {v: k for k, v in label_map.items()}

# === Load input data ===
val_df = pd.read_csv("/content/drive/MyDrive/ml_task_3/test_unlabeled.csv")

# === Batch prediction function ===
def batch_predict(texts, batch_size=16, max_length=8000):
    """
    Predict the class name of every text, running the model in batches.

    Args:
        texts: Sequence (e.g. a pandas Series) of texts; missing values are
            treated as empty strings so that every row gets a prediction.
        batch_size: Number of texts per forward pass.
        max_length: Token limit used for truncation.
            NOTE(review): training truncated at 512 tokens — confirm 8000 is intended here.

    Returns:
        list of class names ('human' / 'machine'), one per input text.
    """
    text_list = pd.Series(texts).fillna("").astype(str).tolist()
    predicted_labels = []
    for i in tqdm(range(0, len(text_list), batch_size)):
        batch_texts = text_list[i:i+batch_size]
        encodings = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
        encodings = {k: v.to(device) for k, v in encodings.items()}

        with torch.no_grad():
            outputs = model(**encodings)
        # argmax over the logits selects the same class as argmax over the softmax
        preds = torch.argmax(outputs.logits, dim=-1).cpu().tolist()
        predicted_labels.extend(inverse_label_map[pred] for pred in preds)
    return predicted_labels

# === Perform batch inference ===
val_df['label'] = batch_predict(val_df['content'])

# === Output to CSV ===
output_df = val_df[['label']]
output_df.to_csv("/content/drive/MyDrive/ml_task_3/predictions_content_test_v3.csv", index=False, encoding='utf-8-sig')