## Setup

In [None]:
!nvidia-smi

In [None]:
import psutil

# Total physical memory of the runtime, in decimal gigabytes.
ram_gb = psutil.virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Below 20 GB the runtime is reported as a standard (not high-RAM) one.
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

In [None]:
# Core dependencies for fine-tuning, serving and evaluation (each package listed once).
!pip install -qU transformers datasets peft wandb vllm accelerate bitsandbytes faker evaluate sacrebleu rouge_chinese

In [None]:
# !pip install -qU torch=="2.4.1+cu121" torchvision=="0.19.1+cu121" torchaudio=="2.4.1+cu121" --index-url https://download.pytorch.org/whl/cu121
# Build helpers must be present before flash-attn is installed.
!pip install -qU packaging ninja
# flash-attn builds against the torch already installed in the runtime, so pip's
# isolated build environment has to be disabled (as the project README instructs).
!pip install -qU flash-attn --no-build-isolation

In [None]:
import flash_attn
print(flash_attn.__version__)

In [None]:
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

## Evaluate LLM Model on Arabic Authorship Styling Task

In [None]:
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
torch_dtype = "auto" # None, torch.float16
device = "cuda"

In [None]:
# Load the base causal LM identified by `model_id`; device placement is delegated
# to `device_map="auto"` and the dtype comes from `torch_dtype` set above.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype = torch_dtype
)

# Tokenizer matching the same checkpoint (also used later for token counting).
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
model

### Read Data

In [None]:
import os
from google.colab import drive
# Mount Google Drive at /gdrive so the shared-task files are reachable.
drive.mount('/gdrive')

# Root folder of the task data and the two Excel files read by the next cell.
data_dir = "/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1"
train_path = os.path.join(data_dir, "AuthorshipStyleTransferTrain.xlsx")
validation_path = os.path.join(data_dir, "AuthorshipStyleTransferVal.xlsx")

In [None]:
import pandas as pd

# `df` holds the training file; the validation file is loaded as `test_df`
# and is treated as the held-out test set throughout this notebook.
df = pd.read_excel(train_path)
test_df = pd.read_excel(validation_path)

# Display the held-out frame for a quick visual check.
test_df

In [None]:
# Whitespace-delimited word counts of the author-styled text, per row.
train_word_counts = df['text_in_author_style'].str.split().str.len()
val_word_counts = test_df['text_in_author_style'].str.split().str.len()

# Extremes for each split.
max_train_word_counts = train_word_counts.max()
min_train_word_counts = train_word_counts.min()
max_val_word_counts = val_word_counts.max()
min_val_word_counts = val_word_counts.min()

print("\n".join([
    f"{max_train_word_counts=}",
    f"{min_train_word_counts=}",
    f"{max_val_word_counts=}",
    f"{min_val_word_counts=}",
]))


In [None]:
from tqdm.notebook import tqdm
import numpy as np

# Helper function for batching
def count_tokens_batch(texts, tokenizer, batch_size=512):
    """Count tokens per text, tokenizing `texts` in slices of `batch_size`.

    Args:
        texts: sliceable sequence of strings.
        tokenizer: callable returning a mapping with an 'input_ids' entry
            (one id sequence per input text); called without special tokens
            and without truncation.
        batch_size: number of texts tokenized per call.

    Returns:
        numpy array with one token count per input text, in input order.
    """
    lengths = []
    batch_starts = range(0, len(texts), batch_size)
    for start in tqdm(batch_starts):
        chunk = texts[start:start + batch_size]
        encoded = tokenizer(chunk, add_special_tokens=False, truncation=False)
        lengths += map(len, encoded['input_ids'])
    return np.array(lengths)

# Author-styled texts of each split as plain Python lists
train_texts = df['text_in_author_style'].tolist()
val_texts = test_df['text_in_author_style'].tolist()

# Token counts per text (training first, then validation/test)
train_token_counts = count_tokens_batch(train_texts, tokenizer)
val_token_counts = count_tokens_batch(val_texts, tokenizer)

# Show stats
for split_name, counts in (("train", train_token_counts), ("val", val_token_counts)):
    print(f"max_{split_name}_token_counts={counts.max()}")
    print(f"min_{split_name}_token_counts={counts.min()}")


In [None]:
# Generation budget: the longest reference (in tokens) seen in either split
# plus a fixed 500-token margin.
max_new_tokens = int(max(train_token_counts.max(), val_token_counts.max()) + 500)
# Force at least one generated token.
min_new_tokens = 1

In [None]:
# Pull the three fields of the first held-out row in one go.
src_text, transfered_text, author_name = test_df.iloc[0][
    ['text_in_msa', 'text_in_author_style', 'author']
]

separator = "**"*10
print(
    f"Author: {author_name}",
    separator,
    f"Source Text: {src_text}",
    separator,
    f"Transfered Text: {transfered_text}",
    sep="\n",
)

In [None]:
# System prompt shared by every request: role description plus output constraints.
system_message = "\n".join([
    "You are an advanced NLP experts specializing in text style transfer and semantic preservation.",
    "Your role is to transform input text into the unique writing style of a specified author, ensuring the transformed output retains the original meaning, nuances, and intent.",
    "Always follow instructions carefully, preserve the semantics of the source text, and ensure the generated output accurately reflects the distinctive style and tone of the given author.",
    "Generate the ouptut in the same text language.",
    "Do not generate any introduction or conclusion."
])

# User turn: tagged fields (<task>/<author>/<source>) ending with an open <output> tag.
user_message = "\n".join([
            "<task> Transform the following source text into the writing style of the specified author.",
            f"<author> {author_name.strip()}",
            f"<source> {src_text.strip()}",
            "<output>"
        ])

# Chat-format message list consumed by tokenizer.apply_chat_template below.
llm_messages = [
    {
        "role": "system",
        "content": system_message,
    },
    {
        "role": "user",
        "content": user_message
    }
]

In [None]:
# Render the chat messages into the model's prompt format (string, not token ids).
text = tokenizer.apply_chat_template(
    llm_messages,
    tokenize=False,
    add_generation_prompt=True
)

# Place the inputs on the device the model was actually loaded on.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Pass the full encoding (input_ids AND attention_mask) so generate() does not
# have to guess the mask; decoding is greedy (sampling knobs explicitly unset).
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=max_new_tokens,
    min_new_tokens=min_new_tokens,
    do_sample=False, top_k=None, temperature=None, top_p=None,
)

# Strip the prompt tokens so only the newly generated continuation is decoded.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [None]:
print('LLM Transfered Text:\n')
print(response)

## Finetuning Phase

Format Finetuning Dataset

Reduce the training dataset to 20% of its original size (while preserving the `author` class distribution), and then hold out 10% of that reduced data for validation

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Fraction of the original training data kept for fine-tuning. The output files
# written later are named "*_20p_*", so the labels below are derived from this
# value instead of being hard-coded.
REDUCE_FRAC = 0.2
reduced_label = f"Reduced{REDUCE_FRAC:.0%}"

# 1. Reduce df to REDUCE_FRAC of its original size, sampling within each author
#    so the author distribution is preserved.
df_reduced = df.groupby('author', group_keys=False).apply(
    lambda x: x.sample(frac=REDUCE_FRAC, random_state=42)
)
df_reduced = df_reduced.sample(frac=1, random_state=42).reset_index(drop=True)  # Shuffle

# 2. Split the reduced frame into train (90%) and validation (10%), stratified by author
train_df, val_df = train_test_split(
    df_reduced,
    test_size=0.1,
    random_state=42,
    stratify=df_reduced['author']
)

# 3. Calculate normalized value counts (proportion) for each split
original_dist = df['author'].value_counts(normalize=True)
reduced_dist = df_reduced['author'].value_counts(normalize=True)
train_dist = train_df['author'].value_counts(normalize=True)
val_dist = val_df['author'].value_counts(normalize=True)
test_dist = test_df['author'].value_counts(normalize=True)

# 4. Combine into a single DataFrame for display
#    (fillna(0): an author may be absent from a split)
distribution_table = pd.DataFrame({
    'Original': original_dist,
    reduced_label: reduced_dist,
    'Train': train_dist,
    'Validation': val_dist,
    'Test': test_dist,
}).fillna(0)

# 5. Display as a table in Colab/Notebook
distribution_table.style.format("{:.2%}")


Use the cell below instead if you want to train on the full training dataset

In [None]:
# import pandas as pd
# from sklearn.model_selection import train_test_split

# # Assuming df is your DataFrame
# train_df, val_df = train_test_split(
#     df,
#     test_size=0.2,
#     random_state=42,
#     stratify=df['author']
# )

# # Calculate normalized value counts (proportion) for each split
# original_dist = df['author'].value_counts(normalize=True)
# train_dist = train_df['author'].value_counts(normalize=True)
# val_dist = val_df['author'].value_counts(normalize=True)
# test_dist = test_df['author'].value_counts(normalize=True)

# # Combine into a single DataFrame for display
# distribution_table = pd.DataFrame({
#     'Original': original_dist,
#     'Train': train_dist,
#     'Validation': val_dist,
#     'Test': test_dist,
# }).fillna(0)  # In case some authors don't appear in a split

# # Display as a table in Colab
# distribution_table.style.format("{:.2%}")


Install LLaMA-Factory

In [None]:
# Clone only when the checkout is missing, so re-running this cell is harmless,
# and use the absolute path the rest of the notebook relies on.
!test -d /content/LLaMA-Factory || git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git /content/LLaMA-Factory
!cd /content/LLaMA-Factory && pip install -e .

Login to HuggingFace <img src="https://huggingface.co/front/assets/huggingface_logo.svg" alt="Hugging Face" width="40"/> , W&B


In [None]:
from google.colab import userdata
from huggingface_hub import login
import wandb

# Secrets come from the Colab secrets store; they are never written in the notebook.
wandb.login(key=userdata.get('wandb_colab'))
hf_token = userdata.get('HF_API_KEY')

# Log in through the Python API rather than `!huggingface-cli login --token ...`
# so the token is not interpolated into a shell command line.
login(token=hf_token)

In [None]:
import random

# Sorted list of distinct authors in the training split; used as the candidate
# pool for the classification examples built below.
all_authors = sorted(train_df['author'].unique())

# System prompt attached to every training example.
# NOTE(review): the same text is used for inference elsewhere in this notebook;
# keep the copies in sync if this is ever edited.
system_message = "\n".join([
    "You are an advanced NLP experts specializing in text style transfer and semantic preservation.",
    "Your role is to transform input text into the unique writing style of a specified author, ensuring the transformed output retains the original meaning, nuances, and intent.",
    "Always follow instructions carefully, preserve the semantics of the source text, and ensure the generated output accurately reflects the distinctive style and tone of the given author.",
    "Generate the ouptut in the same text language.",
    "Do not generate any introduction or conclusion."
])

# Pool of style-transfer instruction templates; each expects the fields
# {author} and {source} and one is picked at random per example.
PROMPT_TEMPLATES = [
    "<task> Emulate the style of <author> {author}: “{source}” →",
    "<task> Rewrite the following in {author}'s voice:\n“{source}”\nResult:",
    "As if written by {author}, transform:\n\"{source}\"\n--",
    "Write in {author}'s unique style. Source:\n{source}\nOutput:"
]

# Pool of author-classification instruction templates; each expects {source}.
CLASSIFICATION_TEMPLATES = [
    "<task> Identify the author of this text:\n“{source}”\nAnswer:",
    "Given the following Arabic sentence, choose the author:\n\"{source}\"\nAuthor:",
    "Text:\n{source}\nWhich author wrote this? →"
]


def format_chat(example):
    """Build one fixed-prompt style-transfer record.

    Reads 'author', 'text_in_msa' and 'text_in_author_style' from `example`
    (all stripped) and returns a dict with the keys system / instruction /
    input / output / history.
    """
    author = example['author'].strip()
    source = example['text_in_msa'].strip()

    # Same four tagged lines the inference prompts use, joined by newlines.
    instruction = (
        "<task> Transform the following source text into the writing style of the specified author.\n"
        f"<author> {author}\n"
        f"<source> {source}\n"
        "<output>"
    )

    return dict(
        system=system_message,
        instruction=instruction,
        input="",
        output=example['text_in_author_style'].strip(),
        history=[],
    )


def format_style(example):
    """Build one style-transfer record using a randomly chosen prompt template.

    The template is drawn from PROMPT_TEMPLATES and filled with the stripped
    author name and MSA source text; the stripped author-style text is the target.
    """
    chosen_template = random.choice(PROMPT_TEMPLATES)
    fields = {
        "author": example['author'].strip(),
        "source": example['text_in_msa'].strip(),
    }
    record = {
        "system": system_message,
        "instruction": chosen_template.format(**fields),
        "input": "",
    }
    record["output"] = example['text_in_author_style'].strip()
    record["history"] = []
    return record

def format_classification(example, all_authors, k=3):
    """Build one author-classification record with up to k distractors.

    Args:
        example: row with 'author' and 'text_in_author_style'.
        all_authors: iterable of candidate author names.
        k: requested number of wrong authors; clamped to the number of
            distinct other authors available so small author sets do not raise.

    Returns:
        dict with keys system / instruction / input / output / history. The
        instruction ends with a shuffled " [a | b | ...]" choice list and the
        output is the correct author, spelled exactly as it appears in the list.
    """
    # Normalise names once so the choice list and the label always agree.
    author = example['author'].strip()

    # Distinct other authors, order preserved (dict.fromkeys de-duplicates).
    candidates = [
        name for name in dict.fromkeys(a.strip() for a in all_authors)
        if name != author
    ]
    negatives = random.sample(candidates, min(k, len(candidates)))

    choices = negatives + [author]
    random.shuffle(choices)

    tmpl = random.choice(CLASSIFICATION_TEMPLATES)
    instruction = tmpl.format(source=example['text_in_author_style'].strip())
    # append the multiple-choice list
    instruction += " [" + " | ".join(choices) + "]"

    return {
        "system": system_message,
        "instruction": instruction,
        "input": "",
        "output": author,
        "history": []
    }



# 4) Build a mixed multi-task formatter
def format_multi(example, p_style=0.8):
    """Return a style-transfer record with probability `p_style`, otherwise an
    author-classification record (3 distractors) for the same example."""
    wants_style = random.random() < p_style
    return (
        format_style(example)
        if wants_style
        else format_classification(example, all_authors, k=3)
    )


In [None]:
from tqdm.notebook import tqdm
tqdm.pandas()  # registers DataFrame.progress_apply

# Fixed-prompt alternative (format_chat) kept for reference:
# train_ds = train_df.progress_apply(format_chat, axis=1)
# val_ds = val_df.progress_apply(format_chat, axis=1)
# test_ds = test_df.progress_apply(format_chat, axis=1)

# Training mixes style-transfer and classification records;
# validation and test contain style-transfer records only.
train_ds = train_df.progress_apply(format_multi, axis=1)
val_ds = val_df.progress_apply(format_style, axis=1)
test_ds = test_df.progress_apply(format_style, axis=1)


In [None]:
import os
import json
import random

output_dir = os.path.join(data_dir, "datasets", "llamafactory-finetune-data")
os.makedirs(output_dir, exist_ok=True)

# convert pandas Series to list
train_list = list(train_ds)
val_list = list(val_ds)
test_list = list(test_ds)

# Shuffle each list in-place with a dedicated, seeded generator so the files
# written below come out in the same order on every run.
shuffle_rng = random.Random(42)
for records in (train_list, val_list, test_list):
    shuffle_rng.shuffle(records)

# One JSON file per split; ensure_ascii=False keeps the Arabic text readable,
# default=str stringifies any value json cannot serialise natively.
split_files = {
    "train_20p_prompt_variation.json": train_list,
    "validation_20p_prompt_variation.json": val_list,
    "test_prompt_variation.json": test_list,
}
for filename, records in split_files.items():
    with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as dest:
        json.dump(records, dest, ensure_ascii=False, default=str)


In [None]:
output_dir = os.path.join(data_dir, "datasets", "llamafactory-finetune-data")

# Echo the full path of every file produced by the previous cell.
for filename in (
    "train_20p_prompt_variation.json",
    "validation_20p_prompt_variation.json",
    "test_prompt_variation.json",
):
    print(os.path.join(output_dir, filename))

## Start Finetuning

In [None]:
# Register the new datasets with LLaMA-Factory by merging two entries into
# /content/LLaMA-Factory/data/dataset_info.json.
import json
import os

# Path to llama-factory dataset JSON file
json_path = "/content/LLaMA-Factory/data/dataset_info.json"

# Maps LLaMA-Factory's column roles to the keys used in the JSON records written earlier.
column_mapping = {
    "prompt": "instruction",
    "query": "input",
    "response": "output",
    "system": "system",
    "history": "history"
}

# Dataset name -> file inside `output_dir` (the folder the JSON files were
# written to), so the paths cannot drift from where the data actually lives.
dataset_files = {
    "authorship_styling_finetune_train": "train_20p_prompt_variation.json",
    "authorship_styling_finetune_val": "validation_20p_prompt_variation.json",
}

# New entries to add
new_entries = {
    dataset_name: {
        "file_name": os.path.join(output_dir, filename),
        "columns": dict(column_mapping),
    }
    for dataset_name, filename in dataset_files.items()
}

# Step 1: Load the current JSON data
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Step 2: Update the dictionary with new entries (existing keys of the same name are replaced)
data.update(new_entries)

# Step 3: Write the updated data back to the file
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4)

print("Datasets appended successfully.")

In [None]:
# metrics.py
from evaluate import load

# BLEU and chrF metric objects from the `evaluate` library.
bleu_metric  = load("bleu")
chrf_metric  = load("chrf")
# NOTE: this reassigns the notebook-level `model_id` and `tokenizer`.
model_id = "Qwen/Qwen2.5-1.5B-Instruct"

# Tokenizer used by compute_metrics_fn to decode predictions and labels.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

def compute_metrics_fn(eval_preds):
    """Compute BLEU and chrF from token-id predictions and labels.

    Args:
        eval_preds: pair (preds, labels) of rectangular token-id arrays.
            Positions equal to -100 (the loss ignore index used for padding)
            are mapped to the tokenizer's pad token before decoding.

    Returns:
        dict with "bleu" (BLEU score) and "charf" (chrF score; key spelling
        kept as-is so existing metric names do not change).
    """
    import numpy as np  # local import keeps this cell self-contained

    preds, labels = eval_preds

    # -100 is not a valid token id and breaks batch_decode; swap it for padding,
    # which skip_special_tokens then removes from the decoded text.
    pad_id = tokenizer.pad_token_id
    preds = np.where(np.asarray(preds) != -100, preds, pad_id)
    labels = np.where(np.asarray(labels) != -100, labels, pad_id)

    # decode
    decoded_preds  = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Both metrics expect a list of reference lists (one list per prediction)
    references = [[label] for label in decoded_labels]
    bleu_res = bleu_metric.compute(predictions=decoded_preds, references=references)
    chrf_res = chrf_metric.compute(predictions=decoded_preds, references=references)
    return {
        "bleu": bleu_res["bleu"],
        "charf": chrf_res["score"]
    }


In [None]:
%%writefile /content/LLaMA-Factory/examples/train_lora/authorship_style_transfer_finetune.yaml

### model
model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
# model_name_or_path: google/gemma-3-1b-it
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 32
## default (lora_rank*2)
lora_alpha: 64
lora_dropout: 0.05
## rank-stabilized LoRA -> the update scaling factor becomes alpha/sqrt(r) instead of alpha/r
# use_rslora: true
## alternative: q_proj,k_proj,v_proj
lora_target: all
flash_attn: fa2
## adding special tokens while fine-tuning might cause vLLM to fail after training
# add_special_tokens: <|task|>,<|author|>,<|source|>,<|output|>
## Quantization method to use for on-the-fly, {bnb,gptq,awq,aqlm,quanto,eetq,hqq}, default bnb
# quantization_method: awq
## The number of bits to quantize the model using on-the-fly quantization {4, 8, 16}, default: none
# quantization_bit: 4


### dataset
dataset: authorship_styling_finetune_train
eval_dataset: authorship_styling_finetune_val
template: qwen
cutoff_len: 3500
# max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16

# generation-based metrics are left disabled; model selection uses eval_loss
# predict_with_generate: true
# bleu is not supported by llama-factory ?!!
metric_for_best_model: eval_loss
# greater_is_better: true
# compute_metrics: "compute_metrics_fn"   # point to your function
# compute_accuracy: true
### output
# resume_from_checkpoint: /gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/models/checkpoint-2000
output_dir: /gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/qwen-20p-lora32/
logging_steps: 10
# save_steps must stay a multiple of eval_steps while load_best_model_at_end is on
save_steps: 500
plot_loss: true
log_level: error
# overwrite_output_dir: true

### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 4
learning_rate: 5.0e-6
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0
bf16: true
ddp_timeout: 180000000

### eval
# val_size: 0.1
per_device_eval_batch_size: 2
eval_strategy: steps
eval_steps: 100
eval_on_start: True
# early stopping after N evaluations without improvement
early_stopping_steps: 3     # stop if eval_loss doesn't improve for n evals
# needed so early stopping tracks (and finally restores) the best checkpoint
load_best_model_at_end: true


report_to: wandb
run_name: authorship-style-transfer-finetune-llamafactory

# push_to_hub: true
export_hub_model_id: "Tami3/authorship-style-transfer"
hub_private_repo: true
hub_strategy: checkpoint


In [None]:
from huggingface_hub import create_repo, whoami

# Ask the Hub API who the stored token belongs to instead of parsing the
# first line of `huggingface-cli whoami`, which may be a warning or a
# "Not logged in" message rather than the username.
hf_username = whoami()["name"]

# Private model repo for the fine-tuned weights; exist_ok makes re-runs harmless.
create_repo(f"{hf_username}/authorship-style-transfer", private=True, exist_ok=True)

### Fine-tune model via LLaMA Board

In [None]:
# Change the notebook's working directory to the LLaMA-Factory checkout.
%cd /content/LLaMA-Factory/
# Start the LLaMA Board web UI. NOTE(review): GRADIO_SHARE=1 presumably requests
# a public Gradio share link -- confirm against the LLaMA-Factory docs.
!GRADIO_SHARE=1 llamafactory-cli webui

# !cd LLaMA-Factory/ !GRADIO_SHARE=1 llamafactory-cli webui

In [None]:
!cd /content/LLaMA-Factory/ && llamafactory-cli train -h

In [None]:
!cd /content/LLaMA-Factory/ && llamafactory-cli train /content/LLaMA-Factory/examples/train_lora/authorship_style_transfer_finetune.yaml

## Fine-tuned Model Evaluation

In [None]:
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Base checkpoint for the evaluation below (the Qwen alternative is kept
# commented out); this reassigns the notebook-level `model_id`.
# model_id = "Qwen/Qwen2.5-1.5B-Instruct"
model_id = "google/gemma-3-1b-it"
torch_dtype = "auto" # None, torch.float16
device = "cuda"

In [None]:
# Reload the base model selected by `model_id`; this replaces the `model`
# and `tokenizer` objects created earlier in the notebook.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype = torch_dtype
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Directory on Drive holding the fine-tuned adapter; it is attached to the
# base model loaded above via load_adapter.
finetuned_model_id = "/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/gemma-12july-test-vllm"
model.load_adapter(finetuned_model_id)

In [None]:
# Row of the held-out set used for this spot check.
idx = 400

src_text = test_df.iloc[idx]['text_in_msa']
transfered_text = test_df.iloc[idx]['text_in_author_style']
author_name = test_df.iloc[idx]['author']

print(f'{author_name=}')

# Same system prompt text as the one used to build the training records.
system_message = "\n".join([
    "You are an advanced NLP experts specializing in text style transfer and semantic preservation.",
    "Your role is to transform input text into the unique writing style of a specified author, ensuring the transformed output retains the original meaning, nuances, and intent.",
    "Always follow instructions carefully, preserve the semantics of the source text, and ensure the generated output accurately reflects the distinctive style and tone of the given author.",
    "Generate the ouptut in the same text language.",
    "Do not generate any introduction or conclusion."
])

# User turn in the fixed tagged format (<task>/<author>/<source>/<output>).
user_message = "\n".join([
            "<task> Transform the following source text into the writing style of the specified author.",
            f"<author> {author_name.strip()}",
            f"<source> {src_text.strip()}",
            "<output>"
        ])

# Messages use the structured content form (a list of {'type': 'text', ...} parts)
# rather than plain strings.
llm_messages = [
    {
        "role": "system",
        "content": [{'type': 'text', 'text': system_message}],
    },
    {
        "role": "user",
        "content": [{'type': 'text', 'text': user_message}]
    }
]

In [None]:
def generate_resp(messages):
    """Generate one sampled response for a chat `messages` list.

    Uses the notebook-level `tokenizer` and `model`. The prompt is rendered
    with the tokenizer's chat template, up to 3500 new tokens are sampled
    (top_k=20, temperature=0.7, top_p=0.8), and only the newly generated
    text is returned.
    """
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Put the inputs on the device the model actually lives on.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Forward the whole encoding so the attention mask reaches generate().
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=3500,
        do_sample=True, top_k=20, temperature=0.7, top_p=0.8,
    )

    # Drop the prompt tokens; keep only the continuation.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return response

response = generate_resp(llm_messages)

In [None]:
print(response)

#### Tip for Qwen2.5

Qwen2.5 often produces Chinese characters in some responses. To avoid this, use the following class to generate responses.

Source:
`https://jupyter267.medium.com/how-to-eliminate-the-chance-of-generating-chinese-in-qwen-2-5-2cf919bb0fdc`



In [None]:
class Generator:
    """Chat generator that forbids tokens containing Chinese (CJK) characters.

    A boolean mask over the vocabulary is built lazily on the first decoding
    step and cached in `self.mask`; at every step the scores of the masked
    tokens are set to -inf so they can never be produced.
    """

    # Unicode ranges checked: CJK Unified Ideographs, Extension A,
    # and CJK Compatibility Ideographs.
    _CJK_RANGES = ((0x4E00, 0x9FFF), (0x3400, 0x4DBF), (0xF900, 0xFAFF))

    def __init__(self, model, tokenizer):
        self.model, self.tokenizer = model, tokenizer
        self.mask = None  # built on first use, see _build_mask

    def _contains_cjk(self, token: str) -> bool:
        """True if any character of `token` falls in one of the CJK ranges."""
        return any(lo <= ord(c) <= hi for c in token for lo, hi in self._CJK_RANGES)

    def _build_mask(self, vocab_size: int):
        """Decode every id in [0, vocab_size) and flag those containing CJK characters."""
        all_ids = torch.arange(vocab_size)
        decoded_tokens = self.tokenizer.batch_decode(all_ids.unsqueeze(1), skip_special_tokens=True)
        return torch.tensor([self._contains_cjk(token) for token in decoded_tokens], dtype=torch.bool)

    def generate(self, messages: list, max_new_tokens: int = 2000, temperature: float = 0.1):
        """Render `messages` with the chat template and return the generated text.

        Args:
            messages: chat messages accepted by tokenizer.apply_chat_template.
            max_new_tokens: upper bound on generated tokens.
            temperature: sampling temperature forwarded to model.generate.

        Returns:
            Decoded continuation (prompt removed, special tokens skipped).
        """

        def logits_processor(token_ids, logits):
            """Set the scores of CJK tokens to -inf for the current step."""
            if self.mask is None:
                # The loop over the vocabulary is slow, so it runs only once.
                self.mask = self._build_mask(logits.size(-1))
            if self.mask.device != logits.device:
                # Keep the cached mask on the scores' device to avoid a
                # host/device transfer at every decoding step.
                self.mask = self.mask.to(logits.device)

            logits[:, self.mask] = -float("inf")
            return logits

        # Turn the messages into a single prompt string, including the
        # template's special tokens and the generation prompt.
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            logits_processor=[logits_processor]
        )
        # Drop the prompt tokens; keep only the continuation.
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return response

In [None]:
# define an object
llm = Generator(model, tokenizer)

# Generate a response without Chinese characters for the style-transfer prompt
# built above. The response is plain text, so it is printed as-is.
response = llm.generate(llm_messages)
print(response)

## vLLM

In [None]:
# base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# base_model_id = "google/gemma-3-1b-it"
# adapter_model_id = "/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/gemma-12july-test-vllm"


# !nohup vllm serve "{base_model_id}" --gpu-memory-utilization 0.8 --max_lora_rank 16 --enable-lora --lora-modules ast-lora="{adapter_model_id}" &


vllm in terminal

In [None]:
# Launch the vLLM server in the background from the notebook: a bare shell
# command is not valid in a Python cell. `nohup` writes the server log to
# nohup.out, which the next cell tails.
!nohup vllm serve "google/gemma-3-1b-it" --dtype=half --gpu-memory-utilization 0.8 --max_lora_rank 32 --enable-lora --lora-modules ast-lora="/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/gemma-20p-lora32/checkpoint-1000" &

In [None]:
!tail -n 10 nohup.out

### Inference

In [None]:
# Base model whose chat template is used to render prompts for the vLLM server
# (the Qwen alternative is kept commented out instead of being assigned and
# immediately overwritten).
# base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
base_model_id = "google/gemma-3-1b-it"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Render the chat messages to a plain prompt string for the completions endpoint.
prompt = tokenizer.apply_chat_template(
    llm_messages,
    tokenize=False,
    add_generation_prompt=True
)

In [None]:
import requests

# Name the LoRA adapter was registered under when the server was started.
vllm_model_id = "ast-lora"

# A timeout keeps the cell from hanging forever if the server is not up
# (requests waits indefinitely by default).
llm_response = requests.post("http://localhost:8000/v1/completions", json={
    "model": vllm_model_id,
    "prompt": prompt,
    "max_tokens": 2000,
    "temperature": 0.7
}, timeout=300)

llm_response.json()

batch inference

In [None]:
def construct_llm_messages(row):
    """Build the [system, user] chat messages for one data row.

    Reads row['author'] and row['text_in_msa'] (both stripped) and returns two
    messages whose content is a single {'type': 'text', 'text': ...} part.
    """
    author = row['author'].strip()
    source = row['text_in_msa'].strip()

    system_lines = (
        "You are an advanced NLP experts specializing in text style transfer and semantic preservation.",
        "Your role is to transform input text into the unique writing style of a specified author, ensuring the transformed output retains the original meaning, nuances, and intent.",
        "Always follow instructions carefully, preserve the semantics of the source text, and ensure the generated output accurately reflects the distinctive style and tone of the given author.",
        "Generate the ouptut in the same text language.",
        "Do not generate any introduction or conclusion.",
    )
    user_lines = (
        "<task> Transform the following source text into the writing style of the specified author.",
        f"<author> {author}",
        f"<source> {source}",
        "<output>",
    )

    def _turn(role, lines):
        # One chat turn with its lines joined by newlines into a single text part.
        return {"role": role, "content": [{'type': 'text', 'text': "\n".join(lines)}]}

    return [_turn("system", system_lines), _turn("user", user_lines)]


In [None]:
# df2 = test_df.sample(n=100)
df2 = pd.read_csv("/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/datasets/llamafactory-finetune-data/predictions_100s.csv")

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests

def process_row(row, tokenizer, model_id="ast-lora",
                api_url="http://localhost:8000/v1/completions",
                max_tokens=2000, temperature=0.7, timeout=30):
    """Send one row to the completions endpoint and return the generated text.

    Args:
        row: mapping with the fields construct_llm_messages reads.
        tokenizer: tokenizer whose chat template renders the prompt.
        model_id: model / LoRA name registered on the server.
        api_url: completions endpoint URL.
        max_tokens: generation budget sent to the server.
        temperature: sampling temperature sent to the server.
        timeout: per-request timeout in seconds.

    Returns:
        The generated text on success ("" when the server returns no choices),
        "Error: <status>" for a non-200 reply, or "Exception: <message>" if the
        request itself fails. Failures are reported as strings (best effort) so
        a batch run is never aborted by a single row.
    """
    # 1. Construct llm_messages
    llm_messages = construct_llm_messages(row)
    # 2. Build prompt
    prompt = tokenizer.apply_chat_template(
        llm_messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # 3. Call API
    try:
        response = requests.post(
            api_url,
            json={
                "model": model_id,
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": temperature
            },
            timeout=timeout
        )
        if response.status_code == 200:
            # `or [{}]` also covers an empty "choices" list.
            choices = response.json().get("choices") or [{}]
            return choices[0].get("text", "")
        else:
            print(f"Error: {response.status_code}")
            return f"Error: {response.status_code}"
    except Exception as e:
        print(f"Exception: {str(e)}")
        return f"Exception: {str(e)}"



def batch_process(df, tokenizer, model_id="ast-lora", max_workers=1):
    """Run process_row over every row of `df` and return the responses in row order.

    Args:
        df: DataFrame whose rows are passed to process_row.
        tokenizer: forwarded to process_row.
        model_id: forwarded to process_row.
        max_workers: number of concurrent requests. The default of 1 keeps the
            original one-at-a-time behaviour; raise it to send requests in
            parallel (results still come back in row order).

    Returns:
        list with one response string per row; an unexpected exception for a
        row is recorded as "Exception: <message>" instead of aborting the batch.
    """
    def _safe_process(row):
        try:
            return process_row(row, tokenizer, model_id)
        except Exception as exc:
            return f"Exception: {str(exc)}"

    rows = [row for _, row in df.iterrows()]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # pool.map yields results in submission order, so the output lines up with df.
        return list(tqdm(pool.map(_safe_process, rows), total=len(rows), desc="Processing"))

# Usage
test_df['gemma_lora32_response'] = batch_process(test_df, tokenizer, model_id="ast-lora")

In [None]:
# Use a dedicated name for the CSV path: reusing `data_dir` would overwrite the
# dataset root that earlier cells build their paths from.
predictions_path = "/gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/datasets/llamafactory-finetune-data/testset_predictions.csv"
test_df.to_csv(predictions_path, index=False)

In [None]:
!pip install -q sacrebleu

In [None]:
import sacrebleu

# Prepare lists of hypotheses and references.
# The predictions are read from the column written by the batch-inference cell
# ('gemma_lora32_response').
gemma32_preds = test_df['gemma_lora32_response'].tolist()
# gemma32_preds = df2['gemma_lora32'].tolist()
# gemma16_preds = df2['gemma_lora16'].tolist()
# qwen16_preds = df2['qwen_llm_response'].tolist()

refs = test_df['text_in_author_style'].tolist()

# sacrebleu expects a list of references (for multi-ref support), so wrap refs in a list
gemma32_bleu = sacrebleu.corpus_bleu(gemma32_preds, [refs])
gemma32_chrf = sacrebleu.corpus_chrf(gemma32_preds, [refs])

# gemma16_bleu = sacrebleu.corpus_bleu(gemma16_preds, [refs])
# gemma16_chrf = sacrebleu.corpus_chrf(gemma16_preds, [refs])

# qwen16_bleu = sacrebleu.corpus_bleu(qwen16_preds, [refs])
# qwen16_chrf = sacrebleu.corpus_chrf(qwen16_preds, [refs])

print(f"{gemma32_bleu.score=:.2f}")

print(f"{gemma32_chrf.score=:.2f}")
# print("BLEU details:", bleu)
# print("chrF details:", chrf)


merge and deploy to HF

In [None]:
%%writefile /content/LLaMA-Factory/examples/merge_lora/authorship_style_transfer_finetune.yaml

### model
# Base model plus the LoRA checkpoint to merge into it.
# NOTE(review): `template: gemma2` is used with a gemma-3 base model -- confirm
# this is the template name LLaMA-Factory expects for this checkpoint.

model_name_or_path: google/gemma-3-1b-it
adapter_name_or_path: /gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/gemma-20p-lora32/checkpoint-1000
template: gemma2
trust_remote_code: true

# LoRA fine-tuning type
finetuning_type: lora
stage: sft

### export
# Merged model is written under export_dir.
# NOTE(review): export_size is presumably the shard size in GB -- confirm.
export_dir: /gdrive/MyDrive/AraGenEval shared task/AuthorshipStyleTransferTask1/llm-finetuning/gemma-20p-lora32/merged
export_size: 5
export_device: auto  # choices: [cpu, auto]
export_legacy_format: false

# Hub repo id for the merged model (the other hub options are left disabled).
# push_to_hub: true
export_hub_model_id: "Tami3/authorship-style-transfer"
# hub_private_repo: true
# hub_strategy: checkpoint

In [None]:
!cd /content/LLaMA-Factory/ && llamafactory-cli export /content/LLaMA-Factory/examples/merge_lora/authorship_style_transfer_finetune.yaml

In [None]:
# Uninstall the current transformers version
!pip uninstall -y transformers

# Install a version that should support Qwen 2.5.
# You might need to adjust the version number based on the latest releases
# and compatibility with other libraries you are using (like LLaMA-Factory).
# The requirement is quoted: unquoted, the shell treats `>=4.40.0` as an output
# redirection, so pip would install an unconstrained version and write its
# output to a file named "=4.40.0".
!pip install -qU "transformers>=4.40.0"

# Re-run the cell with the error after this installation