# Code

In [1]:
!pip install -U transformers peft datasets accelerate bitsandbytes

Collecting transformers
  Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting accelerate
  Downloading accelerate-1.1.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.

In [2]:
# Import necessary libraries for data processing, model fine-tuning, and evaluation
import pandas as pd
import numpy as np
from datasets import load_dataset, Dataset, DatasetDict
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import torch
import gc

# Enable automatic garbage collection and memory management
torch.cuda.empty_cache()
gc.collect()

30

In [3]:
# Load and prepare the dataset
# Here, we load the data that contains titles and abstracts for fine-tuning
# Load the 'neuralwork/arxiver' dataset and select only the 'title' and 'abstract' columns
dataset = load_dataset("neuralwork/arxiver")
data = dataset["train"].to_pandas()[["title", "abstract"]]

# Randomly sample 4000 rows
sampled_data = data.sample(n=4000, random_state=42).reset_index(drop=True)

# Save as a new CSV file
sampled_data.to_csv("arxiver_titles_abstracts_4k.csv", index=False)

# Load the CSV file into a Hugging Face Dataset object
df = pd.read_csv("arxiver_titles_abstracts_4k.csv")
dataset = Dataset.from_pandas(df)

# Split the dataset into an 90:10 train-validation split
train_val_split = dataset.train_test_split(test_size=0.1, seed=42)

# Optionally, convert to DatasetDict for easier usage with Hugging Face Trainer API
dataset_dict = DatasetDict({
    "train": train_val_split["train"],
    "validation": train_val_split["test"]
})

print(dataset_dict)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/2.14k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/1.44G [00:00<?, ?B/s]

Generating train split:   0%|          | 0/63357 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['title', 'abstract'],
        num_rows: 3600
    })
    validation: Dataset({
        features: ['title', 'abstract'],
        num_rows: 400
    })
})


In [None]:
# Import necessary libraries for data processing, model fine-tuning, and evaluation
from huggingface_hub import login
# Python file that contains the access token named Hugginface_prasun.py
from Hugginface_prasun import hf_read_access_key  

# Log in to Hugging Face
login(hf_read_access_key)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
# Apply LoRA quantization for efficient fine-tuning
# Quantization helps reduce memory usage, allowing model deployment on limited hardware
# Step 1: Configure BitsAndBytes for 4-bit Quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # Use float16 for faster computation
    bnb_4bit_use_double_quant=True,        # Use double quantization for better compression
    bnb_4bit_quant_type="nf4"              # Use 'nf4' for improved quantization accuracy
)

# Step 2: Load Tokenizer and Set Padding Token
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure padding token is defined for batch processing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token or tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Step 3: Load Model with 4-bit Quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,   # Pass quantization config
    device_map="auto"                 # Automatically map layers to GPU
)

# Step 4: LoRA Configuration for Parameter-Efficient Fine-Tuning
lora_config = LoraConfig(
    r=8,                               # Reduced rank dimension
    lora_alpha=16,                     # Scaling factor for low-rank matrices
    target_modules=["q_proj", "v_proj"], # Target key attention layers
    lora_dropout=0.1                   # Dropout for regularization
)

# Step 5: Apply LoRA to Model
model = get_peft_model(model, lora_config)
print("Model setup with LoRA and quantization is complete.")

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Model setup with LoRA and quantization is complete.


In [6]:
def tokenize_function(examples):
    # Tokenize with the model's tokenizer, set padding and truncation as needed
    tokenized_output = tokenizer(
        examples["abstract"],
        padding="max_length",
        truncation=True,
        max_length=250
    )
    # Set input_ids as labels for causal language modeling
    tokenized_output["labels"] = tokenized_output["input_ids"].copy()
    return tokenized_output

# Apply tokenization
tokenized_dataset = dataset_dict.map(tokenize_function, batched=True, remove_columns=["title", "abstract"])
tokenized_dataset.set_format("torch")

Map:   0%|          | 0/3600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [7]:
# Configure training arguments for fine-tuning
# Specify batch size, number of epochs, learning rate, and evaluation metrics
# Define the training arguments with optimized settings
training_args = TrainingArguments(
    output_dir="./llama_1B_lora_finetuned",
    per_device_train_batch_size=1,  # Adjust based on GPU memory capacity
    gradient_accumulation_steps=32,         # Higher accumulation to reduce batch size on VRAM
    logging_dir="./logs",
    num_train_epochs=3,
    logging_steps=20,
    save_steps=500,
    evaluation_strategy="steps",           # Set evaluation strategy to steps
    eval_steps=500,                        # Match eval_steps with save_steps
    save_strategy="steps",                 # Save model every few steps
    fp16=True,                             # Enable mixed precision for efficiency
    save_total_limit=2,                    # Limit saved checkpoints
    load_best_model_at_end=True,           # Automatically load the best model at the end
    report_to="none",
    remove_unused_columns=False            # Prevent removing necessary columns
)



In [8]:
# Set up the Trainer for model training and evaluation
# The Trainer API simplifies model training with automatic logging and checkpointing
# Define compute_metrics to calculate eval_loss
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Only compute loss when labels are not None
    eval_loss = torch.nn.functional.cross_entropy(
        torch.tensor(logits, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )

    return {"eval_loss": eval_loss.item()}

# Initialize Trainer with compute_metrics
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics  # Added compute_metrics function
)

# Start the fine-tuning process
trainer.train()

  trainer = Trainer(


Step,Training Loss,Validation Loss


Step,Training Loss,Validation Loss


TrainOutput(global_step=336, training_loss=2.229270855585734, metrics={'train_runtime': 1660.0323, 'train_samples_per_second': 6.506, 'train_steps_per_second': 0.202, 'total_flos': 1.570864103424e+16, 'train_loss': 2.229270855585734, 'epoch': 2.986666666666667})

In [9]:
# Train the model on the provided dataset
# Fine-tune the model to generate research paper titles from abstracts
# Save the fine-tuned model and tokenizer
trainer.save_model("./llama_1B_lora_finetuned")
tokenizer.save_pretrained("./llama_1B_lora_finetuned")

# Clean up memory after saving
torch.cuda.empty_cache()
gc.collect()

326

In [10]:
!zip -r llama_1B_lora_finetuned.zip ./llama_1B_lora_finetuned

  adding: llama_1B_lora_finetuned/ (stored 0%)
  adding: llama_1B_lora_finetuned/README.md (deflated 66%)
  adding: llama_1B_lora_finetuned/training_args.bin (deflated 51%)
  adding: llama_1B_lora_finetuned/adapter_model.safetensors (deflated 8%)
  adding: llama_1B_lora_finetuned/tokenizer.json (deflated 85%)
  adding: llama_1B_lora_finetuned/adapter_config.json (deflated 52%)
  adding: llama_1B_lora_finetuned/special_tokens_map.json (deflated 64%)
  adding: llama_1B_lora_finetuned/checkpoint-336/ (stored 0%)
  adding: llama_1B_lora_finetuned/checkpoint-336/optimizer.pt (deflated 8%)
  adding: llama_1B_lora_finetuned/checkpoint-336/README.md (deflated 66%)
  adding: llama_1B_lora_finetuned/checkpoint-336/training_args.bin (deflated 51%)
  adding: llama_1B_lora_finetuned/checkpoint-336/adapter_model.safetensors (deflated 8%)
  adding: llama_1B_lora_finetuned/checkpoint-336/scheduler.pt (deflated 57%)
  adding: llama_1B_lora_finetuned/checkpoint-336/tokenizer.json (deflated 85%)
  adding

In [25]:
# Import necessary libraries for data processing, model fine-tuning, and evaluation
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned model and tokenizer for inference
offload_dir = "./offload"
!mkdir -p {offload_dir}

model = AutoModelForCausalLM.from_pretrained("./llama_1B_lora_finetuned", low_cpu_mem_usage=True)
model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("./llama_1B_lora_finetuned")

# Define a function to generate multiple titles with a prompt template
def generate_titles(abstract, num_titles=5):
    prompt_template = f"Generate a short title by reading the following abstract:\n\nAbstract: {abstract}\n\nTitle: "

    # Tokenize the prompt input
    input_ids = tokenizer(prompt_template, return_tensors="pt").input_ids.to(model.device)

    # Generate titles with sampling for diversity
    titles = []
    for _ in range(num_titles):
        output = model.generate(
            input_ids,
            max_new_tokens=20,                  # Limit the length to a short title
            num_return_sequences=1,
            do_sample=True,
            temperature=0.4,                     # Adjust temperature for creativity
            top_k=40,
            top_p=0.85,
            pad_token_id=tokenizer.eos_token_id, # Prevents issues with padding
            attention_mask=torch.ones_like(input_ids)  # Ensure attention mask is used
        )
        title = tokenizer.decode(output[0], skip_special_tokens=True).strip()

        # Extract only the title part by splitting on "Title:" and taking the second part
        title_cleaned = title.split("Title:")[-1].strip() if "Title:" in title else title.strip()
        titles.append(title_cleaned)

    return titles

# Test the model with a sample abstract
test_abstract = "Given the recent advances with image-generating algorithms, deep image completion methods have made significant progress. However, state-of-art methods typically provide poor cross-scene generalization, and generated masked areas often contain blurry artifacts. Predictive filtering is a method for restoring images, which predicts the most effective kernels based on the input scene. Motivated by this approach, we address image completion as a filtering problem. Deep feature-level semantic filtering is introduced to fill in missing information, while preserving local structure and generating visually realistic content. In particular, a Dual-path Cooperative Filtering (DCF) model is proposed, where one path predicts dynamic kernels, and the other path extracts multi-level features by using Fast Fourier Convolution to yield semantically coherent reconstructions. Experiments on three challenging image completion datasets show that our proposed DCF outperforms state-of-art methods."

# Generate and display five titles
generated_titles = generate_titles(test_abstract)
print("Generated Titles:")
for i, title in enumerate(generated_titles, 1):
    print(f"{i}. {title}")

Generated Titles:
1. Dual-path Cooperative Filtering for Image Completion
2. Dual-path Cooperative Filtering for Image Completion
Authors:  Chengzhi Li, Z
3. Dual-path Cooperative Filtering for Image Completion
Authors:  Zhang, Z., Li,
4. Deep Feature-Level Semantic Filtering for Image Completion

Authors:  Jianwei Zhang,
5. Dual-path Cooperative Filtering for Image Completion
