In [1]:
# %%
%pip install datasets
%pip install peft
%pip install -U bitsandbytes




In [2]:
# %%
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq,
    AutoModelForCausalLM,
    AutoTokenizer
)
from datasets import load_dataset, load_from_disk
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import psutil
import os
import numpy as np


In [9]:
# %%
def format_example(example):
    """Format a single example for fine-tuning."""
    return f"""Problem: {example['markdown_description']}\n\nSolution: {example['canonical_solution']}"""

def get_memory_usage():
    """Get current memory usage in MB."""
    process = psutil.Process(os.getpid())
    return process.memory_info().rss / 1024 / 1024


In [10]:
# %%
def prepare_dataset(data):
    """Prepare the Effibench dataset for fine-tuning."""

    print("Sample record:", dataset['train'][0])  # Inspect a single record

    def format_function(examples):
        inputs = [f"Problem: {md}" for md in examples["markdown_description"]]
        outputs = [f"Solution: {cs}" for cs in examples["canonical_solution"]]
        return {"input_text": inputs, "output_text": outputs}

    # Tokenization function
    def tokenize_function(examples):
        inputs = tokenizer(
            examples["input_text"], padding="max_length", truncation=True, max_length=1024
        )
        outputs = tokenizer(
            examples["output_text"], padding="max_length", truncation=True, max_length=1024
        )
        inputs["labels"] = outputs["input_ids"]
        return inputs

    # Format the dataset
    formatted_dataset = dataset.map(
        format_function,
        remove_columns=dataset["train"].column_names,
        batched=True  # Process in batches
    )

    # Tokenize the dataset
    tokenized_dataset = formatted_dataset.map(
        tokenize_function,
        batched=True
    )

    return tokenized_dataset


In [11]:
# %%
# Initialize memory tracking
initial_memory = get_memory_usage()
memory_measurements = []


In [6]:
# %%
# Model configuration
model_name = "NousResearch/Llama-2-7b-hf"  # Using 7B parameter model as it's more consumer-friendly

# QLoRA configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
model = prepare_model_for_kbit_training(model)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# %%
# LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ2SEQ_LM"
)

# Apply LoRA
model = get_peft_model(model, lora_config)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


In [8]:
# %%
# Prepare dataset
dataset = load_dataset("DONG19/EffiBench")

In [9]:
dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
dataset = prepare_dataset(dataset)

Sample record: {'problem_idx': 2617, 'task_name': 'Minimum Number of Visited Cells in a Grid', 'description': '\n\n<p>You are given a <strong>0-indexed</strong> <code>m x n</code> integer matrix <code>grid</code>. Your initial position is at the <strong>top-left</strong> cell <code>(0, 0)</code>.</p>\n\n<p>Starting from the cell <code>(i, j)</code>, you can move to one of the following cells:</p>\n\n<ul>\n\t<li>Cells <code>(i, k)</code> with <code>j &lt; k &lt;= grid[i][j] + j</code> (rightward movement), or</li>\n\t<li>Cells <code>(k, j)</code> with <code>i &lt; k &lt;= grid[i][j] + i</code> (downward movement).</li>\n</ul>\n\n<p>Return <em>the minimum number of cells you need to visit to reach the <strong>bottom-right</strong> cell</em> <code>(m - 1, n - 1)</code>. If there is no valid path, return <code>-1</code>.</p>\n\n<p>&nbsp;</p>\n<p><strong class="example">Example 1:</strong></p>\n<img alt="" src="https://fastly.jsdelivr.net/gh/doocs/leetcode@main/solution/2600-2699/2617.Minim

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [31]:
# Save dataset to Google Drive
dataset.save_to_disk("/content/drive/MyDrive/CS 6158/CS6158 Research Project/dataset")

Saving the dataset (0/1 shards):   0%|          | 0/800 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/200 [00:00<?, ? examples/s]

In [10]:
train_dataset = dataset["train"]
print(train_dataset)

Dataset({
    features: ['input_text', 'output_text', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 800
})


In [11]:
test_dataset = dataset["test"]
print(test_dataset)

Dataset({
    features: ['input_text', 'output_text', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 200
})


In [12]:
# %%
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    logging_steps=100,
    learning_rate=2e-4,
    fp16=True,
    save_strategy="epoch",
    # predict_with_generate=True  # Required for seq2seq tasks
)


In [13]:
# %%
# Data collator for seq2seq
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)


In [34]:
# %%
# Record pre-training memory
pre_training_memory = get_memory_usage()
memory_measurements.append(("Pre-training", pre_training_memory))


In [35]:
# %%
# Train the model
trainer.train()


  return fn(*args, **kwargs)


Step,Training Loss


TrainOutput(global_step=50, training_loss=1.0901754760742188, metrics={'train_runtime': 2387.9612, 'train_samples_per_second': 0.335, 'train_steps_per_second': 0.021, 'total_flos': 3.25176470274048e+16, 'train_loss': 1.0901754760742188, 'epoch': 1.0})

In [36]:
# %%
# Record post-training memory
post_training_memory = get_memory_usage()
memory_measurements.append(("Post-training", post_training_memory))

# Print memory usage statistics
print("\nMemory Usage Statistics:")
print(f"Initial Memory Usage: {initial_memory:.2f} MB")
for stage, memory in memory_measurements:
    print(f"{stage} Memory Usage: {memory:.2f} MB")



Memory Usage Statistics:
Initial Memory Usage: 1040.91 MB
Pre-training Memory Usage: 1541.10 MB
Post-training Memory Usage: 1869.52 MB
Pre-training Memory Usage: 2086.25 MB
Post-training Memory Usage: 1951.82 MB


In [37]:
# Save the fine-tuned model and tokenizer
model_save_path = "./fine_tuned_model"
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"Model and tokenizer saved to {model_save_path}")


Model and tokenizer saved to ./fine_tuned_model


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [39]:
# Copy model and tokenizer from saved folder to Google Drive
!cp -r "./fine_tuned_model" "/content/drive/MyDrive/CS 6158/CS6158 Research Project/fine_tuned_model"

In [5]:
def perform_inference(memory_model, memory_tokenizer, test_dataset, num_samples=5):
    """Perform inference using the model in memory."""
    # Select a few examples from the test set
    test_samples = test_dataset

    for idx, example in enumerate(test_samples):
        # input_text = f"Problem: {example['markdown_description']}"
        # Tokenize the input
        print(example)
        inputs = example["input_ids"]
        inputs = torch.tensor(inputs).unsqueeze(0)
        # inputs = memory_tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=256).to(memory_model.device)
        text = example["input_text"]
        # Generate prediction
        # memory_model.max_new_tokens = 1024
        outputs = memory_model.generate(inputs, max_length=2048, num_beams=5, early_stopping=True, max_new_tokens=2048)
        prediction = memory_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display result
        print(f"Test Sample {idx + 1}:")
        print(f"Input: {text}")
        print(f"Generated Solution: {prediction}")
        print("-" * 80)

# Perform inference using the model in memory
perform_inference(model, tokenizer, dataset)

NameError: name 'model' is not defined

In [53]:
model = None

In [3]:
test_dataset = load_from_disk("/content/drive/MyDrive/CS 6158/CS6158 Research Project/dataset/test")

In [5]:
model_path = "/content/drive/MyDrive/CS 6158/CS6158 Research Project/fine_tuned_model"

In [6]:
saved_model = AutoModelForCausalLM.from_pretrained(model_path)
saved_tokenizer = AutoTokenizer.from_pretrained(model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:

def perform_inference_from_saved_model(test_dataset, num_samples=5):
    """Perform inference using the saved model."""
    # Select a few examples from the test set
    test_samples = test_dataset.select(range(num_samples))

    for idx, example in enumerate(test_samples):
        # print(example)
        input_text = example["input_text"]
        # Tokenize the input
        # inputs = saved_tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=256).to(saved_model.device)
        inputs = example["input_ids"]
        inputs = torch.tensor(inputs).unsqueeze(0)
        # Generate prediction
        outputs = saved_model.generate(inputs, max_length=2048, num_beams=5, early_stopping=True, max_new_tokens=2048)
        prediction = saved_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display result
        print(f"Test Sample {idx + 1}:")
        print(f"Input: {input_text}")
        print(f"Generated Solution: {prediction}")
        print("-" * 80)



In [None]:
# Perform inference using the saved model
perform_inference_from_saved_model(test_dataset)


Both `max_new_tokens` (=2048) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=2048) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Test Sample 1:
Input: Problem: 
You are given a **0-indexed** integer array `nums`, and an integer `k`.


The **K-or** of `nums` is a non-negative integer that satisfies the following:


* The `ith` bit is set in the K-or **if and only if** there are at least `k` elements of nums in which bit `i` is set.


Return *the  **K-or** of* `nums`.


**Note** that a bit `i` is set in `x` if `(2i AND x) == 2i`, where `AND` is the bitwise `AND` operator.


 


**Example 1:**



```

**Input:** nums = [7,12,9,8,9,15], k = 4
**Output:** 9
**Explanation:** Bit 0 is set at nums[0], nums[2], nums[4], and nums[5].
Bit 1 is set at nums[0], and nums[5].
Bit 2 is set at nums[0], nums[1], and nums[5].
Bit 3 is set at nums[1], nums[2], nums[3], nums[4], and nums[5].
Only bits 0 and 3 are set in at least k elements of the array, and bits i >= 4 are not set in any of the array's elements. Hence, the answer is 2^0 + 2^3 = 9.

```

**Example 2:**



```

**Input:** nums = [2,12,1,11,4,5], k = 6
**Output:** 0
**