In [1]:
import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Inference Pipeline for already fine-tuned models

In [2]:
# load libraries and model from HF
import torch
import pandas as pd
import wandb
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported

model_name = "xu3kev/deepseekcoder-7b-logo-pbe" #"ruthchy/02_expt_code-llama-ascii-desc"
max_seq_length = 2050


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
timestamp = pd.Timestamp.now().strftime("%Y%m%d%H%M")
# Initialize WandB (ensure you've logged in using `wandb login`)
wandb.init(project="code-llama-finetuning", 
           name=f"fine-tune-semantic-length-generalization-ascii-desc_{timestamp}",
           config={"learning_rate": 5e-5, "num_train_epochs": 3, "max_seq_length": max_seq_length, "num_epochs": 3,})

In [3]:
# Model configuration
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: NVIDIA RTX A6000. Max memory: 47.529 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.86G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/3.03G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/126 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.27k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/464 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

In [None]:
# if tokenizer.pad_token is None then an error will be raised I explicitly set it to '[PAD]'
if tokenizer.pad_token == None:
    tokenizer.pad_token = tokenizer.eos_token or '[PAD]'
    if tokenizer.pad_token == '[PAD]':
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    print(f"Added padding token: {tokenizer.pad_token}")

## Load and Preprocess Train, Validation, and Test-Data

In [4]:
# load the datasets and access the splits
dataset = load_dataset("ruthchy/semantic-length-generalization-logo-data-ascii-desc")
train_dataset, val_dataset, test_dataset = dataset["train"], dataset["validation"], dataset["test"]

Using the latest cached version of the dataset since ruthchy/semantic-length-generalization-logo-data-ascii-desc couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/pratz/.cache/huggingface/datasets/ruthchy___semantic-length-generalization-logo-data-ascii-desc/default/0.0.0/d8c6991077ac09af8a454960a29609f5701aae69 (last modified on Thu Jan 30 00:08:13 2025).


In [5]:
# Tokenize the datasets
def preprocess_function(examples):
    return tokenizer(
        examples["Input"],  
        text_pair=examples["Program"],  
        truncation=True,
        max_length=max_seq_length,
        padding="max_length",
    )

# Apply the tokenizer to the datasets
tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True, num_proc=4)

Map (num_proc=4):   0%|          | 0/997 [00:00<?, ? examples/s]

## Prediction with pre-trained and/or fine-tuned model

In [6]:
model = FastLanguageModel.for_inference(model)

In [8]:
for i in range(5):
    print(f"Input {i+1}: {tokenized_test_dataset['Input'][i]}")
    print(f"Program {i+1}: {tokenized_test_dataset['Program'][i]}")
    print()

Input 1: <s><sys_prompt>

Your task is to draw simple black and white graphics with the custom library. DO NOT USE THE BUILT-IN TURTLE LIBRARY.

You will use a custom turtle library, similar to the built-in library, which is sufficient for all tasks.

</sys_prompt>

<custom_library_desc>

Here are all the available functions in the custom turtle library:

- forward(x): move forward x pixels

- left(theta): rotate left by theta degrees

- right(theta): rotate right by theta degrees

- penup(): stop drawing

- pendown(): start drawing

- teleport(x, y, theta): move to position (x, y) with angle theta

- heading(): get the current angle of the turtle

- isdown(): check if the pen is down

- embed(program, local vars): runs the code in program using the current context and teleports back to the original position. Allows you to nest programs. Implementationally, embed gets the turtle state (is down, x, y, heading), executes program, then returns to the original state.

</custom_library_desc

In [9]:
# Get the input data from the tokenized dataset
input_ids = tokenized_test_dataset["input_ids"]
attention_mask = tokenized_test_dataset["attention_mask"]

# Make predictions
model.eval()  # Set the model to evaluation mode
predictions = []

with torch.no_grad():  # Disable gradient computation to save memory
    for i in range(0, len(input_ids), 8):  # Batch size of 8
        batch_input_ids = torch.tensor(input_ids[i:i+8]).cuda()
        batch_attention_mask = torch.tensor(attention_mask[i:i+8]).cuda()

        outputs = model.generate(input_ids=batch_input_ids, attention_mask=batch_attention_mask, max_new_tokens=500)

        # Decode the generated predictions back to text, excluding the input tokens
        for output in outputs:
            generated_text = tokenizer.decode(output[len(batch_input_ids[0]):], skip_special_tokens=True) # so it only returns the generated program
            predictions.append(generated_text)

# Now, you have the predictions for the test set in the `predictions` list
# Let's print some of them to verify
for idx, prediction in enumerate(predictions[:5]):
    print(f"Prediction {idx+1}: {prediction}")

Prediction 1: 
Prediction 2: 

# the following program draws a small 7-gon, a medium line, a small triangle, a short line, a small circle
</# the following program draws a small 7-gon, a medium line, a small triangle, a short line, a small circle
for i in range(7):
    forward(2)
    left(51.42857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857142857
Prediction 3: 
Prediction 4: 

# the following program draws a small 7-gon, a medium line, a medium triangle, a medium line, a medium circle

# the following program draws a small 7-gon, a medium line, a medium triangle, a medium line, a medium circle

# the following program draws a sm

predictions took 90min and look shitty