In [1]:
import yaml
import os
import torch
import pandas as pd
import wandb
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TextStreamer


def load_config(yaml_file):
    with open(yaml_file, "r") as file:
        config = yaml.safe_load(file)
    return config

config = load_config("config.yaml")

cuda_devices = config["cuda"]["devices"]
# data
data_dir = config["data"]
# model
model_name = config["model"]["name_pc"]
#lora
rank = int(config["lora"]["rank"])
alpha = int(config["lora"]["alpha"])

# training
max_seq_length = config["training"]["max_seq_length"]
learning_rate = float(config["training"]["learning_rate"])
warmup_steps = int(config["training"]["warmup_steps"])
lr_scheduler_type = str(config["training"]["lr_scheduler_type"])
train_epochs = config["training"]["train_epochs"]
per_device_batch_size = config["training"]["per_device_batch_size"]
gradient_accumulation_steps = config["training"]["gradient_accumulation_steps"]
save_steps = config["training"]["save_steps"]
eval_steps = config["training"]["eval_steps"]
logging_steps = config["training"]["logging_steps"]
random_seed = config["training"]["random_seed"]

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
### set the cuda device(s)
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_devices
print("cuda devices:", cuda_devices)

cuda devices: 1


# Inference Pipeline for already fine-tuned models

In [3]:
timestamp = pd.Timestamp.now().strftime("%Y%m%d%H%M")
# Initialize WandB (ensure you've logged in using `wandb login`)
wandb.init(project="code-llama-finetuning", 
           name=f"inference-{model_name.split('/')[-1]}-{data_dir.split('/')[-1]}_{timestamp}")

[34m[1mwandb[0m: Currently logged in as: [33mpriscillachyrva[0m ([33mpriscillachyrva-university-mannheim[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [3]:
# Model configuration
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: NVIDIA RTX A6000. Max memory: 47.529 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

codellama/CodeLlama-7b-Instruct-hf does not have a padding token! Will use pad_token = <unk>.


Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## Load and Preprocess Train, Validation, and Test-Data

In [7]:
from _1_prompt_temp_v1 import conversational_format_PBE_zeroshot_inference
from unsloth.chat_templates import apply_chat_template, get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

In [8]:
# load the datasets and access the splits
test_dataset = load_dataset(data_dir, split="test")
test_dataset = test_dataset.map(conversational_format_PBE_zeroshot_inference)
#test_dataset = test_dataset.map(formatting_prompts_func, batched=True)
print(test_dataset["conversations"][0])
#print(test_dataset["text"][0])

[{'content': 'Here is a gray scale image represented with integer values 0-9.\n00000000000000000000000000000000000\n00000000000000000000000000000000000\n00000000000000000000000000000000000\n00000000000000000000000000000000000\n00000000000000000000000000133320000\n00000000000000000000000002200003000\n00000000000000000000000021000000300\n00000000000000000000000030000000020\n00000000000000000000000030000000030\n00000000000000000000000030000000030\n00000000000000000000000030000000030\n00000000000000000221000030000000300\n00000000000000003000221003000002200\n00000000000000220000002000433331000\n00000000000000200000001435331000000\n00000021001223420000003311002200000\n00002301322001102000021230000300000\n00220000033002001200030210000030000\n00200000210202000120030300000030000\n00200000300200300020033000000030000\n01100000200200030020042000000020000\n02000000200200005664652000000300000\n02100000100200130213143200003100000\n00300000021100300410003133330000000\n000300000342021230000030000000000

In [None]:
test_dataset = load_dataset(data_dir, split="test")

In [None]:
alpaca_prompt = """[INST]### Instruction:
{}
[/INST]
### Python Program:
{}"""
def formatting_prompts_func(examples):
    instructions = f"Here is a gray scale image represented with integer values 0-9.\n{examples['ASCII-Art']}\nPlease, write a Python program that generates this image using our own custom turtle module.\n"
    programs = examples["programs"]
    texts =[]
    for instruction, programs in zip(instructions, programs):
        text = alpaca_prompt.format(instruction, programs)
        texts.append(text)
    return { "text" : texts, }
pass


## Prediction with pre-trained and/or fine-tuned model

In [9]:
model = FastLanguageModel.for_inference(model)

In [8]:
#Alternative: https://docs.unsloth.ai/basics/tutorial-how-to-finetune-llama-3-and-use-in-ollama#id-10.-train-the-model
FastLanguageModel.for_inference(model) 

def generate_response_streaming(conversation):
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, padding=True)
    
    # Initialize the text streamer
    streamer = TextStreamer(tokenizer)
    
    # Generate the response and stream the output
    model.generate(**inputs, streamer=streamer)
    
    # The streaming will automatically handle the output tokens
    response = "".join([streamer.decode() for token in streamer.stream()])
    return response

for conversation in test_dataset["conversations"][:2]:
    conversation_text = "".join([f"{entry['role']}: {entry['content']}\n" for entry in conversation])
    response = generate_response_streaming(conversation_text)
    print(f"Response for conversation: {response}")

<｜begin▁of▁sentence｜>user: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
000032222

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [None]:
#Alternative: https://docs.unsloth.ai/basics/inference
FastLanguageModel.for_inference(model) 
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 64)


In [3]:
from transformers import pipeline

# Load the pre-trained model and tokenizer (fine-tuned for code generation)
model_name = "ruthchy/fine-tune-CodeLlama-7b-Instruct-sem-len-generalization-logo-ascii-35-completions-only-20250206" 
#model_name = "xu3kev/deepseekcoder-7b-logo-pbe"
generator = pipeline("text-generation", model=model_name, device=torch.cuda.current_device())  # Use GPU if available

# Input prompt to generate a Python program
prompt = """<s>[INST]### Instruction: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
00003222252356300000001100000000000
00000000000003000000001100000000000
00000000000001200000003000000000000
00000000000000300000012000000000000
00000000000000031000230000000000000
00000000000000002333100000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
Please, write a Python program that generates this image using our own custom turtle module.
[/INST]</s>"""

# Generate the Python code
generated_code = generator(prompt, max_length=max_seq_length, num_return_sequences=1)

# Extract and print the generated Python program
python_program = generated_code[0]['generated_text']
print(python_program)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

<s>[INST]### Instruction: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
0000322225

In [4]:
from transformers import pipeline

# Load the pre-trained model and tokenizer (fine-tuned for code generation)
#model_name = "ruthchy/fine-tune-CodeLlama-7b-Instruct-sem-len-generalization-logo-ascii-35-completions-only-20250206" 
#model_name = "xu3kev/deepseekcoder-7b-logo-pbe"
#generator = pipeline("text-generation", model=model_name, device=torch.cuda.current_device())  # Use GPU if available

# Input prompt to generate a Python program
prompt = """<s>[INST]Your task is to draw simple black and white graphics with the custom library. DO NOT USE THE BUILT-IN TURTLE LIBRARY.
You will use a custom turtle library, similar to the built-in library, which is sufficient for all tasks.

Here are all the available functions in the custom turtle library:
- forward(x): move forward x pixels
- left(theta): rotate left by theta degrees
- right(theta): rotate right by theta degrees
- penup(): stop drawing
- pendown(): start drawing
- teleport(x, y, theta): move to position (x, y) with angle theta
- heading(): get the current angle of the turtle
- isdown(): check if the pen is down
- embed(program, local vars): runs the code in program using the current context and teleports back to the original position. Allows you to nest programs. Implementationally, embed gets the turtle state (is down, x, y, heading), executes program, then returns to the original state.
### Instruction: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
00003222252356300000001100000000000
00000000000003000000001100000000000
00000000000001200000003000000000000
00000000000000300000012000000000000
00000000000000031000230000000000000
00000000000000002333100000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
Please, write a Python program that generates this image using our own custom turtle module.
[/INST]</s>"""

# Generate the Python code
generated_code = generator(prompt, max_length=max_seq_length, num_return_sequences=1)

# Extract and print the generated Python program
python_program = generated_code[0]['generated_text']
print(python_program)


<s>[INST]Your task is to draw simple black and white graphics with the custom library. DO NOT USE THE BUILT-IN TURTLE LIBRARY.
You will use a custom turtle library, similar to the built-in library, which is sufficient for all tasks.

Here are all the available functions in the custom turtle library:
- forward(x): move forward x pixels
- left(theta): rotate left by theta degrees
- right(theta): rotate right by theta degrees
- penup(): stop drawing
- pendown(): start drawing
- teleport(x, y, theta): move to position (x, y) with angle theta
- heading(): get the current angle of the turtle
- isdown(): check if the pen is down
- embed(program, local vars): runs the code in program using the current context and teleports back to the original position. Allows you to nest programs. Implementationally, embed gets the turtle state (is down, x, y, heading), executes program, then returns to the original state.
### Instruction: Here is a gray scale image represented with integer values 0-9.
000000

In [3]:
from transformers import pipeline

# Load the pre-trained model and tokenizer (fine-tuned for code generation)
#model_name = "ruthchy/fine-tune-CodeLlama-7b-Instruct-sem-len-generalization-logo-ascii-35-completions-only-20250206" 
model_name = "xu3kev/deepseekcoder-7b-logo-pbe"
generator = pipeline("text-generation", model=model_name, device=0)  # Use GPU if available

# Input prompt to generate a Python program
prompt = """<｜begin▁of▁sentence｜>user: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
00003222252356300000001100000000000
00000000000003000000001100000000000
00000000000001200000003000000000000
00000000000000300000012000000000000
00000000000000031000230000000000000
00000000000000002333100000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
Please, write a Python program that generates this image using our own custom turtle module."""

# Generate the Python code
generated_code = generator(prompt, max_length=max_seq_length, num_return_sequences=1)

# Extract and print the generated Python program
python_program = generated_code[0]['generated_text']
print(python_program)


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

<｜begin▁of▁sentence｜>user: Here is a gray scale image represented with integer values 0-9.
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000000000000
00000000000000000000000000133320000
00000000000000000000000002200003000
00000000000000000000000021000000300
00000000000000000000000030000000020
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000000000030000000030
00000000000000000221000030000000300
00000000000000003000221003000002200
00000000000000220000002000433331000
00000000000000200000001435331000000
00000021001223420000003311002200000
00002301322001102000021230000300000
00220000033002001200030210000030000
00200000210202000120030300000030000
00200000300200300020033000000030000
01100000200200030020042000000020000
02000000200200005664652000000300000
02100000100200130213143200003100000
00300000021100300410003133330000000
00030000034202123000003000000000000
000032222