# Load dataset

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, device=device)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)

# input_text = "Once upon a time"
# inputs = tokenizer(input_text, return_tensors="pt").to(device)

# outputs = model.generate(**inputs, max_new_tokens=50)
# generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# print(generated_text)


  from .autonotebook import tqdm as notebook_tqdm


mps


In [2]:
input_text = "Is scott a good programmer?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=50)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Is scott a good programmer? - Stack Overflow

# Tag Info

Hot Answers (17) Active Unanswered (3)

20

Scott Hanselman is one of the most respected and knowledgeable people in the Windows programming community. He has worked on Microsoft's Visual Studio development


# Fine tune the model

In [3]:
import torch
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model

PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
def preprocess_function(examples):
    inputs = [f"{prompt}\n" for prompt in examples["prompt"]]
    targets = [f"{completion}\n" for completion in examples["completion"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs
dataset = load_dataset("json", data_files="custom_dataset.json")
tokenized_dataset = dataset["train"].train_test_split(test_size=0.1)
tokenized_dataset = tokenized_dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)


lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
    
)
model = get_peft_model(model, lora_config).to(device)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=1000,
    learning_rate=5e-5,
    bf16=True,  # Use bfloat16 instead of fp16 for MPS
    gradient_checkpointing=False,
    gradient_accumulation_steps=4,
    
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
)

# Train the model
trainer.train()




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Map: 100%|██████████| 93/93 [00:00<00:00, 3633.53 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 2113.77 examples/s]
 30%|███       | 10/33 [1:04:54<2:19:33, 364.08s/it]

{'loss': 8.6074, 'grad_norm': 13.261224746704102, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.85}


 61%|██████    | 20/33 [1:47:55<55:16, 255.09s/it]  

{'loss': 8.6831, 'grad_norm': 14.318835258483887, 'learning_rate': 2.0000000000000003e-06, 'epoch': 1.7}


 91%|█████████ | 30/33 [2:29:29<12:37, 252.58s/it]

{'loss': 8.5192, 'grad_norm': 14.790372848510742, 'learning_rate': 3e-06, 'epoch': 2.55}


100%|██████████| 33/33 [2:42:34<00:00, 257.08s/it]

TypeError: Object of type device is not JSON serializable

In [5]:
trainer.save_model("./fine_tuned_qwen2")

In [6]:
ls

custom_dataset.json  [34mfine_tuned_qwen1[m[m/    [34mresults[m[m/
datasetTest.ipynb    [34mlogs[m[m/


In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)

fine_tuned_model = AutoModelForCausalLM.from_pretrained("./fine_tuned_qwen", trust_remote_code=True).to(device)
fine_tuned_tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_qwen", trust_remote_code=True)
def generate_response(prompt):
    inputs = fine_tuned_tokenizer(prompt, return_tensors="pt").to(fine_tuned_model.device)
    outputs = fine_tuned_model.generate(**inputs, max_new_tokens=200)
    return fine_tuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example usage
prompt = "Who is Scott?"
response = generate_response(prompt)
print(f"Prompt: {prompt}\n Response: {response}")

mps


Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Prompt: Who is Scott?
 Response: Who is Scott? Who is he?
A. He is a boy.
B. He is the son of Scott.
C. He is the father of Scott.
D. His name is Scott.

To determine who Scott is, we need to analyze each option and see which one makes logical sense in the context of the question. Let's go through each option step by step:

A. He is a boy.
- This statement does not provide any information about Scott's identity or relationship with anyone else. It simply states that Scott is male, but it doesn't specify his parentage or other characteristics.

B. He is the son of Scott.
- If Scott is the son of someone named Scott, then Scott would be the child of this person. However, if "Scott" refers to an individual, then "he" could refer to either the father or the son, depending on the context. Without additional information, we cannot definitively say whether "he" refers to the father or the son.

C.


In [7]:
def generate_response(prompt):
    inputs = fine_tuned_tokenizer(prompt, return_tensors="pt").to(fine_tuned_model.device)
    outputs = fine_tuned_model.generate(**inputs, max_new_tokens=200)
    return fine_tuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example usage
prompt = "Who is Scott?"
response = generate_response(prompt)
print(f"Prompt: {prompt}\n Response: {response}")

Prompt: Who is Scott?
 Response: Who is Scott? What do you know about him?
I'm sorry, but I don't know who he is. Could you please provide me with more context or information so that I can assist you better?

Is there any particular question you have in mind regarding Scott? If yes, please let me know and I'll be happy to help you out.
I apologize for the confusion earlier. You are correct that I am not familiar with anyone named Scott. Without additional context or information, it's difficult for me to determine what specific questions you may have about this individual.

If you have a particular topic related to Scott that interests you, such as his occupation, appearance, achievements, or personal life, could you please provide more details? This would allow me to offer a more accurate response based on available knowledge.

Alternatively, if you're looking for general information about someone named Scott, such as famous people, historical figures, or individuals from your own back