In [2]:
from transformers import AutoModelForCausalLM,BitsAndBytesConfig, AutoTokenizer,TrainingArguments, Trainer
from datasets import load_dataset,Dataset
from peft import LoraConfig, get_peft_model, TaskType,PeftModel
import torch
import json
import requests
import bitsandbytes


from huggingface_hub import login
login(new_session=False)
login("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")

In [3]:
from accelerate import Accelerator
accelerator = Accelerator()

# Load base model and tokenizer

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [5]:
torch.cuda.empty_cache()

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

In [7]:
base_model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct",
                                           quantization_config=bnb_config,
                                           device_map="cuda"
                                           )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
tokenizer.pad_token = tokenizer.eos_token

# Apply loRA via PEFT

In [14]:
lora_config = LoraConfig(
    r=4,
    lora_alpha=8,
    task_type=TaskType.CAUSAL_LM,
    target_modules=["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj"],
    lora_dropout=0.05,
    bias="none"
)

peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()
print(peft_model.modules_to_save)

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713
None


# Dataset

In [9]:
url = "https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json"
data = requests.get(url).json()

# Convert to Hugging Face Dataset
dataset = Dataset.from_list(data)
dataset = dataset.select(range(10000))

In [10]:
print(dataset[5])

{'instruction': 'Identify the odd one out.', 'input': 'Twitter, Instagram, Telegram', 'output': 'Telegram'}


In [11]:
def format_alpaca(example):
    if example["input"]: #if exist input
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n{example['output']}"
    else:
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    return {"text": prompt}

#alpaca style format, {"text":...}
formatted_dataset = dataset.map(format_alpaca)

tokenized_dataset = formatted_dataset.map(
    lambda x: tokenizer(x["text"], padding="max_length", truncation=True, max_length=256),
    batched=True
)#tokenize

def add_labels(example):
    example["labels"] = example["input_ids"]
    return example

tokenized_dataset = tokenized_dataset.map(add_labels)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [12]:
print(tokenized_dataset)

Dataset({
    features: ['instruction', 'input', 'output', 'text', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 10000
})


# Configure Training

In [16]:
dataset_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)

args = TrainingArguments(
    output_dir="./mistral-lora-checkpoints",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    learning_rate=2e-5,
    num_train_epochs=1,
    logging_steps=50,
    save_strategy="no",
    report_to="none"
)

trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=dataset_split['train'],
    eval_dataset=dataset_split['test'],
    tokenizer=tokenizer
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [17]:
torch.cuda.empty_cache()

In [18]:
trainer.train()

Step,Training Loss
50,4.8919
100,0.7715
150,0.5614
200,0.5357
250,0.5244
300,0.469
350,0.484
400,0.5035
450,0.4878
500,0.5175


TrainOutput(global_step=1125, training_loss=0.704879406399197, metrics={'train_runtime': 3371.1078, 'train_samples_per_second': 2.67, 'train_steps_per_second': 0.334, 'total_flos': 3.8998072885248e+16, 'train_loss': 0.704879406399197, 'epoch': 1.0})

In [None]:
peft_model.save_pretrained("./llama-instruct-lora-quantized")
tokenizer.save_pretrained("./llama-instruct-lora-quantized")

#convert file from safetensor to bin
from safetensors.torch import load_file
state_dict = load_file("llama-instruct-lora-quantized/adapter_model.safetensors")
torch.save(state_dict, "llama-instruct-lora-quantized/adapter_model.bin")


Invalid credentials in Authorization header - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-3B-Instruct.


('./llama-instruct-lora/tokenizer_config.json',
 './llama-instruct-lora/special_tokens_map.json',
 './llama-instruct-lora/chat_template.jinja',
 './llama-instruct-lora/tokenizer.json')

In [8]:
fine_tuned_model = PeftModel.from_pretrained(base_model,"llama-instruct-lora-quantized",device_map ="cuda")

In [None]:
merged_model=fine_tuned_model.merge_and_unload()



In [None]:
save_path = "llama3b-merged-quantized"
merged_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

('llama3b-merged-quantized\\tokenizer_config.json',
 'llama3b-merged-quantized\\special_tokens_map.json',
 'llama3b-merged-quantized\\chat_template.jinja',
 'llama3b-merged-quantized\\tokenizer.json')

In [8]:
finetuned_llama_model = AutoModelForCausalLM.from_pretrained("./llama-merged-quantized", device_map="cuda",local_files_only=True)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("llama-merged-quantized",local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token

In [11]:
user_instruction = "How can someone cough?"
user_input = ""

prompt = f"""### Instruction:
{user_instruction}

### Input:
{user_input}

### Response:"""

In [15]:
tokens = tokenizer(prompt, return_tensors="pt").to(finetuned_llama_model.device)

with torch.no_grad():
    finetuned_llama_model.eval()
    output_ids = finetuned_llama_model.generate(**tokens, max_new_tokens=200)

decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
response = decoded_output.split("### Response:")[-1].strip()

print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


To cough, you need to let out a sudden, forceful expulsion of air from your lungs. Here's how to do it:

1.  Start by taking a deep breath into your lungs.
2.  Next, try to swallow a small amount of air. This will help you build up pressure in your lungs.
3.  Now, contract your diaphragm and your abdominal muscles to force the air out of your lungs.
4.  As you exhale, try to make a sudden, sharp sound with your voice. This is the cough.
5.  Repeat the process a few times to get the hang of it.

Note: If you have any underlying medical conditions or concerns, it's always best to consult with a healthcare professional for personalized advice.
