In [1]:
import torch
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

In [2]:
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    use_auth_token = "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"
)

print("finish")



In [3]:
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1", 
    use_auth_token = "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",
    load_in_4bit=True, 
    torch_dtype=torch.float16, 
    device_map="auto"
)

print("finish")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [4]:
messages = [
    {"role": "user", "content": "Translate the given text to Shakespearean style. I want to invite you on a second date"},
    # {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    # {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

In [6]:
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")


In [7]:
outputs = base_model.generate(inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST] Noble friend, I do entreat thee,
To grant me yet another's date


In [8]:
# Prepare model for k-bit training
new_base_model = prepare_model_for_kbit_training(base_model)
tokenizer.pad_token = "!"
CUTOFF_LEN = 256
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1

In [9]:
config = LoraConfig(
    r=LORA_R, 
    lora_alpha=LORA_ALPHA, 
    target_modules=[ "w1", "w2", "w3"], 
    lora_dropout=LORA_DROPOUT, 
    bias="none", 
    task_type="CAUSAL_LM"
)

In [11]:
# model_with_adapter = get_peft_model(new_base_model, config)

new_base_model.add_adapter(config, adapter_name="shake")

ValueError: Adapter with name shake already exists. Please use a different name.

In [12]:
def generate_prompt(user_query):
  sys_msg = "Translate the given text to Shakespearean style."
  p = "<s> [INST]" + sys_msg +"\n"+ user_query["modern"] + "[/INST]" +  user_query["shakespearean"] + "</s>"
  return p 

In [13]:
dataset = load_dataset("harpreetsahota/modern-to-shakesperean-translation")
print("dataset", dataset)
train_data = dataset["train"]

tokenize = lambda prompt: tokenizer(prompt + tokenizer.eos_token, truncation=True, max_length=CUTOFF_LEN, padding="max_length")
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["modern" , "shakespearean"])


dataset DatasetDict({
    train: Dataset({
        features: ['modern', 'shakespearean'],
        num_rows: 274
    })
})


Map:   0%|          | 0/274 [00:00<?, ? examples/s]

In [14]:
trainer = Trainer(
  model=new_base_model,
  train_dataset=train_data,
  args=TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=1e-4,
    logging_steps=2,
    optim="adamw_torch",
    save_strategy="epoch",
    output_dir="mixtral-moe-lora-instruct-shapeskeare"
  ),
  data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [16]:
new_base_model.config.use_cache = False
trainer.train()
print("done")



Step,Training Loss
2,7.2067
4,5.5507
6,3.966
8,3.3545
10,2.6468
12,2.1391
14,2.0733
16,1.9893
18,1.8781
20,1.781



Cannot access gated repo for url https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/resolve/main/config.json.
Repo model mistralai/Mixtral-8x7B-Instruct-v0.1 is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in mistralai/Mixtral-8x7B-Instruct-v0.1.


done


In [18]:
for _ in range(3):
    outputs = base_model.generate(inputs, max_new_tokens=20)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("\nAnswer from base model: ", answer)

    outputs = new_base_model.generate(inputs, max_new_tokens=20)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("\nAnswer from new base model: ", answer)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer from base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer from new base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer from base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer from new base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Answer from base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst

Answer from new base model:  [INST] Translate the given text to Shakespearean style. I want to invite you on a second date [/INST]I wouldst fain invite thee to a second tryst


In [28]:
from huggingface_hub import login
login(token="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")
new_base_model.save_pretrained(
    "shake_adapter", 
    token = "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"
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


