In [1]:
import os
os.chdir('/workspace/test')

In [2]:
!poetry install

[34mInstalling dependencies from lock file[39m

No dependencies to install or update

[39;1mInstalling[39;22m the current project: [36mtest[39m ([39;1m0.1.0[39;22m)[1G[2K[39;1mInstalling[39;22m the current project: [36mtest[39m ([32m0.1.0[39m)


In [3]:
!pip install poetry

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [4]:
!poetry add  bitsandbytes transformers peft accelerate datasets scipy

The following packages are already present in the pyproject.toml and will be skipped:

  - [36mbitsandbytes[39m
  - [36mtransformers[39m
  - [36mpeft[39m
  - [36maccelerate[39m
  - [36mdatasets[39m
  - [36mscipy[39m

If you want to update it to the latest compatible version, you can use `poetry update package`.
If you prefer to upgrade it to the latest available version, you can use `poetry add package@latest`.

Nothing to add.


In [5]:
import torch
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1",
                                             load_in_4bit=True,
                                             torch_dtype=torch.float16,
                                             device_map="auto",
                                            # attn_implementation="flash_attention_2",   #You can use flash attention on your local GPU with specific libraries
                                             )

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 19/19 [02:24<00:00,  7.60s/it]


In [7]:
tokenizer.pad_token = "!" #Not EOS, will explain another time.

In [8]:
CUTOFF_LEN = 256  #Our dataset has shot text
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1

In [9]:
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=[ "w1", "w2", "w3"],  #Only Training the "expert" layers
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

In [10]:
def print_trainable_parameters(m):
    trainable_params = sum(p.numel() for p in m.parameters() if p.requires_grad)
    all_params = sum(p.numel() for p in m.parameters())
    print(f"trainable params: {trainable_params} || all params: {all_params} || trainable%: {100 * trainable_params / all_params}")

print_trainable_parameters(model)

trainable params: 113246208 || all params: 23595847680 || trainable%: 0.4799412571898752


In [11]:
dataset = load_dataset("harpreetsahota/modern-to-shakesperean-translation") #Found a good small dataset for a quick test run!
print("dataset", dataset)
train_data = dataset["train"] # Not using evaluation data

dataset DatasetDict({
    train: Dataset({
        features: ['modern', 'shakespearean'],
        num_rows: 274
    })
})


In [12]:

def generate_prompt(user_query,  sep="\n\n### "):  #The prompt format is taken from the official Mixtral huggingface page
    sys_msg= "Translate the given text to Shakespearean style."
    p =  " [INST]" + sys_msg +"\n"+ user_query["modern"] + "[/INST]" +  user_query["shakespearean"] + ""
    return p

In [13]:
def tokenize(prompt):
    return tokenizer(
        prompt + tokenizer.eos_token,
        truncation=True,
        max_length=CUTOFF_LEN ,
        padding="max_length"
    )

In [None]:
# Linkedin Facebook

casual_phrases = [
    "We're all over Facebook and LinkedIn! Check us out.",
    "Don't forget to like and share our latest post!",
    "We just hit a big follower milestone! Thanks everyone!",
    "Got a new event coming up? We'll post the details soon!",
    "Loved your comments on our last update!",
    "We're hosting a live Q&A next week. Join us!",
    "Hey, did you see our new video tutorial?",
    "Can't wait to show you what we've been working on!",
    "Who's ready for our next big announcement? Stay tuned!",
    "Check out our LinkedIn for job openings!",
    "Thanks for the shoutouts this week!",
    "Our team had a blast at the tech conference! Photos soon!",
    "Keep the feedback coming, it really helps us improve!",
    "Join our community group on Facebook for more insider info!",
    "Sneak peek of our new product dropping tomorrow!",
    "Catch our CEO talking about AI trends on LinkedIn Live.",
    "Just hit 10k followers on LinkedIn!",
    "We're rolling out updates every month. Follow us to stay in the loop!",
    "Thanks for all the likes on our last video!",
    "Our team is growing, and we're excited to introduce our new members!",
    "Throwback to our last year's big launch. What a journey!",
    "Having a blast interacting with all of you on social media!",
    "We love seeing your posts about our products!",
    "Got questions? Drop them in our comments or DM us!",
    "Keep an eye on our pages for some cool contests coming up!",
    "Our Facebook community just hit another big number!",
    "What topics would you like our next webinar to cover?",
    "Don't miss out on our latest blog post—link in bio!",
    "We're celebrating our anniversary with some fun posts this week!",
    "Watch our latest product demo on Facebook and let us know what you think!"
]

formal_phrases = [
    "Our company maintains a robust presence on Facebook and LinkedIn. We invite you to visit our profiles.",
    "Please remember to like and share our most recent publication.",
    "We have achieved a significant milestone in terms of followers. Thank you to all our supporters.",
    "We are organizing an upcoming event and will share details shortly.",
    "We appreciate your comments on our recent update.",
    "We invite you to a live question and answer session next week.",
    "Have you viewed our latest video tutorial?",
    "We are eager to reveal our recent projects to you.",
    "Anticipate our forthcoming major announcement.",
    "Visit our LinkedIn page to view current job opportunities.",
    "We are grateful for your acknowledgments this week.",
    "Our team greatly enjoyed the technology conference. Photographs will be shared soon.",
    "Please continue to provide feedback; it is instrumental in our development.",
    "Join our exclusive community group on Facebook for additional insights.",
    "We will provide a preview of our new product tomorrow.",
    "Observe our Chief Executive Officer discussing artificial intelligence trends on LinkedIn Live.",
    "We have reached 10,000 followers on LinkedIn.",
    "We are committed to monthly updates. Follow our progress.",
    "Thank you for the positive reception of our latest video.",
    "Our team is expanding, and we are delighted to introduce new members.",
    "Reflecting on our previous major launch from last year. It has been a remarkable journey.",
    "It is a pleasure to interact with you all via social media.",
    "We are pleased to see your engagement with our products.",
    "Should you have any inquiries, please comment below or send us a direct message.",
    "Stay informed about upcoming contests on our pages.",
    "Our Facebook community has achieved a new milestone.",
    "Which topics would you prefer for our next webinar?",
    "Do not miss our latest blog post; find the link in our biography.",
    "This week, we celebrate our anniversary with special posts.",
    "View our latest product demonstration on Facebook and share your thoughts."
]

df = pd.DataFrame({
    'casual': casual_phrases,
    'company': formal_phrases
})

In [None]:
def generate_tone_prompt(user_query,  sep="\n\n### ", source="Linkedin"):  #The prompt format is taken from the official Mixtral huggingface page
    sys_msg= "Translate the given text to Company tone of voice for " + source + "."
    p =  " [INST]" + sys_msg +"\n"+ user_query["casual"] + "[/INST]" +  user_query["company"] + ""
    return p

In [14]:
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)), remove_columns=["modern" , "shakespearean"])
 

Map: 100%|██████████| 274/274 [00:00<00:00, 3015.23 examples/s]


In [None]:
trainer = Trainer(
    model=model,
    train_dataset=train_data,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=6,
        learning_rate=1e-4,
        logging_steps=2,
        optim="adamw_torch",
        save_strategy="epoch",
        output_dir="mixtral-moe-lora-instruct-shapeskeare"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
print("finish")

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
2,7.5168
4,4.7752
6,4.0338
8,2.6991
10,2.1133
12,1.7584
14,1.7086
16,1.6068
18,1.3342
20,1.3861
