In [1]:
!pip install -U transformers[torch] datasets

[0m

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset

import torch
import re

In [3]:
MODEL_NAME = "mistralai/Mistral-7B-v0.1"
DATASET_NAME = "sail/symbolic-instruction-tuning"
NEW_MODEL_FILE_PATH = "./mistral-instruction-tuned"
# Ensure that our model uses the GPU if available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

cuda


In [4]:
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.to(DEVICE)
# Load the dataset from HuggingFace Hub
dataset = load_dataset(DATASET_NAME)

# Set pad token to eos token
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

mistral's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Preprocess and tokenize the dataset using the tokenizer
def preprocess_function(examples):
    # Concatenate instruction and response pairs
    instr_resp_pairs = [instr + tokenizer.eos_token + resp for instr, resp in zip(examples['input'], examples['output'])]
    return tokenizer(instr_resp_pairs, truncation=True, padding='max_length', max_length=128)

tokenized_datasets = dataset.map(preprocess_function, batched=True)
train_dataset = tokenized_datasets['train']

# Define the data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [6]:
# Define the training arguments
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=1,  # Adjust batch size according to your GPU
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=32,  # Adjust based on your GPU and batch size
    learning_rate=2e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    fp16=True,  # Enable mixed precision training
    save_strategy="no",  # Disable model saving due to disk space limitations on Colab
    report_to="none",  # Disable reporting to reduce clutter in the notebook
)

In [7]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    tokenizer=tokenizer
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [8]:
# Train the model
trainer.train()

Step,Training Loss
10,1.4962
20,1.0646
30,0.9799
40,0.9272
50,0.8831
60,0.8629
70,0.8481
80,0.8184
90,0.8324
100,0.8121


TrainOutput(global_step=839, training_loss=0.723453234716877, metrics={'train_runtime': 2315.9884, 'train_samples_per_second': 11.603, 'train_steps_per_second': 0.362, 'total_flos': 4.086499799098982e+16, 'train_loss': 0.723453234716877, 'epoch': 1.0})

In [9]:
# Save the model and the tokenizer
model.save_pretrained(NEW_MODEL_FILE_PATH)
tokenizer.save_pretrained(NEW_MODEL_FILE_PATH)

('./mistral-instruction-tuned/tokenizer_config.json',
 './mistral-instruction-tuned/special_tokens_map.json',
 './mistral-instruction-tuned/tokenizer.json')

In [12]:
# Function to perform inference
def ask_question(question, model_path=NEW_MODEL_FILE_PATH):
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Set pad token to eos token (needed if it's not already set in the saved model/tokenizer)
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

    inputs = tokenizer.encode(question + tokenizer.eos_token, return_tensors='pt')
    inputs = inputs.to(DEVICE)  # Move to GPU if available
    model.to(DEVICE)

    # Generate a response
    with torch.no_grad():
        outputs = model.generate(inputs, max_length=512, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [13]:
# Example inference
question = "How to make pasta?"
response = ask_question(question)
print(response)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

How to make pasta?I'm on it! I'm here to guide you through the process of making pasta. To get started, you'll need to gather the following ingredients:

1. Flour: Choose a high-quality flour that is suitable for making pasta.
2. Eggs: Use eggs that are at room temperature.
3. Salt: Add a pinch of salt to enhance the flavor.
4. Water: Use warm water to ensure a smooth and consistent texture.
5. Oil: Add a small amount of oil to prevent the pasta from sticking together.

Once you have these ingredients, you can follow these steps:

1. Mix the flour, eggs, and salt in a bowl.
2. Add the warm water and oil to the mixture.
3. Knead the dough until it is smooth and elastic.
4. Divide the dough into small pieces and shape them into pasta.
5. Cook the pasta in boiling water for a few minutes.
6. Serve the pasta with your favorite sauce or toppings.

If you encounter any difficulties or have any questions along the way, feel free to reach out to me. I'm here to assist you every step of the way