### !pip install mistral_inference
### !pip install peft
### Clone trl project https://github.com/huggingface/trl

In [1]:
from huggingface_hub import snapshot_download
from pathlib import Path

mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
mistral_models_path.mkdir(parents=True, exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm


## Do `huggingface-cli login`

In [2]:
snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)

Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 41120.63it/s]


'/root/mistral_models/7B-Instruct-v0.3'

In [3]:
import torch
import random
from tqdm import tqdm
from datasets import load_dataset
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import datasets
from datasets import DatasetDict

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
from mistral_inference.model import Transformer
from mistral_inference.generate import generate

from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage, SystemMessage, AssistantMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest


tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
model = Transformer.from_folder(mistral_models_path)


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]


In [5]:
completion_request = ChatCompletionRequest(messages=[SystemMessage(content="You are a helpful AI assistant."),
                                                      UserMessage(content="What is the best district in Paris? Answer exactly in 3 sentences."),])

tokens = tokenizer.encode_chat_completion(completion_request).tokens
torch.manual_seed(random.randint(0, 1000000))
import numpy as np
np.random.seed(123)
out_tokens, _ = generate([tokens], model, max_tokens=512, temperature=0.8, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])

In [6]:
print(result)

The "best" district in Paris can vary greatly depending on one's preferences, as each arrondissement (district) offers unique attractions, ambiance, and experiences. For those interested in historical landmarks and iconic sites, the 1st, 4th, and 6th arrondissements, which contain the Louvre, Notre-Dame, and the Latin Quarter, respectively, are popular choices. For a more bohemian and artistic atmosphere, the 5th and 18th arrondissements, home to the Luxembourg Gardens and Montmartre, respectively, are worth considering. Ultimately, the best district in Paris is the one that aligns with your personal interests and travel style.


In [7]:
dset = load_dataset("openbmb/UltraFeedback")

Downloading readme: 100%|██████████| 15.4k/15.4k [00:00<00:00, 13.6MB/s]
Downloading data: 100%|██████████| 168M/168M [00:00<00:00, 234MB/s]  
Downloading data: 100%|██████████| 25.9M/25.9M [00:00<00:00, 123MB/s] 
Downloading data: 100%|██████████| 240M/240M [00:00<00:00, 377MB/s]  
Downloading data: 100%|██████████| 313M/313M [00:00<00:00, 454MB/s]  
Downloading data: 100%|██████████| 9.99M/9.99M [00:00<00:00, 11.6MB/s]
Downloading data: 100%|██████████| 182M/182M [00:00<00:00, 381MB/s]  
Generating train split: 63967 examples [00:02, 29376.51 examples/s]


In [8]:
ultra = dset['train']

In [9]:
res_dset = []
for el in tqdm(list(ultra)):
    if len(el['instruction']) < 128 and len(el['completions'][0]['response']) < 512 and len(el['completions'][1]['response']) < 512:
        number_of_sentences = len(sent_tokenize(el['completions'][0]['response']))
        right_answer = el['completions'][0]['response']
        wrong_answer = el['completions'][1]['response']
        res_dset.append({
            "prompt": f"{el['instruction']} You must generate exactly {number_of_sentences} sentences.",
            "chosen": [{'content': f"{el['instruction']} You must generate exactly {number_of_sentences} sentences.",
                        'role': 'user'},
                        {'content': right_answer,
                         'role': 'assistant'}],
            "rejected": [{'content': f"{el['instruction']} You must generate exactly {number_of_sentences} sentences.",
                        'role': 'user'},
                        {'content': wrong_answer,
                         'role': 'assistant'}]
        })
    if len(res_dset) == 1024+256:
        break



 19%|█▊        | 11834/63967 [00:00<00:00, 188501.24it/s]


In [10]:
train_dset = res_dset[:1024]
val_dset = res_dset[1024:]

In [11]:
dset_to_upload_train = datasets.Dataset.from_list(train_dset)
dset_to_upload_test = datasets.Dataset.from_list(val_dset)

In [12]:
ddict = DatasetDict({
    "train": dset_to_upload_train,
    "test": dset_to_upload_test
})

In [None]:
ddict.push_to_hub("<your_account>/exact_number_of_sentences")

In [None]:
!python3.10 trl/examples/scripts/dpo.py --model_name_or_path mistralai/Mistral-7B-Instruct-v0.3 \
                                    --dataset_name <your_account>/exact_number_of_sentences --output_dir count_sentences \
                                    --report_to wandb --logging_steps 10 --per_device_train_batch_size 8 \
                                    --use_peft --learning_rate 1e-5  --bf16 --num_train_epochs 2

## It will take only around 5-10 minutes to train such model

In [3]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
device="cuda:0"

base_model_name = "mistralai/Mistral-7B-Instruct-v0.3"
adapter_model_name = "count_sentences"

model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(model, adapter_model_name)
model = model.to(device)

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

Loading checkpoint shards: 100%|██████████| 3/3 [00:10<00:00,  3.51s/it]


In [4]:
inputs = tokenizer.encode("What is the best district in Paris? Answer exactly in 3 sentences.", return_tensors="pt").to(device)
outputs = model.generate(inputs, max_length=256, temperature=0.7, num_return_sequences=1)
print(tokenizer.decode(outputs[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> What is the best district in Paris? Answer exactly in 3 sentences. The best district in Paris is the Marais, known for its vibrant nightlife, historic architecture, and trendy boutiques. The Latin Quarter, with its intellectual atmosphere, charming streets, and iconic landmarks like the Sorbonne and the Pantheon, is another great choice. Montmartre, famous for its bohemian history, Sacré-Cœur Basilica, and artistic ambiance, is a third excellent district.</s>
