In [None]:
# @title ライブラリのインストール
!pip install -U bitsandbytes transformers datasets accelerate loralib einops peft xformers

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting loralib
  Downloading loralib-0.1.2-py3-none-any.whl.metadata (15 kB)
Collecting xformers
  Downloading xformers-0.0.28.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=202

In [1]:
# @title huggingfaceへのログイン
!huggingface-cli login

In [None]:
# @title wandbのインストールとログイン
!pip install -U wandb
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
# @title ライブラリのimport

from datasets import Dataset, load_dataset
import peft
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig


In [None]:
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = prepare_model_for_kbit_training(model)


Unused kwargs: ['load_4bit_use_double_quant']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
model


MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNo

In [None]:
# @title QLoraのアダプタを定義

target_modules = (
    peft.utils.constants.TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING["mistral"]
)

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=target_modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)


In [None]:
# @title データセットの読み込み


def group_dataset_by_title(split_dataset):
    grouped_df = (
        split_dataset.to_pandas().groupby("title").sample(n=1).reset_index(drop=True)
    )
    return Dataset.from_pandas(grouped_df)


dataset = load_dataset("llm-book/JGLUE", "JSQuAD", trust_remote_code=True)

dataset["train"] = group_dataset_by_title(dataset["train"])
dataset["validation"] = group_dataset_by_title(dataset["validation"])


README.md:   0%|          | 0.00/3.08k [00:00<?, ?B/s]

JGLUE.py:   0%|          | 0.00/13.8k [00:00<?, ?B/s]

preprocess_marc_ja.py:   0%|          | 0.00/9.03k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.98M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [None]:
# @title データセットをチャット形式に変換. トーカナイズでIDに変換.


def generate_prompt(data_point):
    context = data_point["context"]
    question = data_point["question"]
    answer = data_point["answers"]["text"][0]
    messages = [
        {"role": "user", "content": f"{context}\n{question}"},
        {"role": "assistant", "content": answer},
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False)


MAX_LEN = 800


def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(
        full_prompt, padding=True, truncation=True, max_length=MAX_LEN
    )
    return tokenized_full_prompt


dataset = dataset.shuffle().map(generate_and_tokenize_prompt)


Map:   0%|          | 0/710 [00:00<?, ? examples/s]

Map:   0%|          | 0/59 [00:00<?, ? examples/s]

In [None]:
# @title 学習条件の設定

OUTPUT_DIR = "./jsquad_mistral"

training_args = transformers.TrainingArguments(
    auto_find_batch_size=True,
    num_train_epochs=1.0,
    learning_rate=2e-4,
    logging_steps=1,
    eval_steps=10,
    save_steps=10,
    bf16=True,
    output_dir=OUTPUT_DIR,
    save_total_limit=2,
    save_strategy="steps",
    evaluation_strategy="steps",
    push_to_hub=True,
    resume_from_checkpoint=False,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)




In [None]:
# @title 学習の開始

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mshotasato0916[0m ([33mshotasato200916[0m). Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
10,1.9279,2.09669
20,2.2569,1.914458
30,1.7192,1.845746
40,1.8,1.810388
50,2.074,1.788728
60,1.7807,1.775671
70,1.9116,1.768602
80,1.6996,1.76423


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=89, training_loss=1.9841643170024572, metrics={'train_runtime': 163.1911, 'train_samples_per_second': 4.351, 'train_steps_per_second': 0.545, 'total_flos': 1.209879789109248e+16, 'train_loss': 1.9841643170024572, 'epoch': 1.0})

In [None]:
# @title モデルをhuggingfaceにアップロード

# model.push_to_hub("自分のHuggingfaceユーザ名/mistral_chat_jsquad", private=False)
