In [None]:
!pip install -q torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate einops tqdm scipy

In [2]:
import os
from dataclasses import dataclass, field
from typing import Optional
import torch
from datasets import load_dataset, load_from_disk
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments
)
from tqdm.notebook import tqdm
from trl import SFTTrainer
import pandas as pd

In [3]:
from huggingface_hub import interpreter_login

In [4]:
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .


Token:  ········
Add token as git credential? (Y/n)  n


Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [9]:
dataset = load_dataset("tmobley96/black_mirror_scripts_S1-5", split='train')

In [10]:
df = pd.DataFrame(dataset)

In [6]:
df.head(2)

Unnamed: 0,text
0,
1,(Phone Vibrating)


In [7]:
df.shape

(17467, 1)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17467 entries, 0 to 17466
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    17434 non-null  object
dtypes: object(1)
memory usage: 136.6+ KB


In [10]:
df

Unnamed: 0,text
0,
1,(Phone Vibrating)
2,(Phone Ringing)
3,Hello?
4,Why don't you just tell me what's happened?
...,...
17462,Bow down before the one you serve
17463,You're going to get what you deserve
17464,Bow down before the one you serve
17465,You're going to get what you deserve


In [None]:
df.head(3)

In [14]:
df = df.dropna()
df.text = df.astype(str)
df.text.map(len)

# Convert the 'text' column to a list of strings
df['text'] = df['text'].apply(lambda x: str(x).split())

In [10]:
df.head(9364)

Unnamed: 0,text
1,"[(Phone, Vibrating)]"
2,"[(Phone, Ringing)]"
3,[Hello?]
4,"[Why, don't, you, just, tell, me, what's, happ..."
5,"[And, what, is, it?]"
...,...
9392,"[[computer, pings]]"
9393,"[-[Vanessa], What's, that?, -[Rasmus], Someone..."
9394,"[-Rasmus?, -I, just, picked, up, an, attempted..."
9395,"[Granular,, they've, got, a, breach.]"


In [15]:
df.to_csv("data.csv", index=False)

In [16]:
training_dataset = load_dataset('csv', data_files="data.csv", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset_size = len(training_dataset)  # Replace with your dataset size
num_epochs = 3  # Replace with the number of epochs you plan to use
batch_size = 4  # Replace with your batch size

max_steps = (dataset_size * num_epochs) // batch_size
print("Maximum number of training steps:", max_steps)


In [17]:
base_model = "cognitivecomputations/dolphin-2_6-phi-2"
new_model = "dolphin2.6-phi2-black-mirror"

tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.float16,
    bnb_4bit_use_double_quant = False
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config = bnb_config,
    trust_remote_code = True,
    flash_attn = True,
    flash_rotary = True,
    fused_dense = True,
    low_cpu_mem_usage = True,
    device_map = {"": 0},
    #revision = "refs/pr/23",
)

model.config.use_cache = False
model.config.pretraining_tp = 1

model = prepare_model_for_kbit_training(model , use_gradient_checkpointing=True)

training_arguments = TrainingArguments(
    output_dir = "./black-mirror",
    num_train_epochs = 2,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 32,
    evaluation_strategy = "steps",
    eval_steps = 1500,
    logging_steps = 15,
    optim = "paged_adamw_8bit",
    learning_rate = 2e-4,
    lr_scheduler_type = "cosine",
    save_steps = 1500,
    warmup_ratio = 0.05,
    weight_decay = 0.01,
    max_steps = -1
)

peft_config = LoraConfig(
    r = 32,
    lora_alpha = 64,
    lora_dropout = 0.05,
    bias="none",
    task_type = "CAUSAL_LM",
    target_modules = ["Wqkv", "fcl", "fc2"]
)

trainer = SFTTrainer(
    model = model,
    train_dataset = training_dataset,
    peft_config = peft_config,
    dataset_text_field = "text",
    max_seq_length = 690,
    tokenizer = tokenizer,
    args = training_arguments,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at cognitivecomputations/dolphin-2_6-phi-2 were not used when initializing PhiForCausalLM: ['lm_head.linear.lora_A.default.weight', 'lm_head.linear.lora_B.default.weight']
- This IS expected if you are initializing PhiForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing PhiForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Map:   0%|          | 0/17434 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [31]:
trainer.train()

You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss


TrainOutput(global_step=544, training_loss=3.1753057031070484, metrics={'train_runtime': 3068.1654, 'train_samples_per_second': 11.364, 'train_steps_per_second': 0.177, 'total_flos': 7914059615232000.0, 'train_loss': 3.1753057031070484, 'epoch': 2.0})

In [18]:
trainer.save_model("dolphin2.6-ph2-BlackMirror-2.8b-finetuned-4bit")
#trainer.mode.save_config("ph2-BlackMirror-2.8b-finetuned-4bit/config.json")
trainer.save_config("dolphin2.6-ph2-BlackMirror-2.8b-finetuned-4bit/config.json")

AttributeError: 'SFTTrainer' object has no attribute 'save_config'

In [22]:
model.push_to_hub("tmobley96/dolphin2.6-phi2-BlackMirror-2.8b-finetuned-4bit",
                  use_auth_token=True,
                  commit_message="Fine tuned on Black Mirror transcripts.",
                  private=False)

adapter_model.bin:   0%|          | 0.00/94.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/tmobley96/dolphin2.6-phi2-BlackMirror-2.8b-finetuned-4bit/commit/bdb0626a2e59678c4bba60bbf22531e1e34eac1d', commit_message='Fine tuned on Black Mirror transcripts.', commit_description='', oid='bdb0626a2e59678c4bba60bbf22531e1e34eac1d', pr_url=None, pr_revision=None, pr_num=None)

In [21]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "dolphin2.6-ph2-BlackMirror-2.8b-finetuned-4bit"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Loading the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

Loading cognitivecomputations/dolphin-2_6-phi-2 requires to execute some code in that repo, you can inspect the content of the repository at https://hf.co/cognitivecomputations/dolphin-2_6-phi-2. You can dismiss this prompt by passing `trust_remote_code=True`.
Do you accept? [y/N]  y


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at cognitivecomputations/dolphin-2_6-phi-2 were not used when initializing PhiForCausalLM: ['lm_head.linear.lora_A.default.weight', 'lm_head.linear.lora_B.default.weight']
- This IS expected if you are initializing PhiForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing PhiForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [56]:
batch = tokenizer("“Training models with PEFT and LoRa is cool” ->: ", return_tensors='pt')

with torch.cuda.amp.autocast():
  output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))




 “Training models with PEFT and LoRa is cool” ->: \'s",', "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you
