In [1]:
!pip install -q torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate einops tqdm scipy

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [16]:
import os
from dataclasses import dataclass, field
from typing import Optional
import torch
from datasets import load_dataset, load_from_disk
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments
)
from tqdm.notebook import tqdm
from trl import SFTTrainer
import pandas as pd

In [3]:
from huggingface_hub import interpreter_login

In [4]:
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .


Token:  ········
Add token as git credential? (Y/n)  n


Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [5]:
dataset = load_dataset("tmobley96/black_mirror_scripts_S1-5", split='train')

Downloading readme:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/770k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [6]:
df = pd.DataFrame(dataset)

In [7]:
df.head(2)

Unnamed: 0,text
0,
1,(Phone Vibrating)


In [8]:
df.shape

(17467, 1)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17467 entries, 0 to 17466
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    17434 non-null  object
dtypes: object(1)
memory usage: 136.6+ KB


In [10]:
df

Unnamed: 0,text
0,
1,(Phone Vibrating)
2,(Phone Ringing)
3,Hello?
4,Why don't you just tell me what's happened?
...,...
17462,Bow down before the one you serve
17463,You're going to get what you deserve
17464,Bow down before the one you serve
17465,You're going to get what you deserve


In [11]:
df.head(3)

Unnamed: 0,text
0,
1,(Phone Vibrating)
2,(Phone Ringing)


In [13]:
df = df.dropna()
df.text = df.astype(str)
df.text.map(len)

# Convert the 'text' column to a list of strings
df['text'] = df['text'].apply(lambda x: str(x).split())

In [36]:
df.head(9364)

Unnamed: 0,text
0,
1,(Phone Vibrating)
2,(Phone Ringing)
3,Hello?
4,Why don't you just tell me what's happened?
...,...
9359,Clara!
9360,Police. For you. How did they get my number?
9361,-Hello? -Clara Meades?
9362,"This is DCI Karin Parke, Met Police."


In [14]:
df.to_csv("data.csv", index=False)

In [15]:
training_dataset = load_dataset('csv', data_files="data.csv", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [29]:
Training_dataset = dataset['text']

In [59]:
base_model = "microsoft/phi-2"
new_model = "phi2-black-mirror"

tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.float16,
    bnb_4bit_use_double_quant = False
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config = bnb_config,
    trust_remote_code = True,
    flash_attn = True,
    flash_rotary = True,
    fused_dense = True,
    low_cpu_mem_usage = True,
    device_map = {"": 0},
    #revision = "refs/pr/23",
)

model.config.use_cache = False
model.config.pretraining_tp = 1

model = prepare_model_for_kbit_training(model , use_gradient_checkpointing=True)

training_arguments = TrainingArguments(
    output_dir = "./black-mirror",
    num_train_epochs = 2,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 32,
    evaluation_strategy = "steps",
    eval_steps = 1500,
    logging_steps = 15,
    optim = "paged_adamw_8bit",
    learning_rate = 2e-4,
    lr_scheduler_type = "cosine",
    save_steps = 1500,
    warmup_ratio = 0.05,
    weight_decay = 0.01,
    max_steps = -1
)

peft_config = LoraConfig(
    r = 32,
    lora_alpha = 64,
    lora_dropout = 0.05,
    bias="none",
    task_type = "CAUSAL_LM",
    target_modules = ["Wqkv", "fcl", "fc2"]
)

trainer = SFTTrainer(
    model = model,
    train_dataset = training_dataset,
    peft_config = peft_config,
    dataset_text_field = "text",
    max_seq_length = 690,
    tokenizer = tokenizer,
    args = training_arguments,
)

config.json:   0%|          | 0.00/782 [00:00<?, ?B/s]

configuration_phi.py:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/cognitivecomputations/dolphin-2_6-phi-2:
- configuration_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi.py:   0%|          | 0.00/33.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/cognitivecomputations/dolphin-2_6-phi-2:
- modeling_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/25.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/584M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of the model checkpoint at cognitivecomputations/dolphin-2_6-phi-2 were not used when initializing PhiForCausalLM: ['lm_head.linear.lora_A.default.weight', 'lm_head.linear.lora_B.default.weight']
- This IS expected if you are initializing PhiForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing PhiForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]



Map:   0%|          | 0/17467 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [60]:
trainer.train()

You're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
trainer.save_model("ph2-BlackMirror-2.8b-finetuned-4bit")
#trainer.mode.save_config("ph2-BlackMirror-2.8b-finetuned-4bit/config.json")
trainer.save_config("ph2-BlackMirror-2.8b-finetuned-4bit/config.json")

In [57]:
model.push_to_hub("tmobley96/phi2-black-mirror",
                  use_auth_token=True,
                  commit_message="Fine tuned on Black Mirror transcripts.",
                  private=False)

adapter_model.bin:   0%|          | 0.00/94.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/tmobley96/phi2-black-mirror/commit/49ded8170a1c1d157f46a5f620852bf951d813f9', commit_message='Fine tuned on Black Mirror transcripts.', commit_description='', oid='49ded8170a1c1d157f46a5f620852bf951d813f9', pr_url=None, pr_revision=None, pr_num=None)

In [54]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "ph2-BlackMirror-2.8b-finetuned-4bit"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Loading the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

Loading microsoft/phi-2 requires to execute some code in that repo, you can inspect the content of the repository at https://hf.co/microsoft/phi-2. You can dismiss this prompt by passing `trust_remote_code=True`.
Do you accept? [y/N]  y


A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- configuration_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Loading microsoft/phi-2 requires to execute some code in that repo, you can inspect the content of the repository at https://hf.co/microsoft/phi-2. You can dismiss this prompt by passing `trust_remote_code=True`.
Do you accept? [y/N]  u
Loading microsoft/phi-2 requires to execute some code in that repo, you can inspect the content of the repository at https://hf.co/microsoft/phi-2. You can dismiss this prompt by passing `trust_remote_code=True`.
Do you accept? [y/N]  y


modeling_phi.py:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/phi-2:
- modeling_phi.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [56]:
batch = tokenizer("“Training models with PEFT and LoRa is cool” ->: ", return_tensors='pt')

with torch.cuda.amp.autocast():
  output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))




 “Training models with PEFT and LoRa is cool” ->: \'s",', "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you',", "'you
