In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git 
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U datasets

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "EleutherAI/gpt-neox-20b"
#Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

#gpt-neox-20b doesn't have a PAD token
#We can use the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token

In [2]:
from datasets import load_dataset, Dataset
import torch, copy
import transformers
from typing import Optional, Dict, Sequence
from torch.nn.utils.rnn import pad_sequence
from dataclasses import dataclass, field

IGNORE_INDEX = -100
DEFAULT_PAD_TOKEN = "[PAD]"

@dataclass
class DataCollatorForCausalLM(object):
    tokenizer: transformers.PreTrainedTokenizer
    source_max_len: int
    target_max_len: int

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        sources = [f"{self.tokenizer.bos_token}{example['input']}" for example in instances]
        targets = [f"{example['output']}{self.tokenizer.eos_token}" for example in instances]
        # Tokenize
        tokenized_sources_with_prompt = self.tokenizer(
            sources,
            max_length=self.source_max_len,
            truncation=True,
            add_special_tokens=False,
        )
        tokenized_targets = self.tokenizer(
            targets,
            max_length=self.target_max_len,
            truncation=True,
            add_special_tokens=False,
        )
        # Build the input and labels for causal LM
        input_ids = []
        labels = []
        for tokenized_source, tokenized_target in zip(
            tokenized_sources_with_prompt['input_ids'],
            tokenized_targets['input_ids']
        ):
          input_ids.append(torch.tensor(tokenized_source + tokenized_target))
          labels.append(
              torch.tensor([IGNORE_INDEX for _ in range(len(tokenized_source))] + copy.deepcopy(tokenized_target))
          )


        # Apply padding
        input_ids = pad_sequence(input_ids, batch_first=True, padding_value=0)
        labels = pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)
        data_dict = {
            'input_ids': input_ids,
            'attention_mask':input_ids.ne(0),
        }

        if labels is not None:
            data_dict['labels'] = labels
        return data_dict

data_collator = DataCollatorForCausalLM(
    tokenizer=tokenizer,
    source_max_len=1024,
    target_max_len=256,
)

In [3]:
import pandas as pd
from datasets import Dataset

df = pd.read_csv("./result_dialogues.csv")
my_data = Dataset.from_pandas(df)


In [4]:
my_data = my_data.remove_columns(
    [col for col in my_data.column_names if col not in ['input', 'output']]
)

In [5]:
my_data

Dataset({
    features: ['input', 'output'],
    num_rows: 49189
})

In [6]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/LENOVO/anaconda3/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda110.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 110
CUDA SETUP: Loading binary /home/LENOVO/anaconda3/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda110.so...


  warn(msg)
  warn(msg)


In [7]:
#QLoRa configuration
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Set the device to use all available GPUs
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Specify the device as the argument for `device_map`
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map={"":0}, use_cache=False)

Loading checkpoint shards:   0%|          | 0/46 [00:00<?, ?it/s]

In [8]:
model.gradient_checkpointing_enable()

In [9]:
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8, 
    lora_alpha=32, 
    target_modules=["query_key_value"], 
    lora_dropout=0.05, 
    bias="none", 
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

In [10]:


trainer = transformers.Trainer(
    model=model,
    train_dataset=my_data,
    data_collator=data_collator,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=2000,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
        remove_unused_columns=False

    )
)


In [None]:
import torch
print(torch.cuda.is_available())
torch.cuda.empty_cache()

In [11]:
%%time

trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mshankar-arunp[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
1,3.5644
2,3.4364
3,2.9001
4,2.8986
5,2.2906
6,1.9508
7,2.2334
8,2.4641
9,2.1222
10,1.7718




CPU times: user 4h 26min 33s, sys: 39min 58s, total: 5h 6min 31s
Wall time: 4h 1min 12s


TrainOutput(global_step=2000, training_loss=6.252094690561295, metrics={'train_runtime': 14472.5013, 'train_samples_per_second': 1.106, 'train_steps_per_second': 0.138, 'total_flos': 1.3807604957773824e+17, 'train_loss': 6.252094690561295, 'epoch': 0.33})

In [12]:
model.save_pretrained("out_model")

In [15]:
device = "cuda:0"

full_text = ''
while True:
    user_input = "Human: " + input("Human: ")
    #print(user_input)
    
    if user_input=='Human: exit':
        break
    
    full_text += user_input + '\nBot: '
    inputs = tokenizer(full_text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=100) 
    
    text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(text_output)
    
    full_text += text_output + "\n"
    

Human: Who are you?


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Human: Who are you?
Bot: Yes..
Human: What are you doing?


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Human: Who are you?
Bot: Human: Who are you?
Bot: Yes..
Human: What are you doing?
Bot: .
Human: Whatthe fuc dude


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Human: Who are you?
Bot: Human: Who are you?
Bot: Yes..
Human: What are you doing?
Bot: Human: Who are you?
Bot: Human: Who are you?
Bot: Yes..
Human: What are you doing?
Bot:.
Human: Whatthe fuc dude
Bot: .


KeyboardInterrupt: Interrupted by user

In [None]:
ALPACA_PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response: "
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response: "
    ),
}

def extract_alpaca_dataset(example):
    if example.get("input", "") != "":
        prompt_format = ALPACA_PROMPT_DICT["prompt_input"]
    else:
        prompt_format = ALPACA_PROMPT_DICT["prompt_no_input"]
    return {'input': prompt_format.format(**example)}

In [None]:
alpaca_data = load_dataset("tatsu-lab/alpaca")
alpaca_data = alpaca_data.map(extract_alpaca_dataset, remove_columns=['instruction'])

#We keep only the modified input and output columns in the dataset
alpaca_data = alpaca_data.remove_columns(
      [col for col in alpaca_data.column_names['train'] if col not in ['input', 'output']]
)

In [None]:
alpaca_data['train']