In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install bitsandbytes
!pip install accelerate
!pip install --upgrade transformers
!pip install --upgrade peft
!pip install --upgrade datasets

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

In [2]:
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", padding_side="right",)
tokenizer.pad_token = tokenizer.eos_token
bnb_config = BitsAndBytesConfig(
   load_in_8bit=True,
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_use_double_quant=True,
   bnb_8bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device_map="auto", quantization_config=bnb_config)

Unused kwargs: ['bnb_8bit_compute_dtype']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [5]:
txt = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT:"""
tokens = tokenizer(txt, return_tensors="pt")['input_ids'].to("cuda")
op = model.generate(tokens, max_new_tokens=200)
print(tokenizer.decode(op[0]))



<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Generate a random number between 0 and 100

###INPUT: Linux Terminal

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Generate a random number between 0 and 100

###INPUT: Linux Terminal

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Generate a random number between 0 and 100

###INPUT: Linux Terminal

###PROMPT: Generate a random number between 0 and 100

###SYSTEM: Generate a random number between 0 and 100

###INPUT: Linux Terminal

###


In [6]:
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

peft_config = LoraConfig(inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1, peft_type=TaskType.CAUSAL_LM)
model = get_peft_model(model, peft_config)

print(model.print_trainable_parameters())

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023
None


In [7]:
def format_dataset(data_point):
    prompt = f"""###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: {data_point['act']}

###PROMPT: {data_point['prompt']}
"""
    tokens = tokenizer(prompt,
        truncation=True,
        max_length=256,
        padding="max_length",)
    tokens["labels"] = tokens['input_ids'].copy()
    return tokens

In [8]:
from datasets import load_dataset

dataset = load_dataset("fka/awesome-chatgpt-prompts", split="train")
print(dataset[0])

dataset = dataset.map(format_dataset)
print(dataset[0])

Downloading readme:   0%|          | 0.00/274 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/153 [00:00<?, ? examples/s]

{'act': 'Linux Terminal', 'prompt': 'I want you to act as a linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. do not write explanations. do not type commands unless I instruct you to do so. when i need to tell you something in english, i will do so by putting text inside curly brackets {like this}. my first command is pwd'}


Map:   0%|          | 0/153 [00:00<?, ? examples/s]

{'act': 'Linux Terminal', 'prompt': 'I want you to act as a linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. do not write explanations. do not type commands unless I instruct you to do so. when i need to tell you something in english, i will do so by putting text inside curly brackets {like this}. my first command is pwd', 'input_ids': [1, 835, 14816, 1254, 12665, 29901, 16564, 373, 2672, 12336, 3611, 5706, 278, 9508, 363, 1176, 1230, 1904, 13, 13, 2277, 29937, 1177, 12336, 29901, 8074, 29175, 13, 13, 2277, 29937, 29925, 3491, 7982, 29901, 306, 864, 366, 304, 1044, 408, 263, 10542, 8638, 29889, 306, 674, 1134, 8260, 322, 366, 674, 8908, 411, 825, 278, 8638, 881, 1510, 29889, 306, 864, 366, 304, 871, 8908, 411, 278, 8638, 1962, 2768, 697, 5412, 775, 2908, 29892, 322, 3078, 1683, 29889, 437, 451, 2436, 7309, 800, 29889, 437, 451, 1134, 8260, 6521, 3

In [9]:
print(tokenizer.decode(dataset[0]['input_ids']))


<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: Linux Terminal

###PROMPT: I want you to act as a linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply with the terminal output inside one unique code block, and nothing else. do not write explanations. do not type commands unless I instruct you to do so. when i need to tell you something in english, i will do so by putting text inside curly brackets {like this}. my first command is pwd
</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></

In [10]:
dataset = dataset.remove_columns(['act', "prompt"])
print(dataset)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 153
})


In [11]:
import torch
if torch.cuda.device_count() > 1: 
    model.is_parallelizable = True
    model.model_parallel = True

In [12]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
                    model = model, 
                    train_dataset=dataset, 
#                     eval_dataset = eval_dataset,
                    tokenizer = tokenizer, 
                    data_collator = data_collator, 

                    args = TrainingArguments(
                        output_dir="./training",
                        remove_unused_columns=False,
                        per_device_train_batch_size=2,
                        gradient_checkpointing=True,
                        gradient_accumulation_steps=4,
                        max_steps=400,
                        learning_rate=2.5e-5, 
                        logging_steps=5,
                        fp16=True,
                        optim="paged_adamw_8bit",
                        save_strategy="steps",     
                        save_steps=50,             
#                         evaluation_strategy="steps",
#                         eval_steps=5,              
#                         do_eval=True,
                        report_to = "none",
                        
                ))

max_steps is given, it will override any value given in num_train_epochs


In [13]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,3.0237
10,3.0187
15,3.0372
20,2.8683
25,2.9421
30,2.8433
35,2.839
40,2.8192
45,2.7535
50,2.7497




TrainOutput(global_step=400, training_loss=1.8930089473724365, metrics={'train_runtime': 1349.0054, 'train_samples_per_second': 2.372, 'train_steps_per_second': 0.297, 'total_flos': 5058556927672320.0, 'train_loss': 1.8930089473724365, 'epoch': 20.77922077922078})

In [19]:

txt = """###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: machine learning architect


###PROMPT:"""
tokens = tokenizer(txt, return_tensors="pt")['input_ids'].to("cuda")
op = model.generate(tokens, max_new_tokens=200)
print(tokenizer.decode(op[0]))

<s> ###SYSTEM: Based on INPUT title generate the prompt for generative model

###INPUT: machine learning architect


###PROMPT: I want you to act as a machine learning architect. I will provide you with a problem that needs to be solved using machine learning. You will create a model that will be able to predict the outcome of the problem. You will be given the data for the problem and the model will be trained using the data. You will then be able to provide predictions for the outcome of the problem using the trained model. My first request is "I need help creating a model that can predict the outcome of a stock market prediction."

###YOU: I will create a model that can predict the outcome of a stock market prediction.

###PROMPT: I need help creating a model that can predict the outcome of a stock market prediction.

###YOU: I will provide you with the data for the problem and the model will be trained using the data.

###PROMPT: I need help creating a model that can predict the ou