# Transform data

In [None]:
#!pip install scikit-learn
#!pip install spacy scikit-learn
#!python3 -m spacy download en_core_web_md

In [2]:
import main

In [5]:
# Convert the HTML input file to JSON with main.convert_to_json and save the result.
file_in = "discordExample.html"
file_out = 'converted.json'
# Pass file_in: no `file_path` variable exists in this notebook, so using it raises NameError.
temp = main.convert_to_json(file_in)
main.save_json(temp, file_out)

In [None]:
#file_in = 'converted.json'
#file_out = 'linked.json'
#temp = main.link_messages(file_in)
#main.save_json(temp, file_out)

In [None]:
# Run main.calculate_prompts on linked.json and save the result with main.save_json.
file_in = 'linked.json'
# NOTE(review): file_out is assigned but never passed to save_json below, so the output
# path is whatever main.save_json uses by default — confirm this is intended.
file_out = 'linked_prompted.json'
temp=main.calculate_prompts(file_in)
main.save_json(temp)

In [None]:
# Call main.split_data() with its default arguments; its inputs and outputs are defined in main.py.
main.split_data()

# local LLM - Falcon 7b

In [None]:
# https://vilsonrodrigues.medium.com/run-your-private-llm-falcon-7b-instruct-with-less-than-6gb-of-gpu-using-4-bit-quantization-ff1d4ffbabcc
# notebook was run on Ubuntu in WSL
# ensure WSL is version 2 to access CUDA hardware

#notebook ran with these libraries and versions
#accelerate                0.23.0
#bitsandbytes              0.41.1
#bitsandbytes-cuda116      0.26.0.post2
#ipywidgets                8.1.1
#nvidia-cublas-cu11        11.10.3.66
#nvidia-cuda-cupti-cu11    11.7.101
#nvidia-cuda-nvrtc-cu11    11.7.99
#nvidia-cuda-runtime-cu11  11.7.99
#nvidia-cudnn-cu11         8.5.0.96
#nvidia-cufft-cu11         10.9.0.58
#nvidia-curand-cu11        10.2.10.91
#nvidia-cusolver-cu11      11.4.0.1
#nvidia-cusparse-cu11      11.7.4.91
#nvidia-nccl-cu11          2.14.3
#nvidia-nvtx-cu11          11.7.91
#tokenizers                0.13.3
#torch                     2.0.1
#transformers              4.33.3

# to get nvidia libraries, follow install guide at:
# https://developer.nvidia.com/cuda-downloads

In [None]:
import datetime
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline

# Checkpoint to load; the alternatives that were tried are kept for reference.
#model_id = "ybelkada/falcon-7b-sharded-bf16"
#model_id = "vilsonrodrigues/falcon-7b-sharded"
model_id = "vilsonrodrigues/falcon-7b-instruct-sharded"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit NF4 weights with double quantization; float16 is the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Kept as a notebook-level name; it is not passed to from_pretrained below.
device_map = "auto"

model_4bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=quantization_config,
)

# Text-generation pipeline around the quantized model. The end-of-sequence token is
# used both to stop generation and as the padding token.
pipeline_4bit = pipeline(
    "text-generation",
    model=model_4bit,
    tokenizer=tokenizer,
    device_map="auto",
    use_cache=True,
    return_full_text=False,
    max_length=150,
    num_return_sequences=1,
    do_sample=True,
    top_k=10,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

print("pipeline created")

In [None]:


# Time one generation with the 4-bit pipeline.
# `now` is set here so the cell does not depend on a variable from another cell.
now = datetime.datetime.now()

# Call the pipeline instance. `pipeline` itself is the transformers factory function,
# whose first argument is a task name, not a prompt.
sequences = pipeline_4bit("Ed: How was the weather today? \n Minh: The weather is humid and sunny \n Ed: That is good to hear \n Minh:")
print(sequences)

delta = (datetime.datetime.now()-now)
print(f'processing finished at {datetime.datetime.now().strftime("%H:%M:%S")}')
print(f'duration of process: {delta.total_seconds()} seconds')

In [None]:
# Time one generation with the 4-bit pipeline, printing start and finish times.
now = datetime.datetime.now()
print(f'processing started at {now.strftime("%H:%M:%S")}')
# Call the pipeline instance. `pipeline` itself is the transformers factory function,
# whose first argument is a task name, not a prompt.
sequences = pipeline_4bit("Ed: How was the weather today? \n Minh: The weather is humid and sunny \n Ed: That is good to hear \n Minh:")
print(sequences)
delta = (datetime.datetime.now()-now)
print(f'processing finished at {datetime.datetime.now().strftime("%H:%M:%S")}')
print(f'duration of process: {delta.total_seconds()} seconds')

# Finetuning

In [None]:
#https://medium.com/@zahrizhalali/baby-step-fine-tune-falcon-7b-large-language-models-for-your-custom-datasets-e1df8c473f56

In [None]:
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, GenerationConfig
from trl import SFTTrainer

In [None]:
# Build everything needed to fine-tune the quantized model with LoRA adapters:
# dataset, PEFT-wrapped model, training arguments and the SFT trainer.
data = load_dataset("ZahrizhalAli/mental_health_conversational_dataset")
#data
#print(data["train"][0]['text'])

# `model` is another name for model_4bit; no copy is made.
# NOTE(review): prepare_model_for_kbit_training / get_peft_model presumably modify that
# object in place, so pipeline_4bit would see the changes as well — confirm.
model = model_4bit
model = prepare_model_for_kbit_training(model)

# Set the pad token before the trainer is constructed, so every component that receives
# the tokenizer already sees it.
tokenizer.pad_token = tokenizer.eos_token

lora_alpha = 32 # scaling factor for the weight matrices
lora_dropout = 0.05 # dropout probability of the LoRA layers
lora_rank = 32 # dimension of the low-rank matrices

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_rank,
    bias="none",  # setting to 'none' for only training weight params instead of biases
    task_type="CAUSAL_LM",
    target_modules=[         # Setting names of modules in falcon-7b model that we want to apply LoRA to
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

peft_model = get_peft_model(model, peft_config)
# Disable the generation cache for training, before the trainer is built.
peft_model.config.use_cache = False

output_dir = "./ft/"
per_device_train_batch_size = 4 # reduce batch size by 2x if out-of-memory error
gradient_accumulation_steps = 4 # increase gradient accumulation steps by 2x if batch size is reduced
optim = "paged_adamw_32bit"     # paged AdamW optimizer variant
save_strategy="steps"           # checkpoint save strategy to adopt during training
save_steps = 10                 # number of update steps between two checkpoint saves
logging_steps = 10              # number of update steps between two logs if logging_strategy="steps"
learning_rate = 2e-4            # learning rate for AdamW optimizer
max_grad_norm = 0.3             # maximum gradient norm (for gradient clipping)
max_steps = 320                 # training will happen for 320 steps
warmup_ratio = 0.03             # fraction of the total training steps used for linear warmup
lr_scheduler_type = "cosine"    # learning rate scheduler

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_strategy=save_strategy,  # now passed explicitly instead of being left unused
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    bf16=False,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    tf32=False,
)

trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data['train'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Cast every module whose qualified name contains "norm" to float32.
# nn.Module.to() converts the module in place, so no re-assignment is needed.
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module.to(torch.float32)


In [None]:
# Start training with the most recently constructed `trainer`.
trainer.train()

In [None]:
# Save to ./ft/, the directory the "Load finetuned model" section reads from.
trainer.save_model("./ft/")

# Load finetuned model

In [None]:
#https://huggingface.co/docs/transformers/v4.34.0/en/internal/generation_utils#transformers.StoppingCriteria

In [None]:
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline

# Same 4-bit NF4 / double-quantization / float16-compute settings as the base-model cell.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

# Local directory written by trainer.save_model("./ft/").
# NOTE(review): this rebinds the notebook-wide `model_id` and `tokenizer` names.
model_id = "./ft/"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# NOTE(review): device_map is assigned but not passed to from_pretrained below.
device_map = "auto"

ft = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config
    , trust_remote_code=True
)

# Pipeline with the same generation settings as pipeline_4bit, so the two can be compared.
pipeline_ft = pipeline(
    "text-generation",
    model=ft,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=150,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id, 
    return_full_text=False
)

print("pipeline created")

In [None]:
# Import the module, not `from datetime import datetime`, so cells that call
# `datetime.datetime.now()` keep working after this cell has run.
import datetime
import re

# Zero-width split point right after '!', '?', '.' or a newline. A raw string avoids
# invalid escape sequences, and the character class no longer contains '(' and ')'.
_SENTENCE_END = re.compile(r'(?<=[!?.\n])')


def first_sentence(text):
    """Return `text` up to and including its first '!', '?', '.' or newline."""
    return _SENTENCE_END.split(text)[0]


def timed_first_sentence(generator, prompt_text, max_length):
    """Generate once, print the first sentence of the reply, and return elapsed seconds.

    Args:
        generator: a text-generation pipeline instance.
        prompt_text: the prompt string passed to the pipeline.
        max_length: forwarded to the pipeline as `max_length`.
    """
    started = datetime.datetime.now()
    sequences = generator(prompt_text, max_length=max_length)
    print(first_sentence(sequences[0]['generated_text']))
    return (datetime.datetime.now() - started).total_seconds()


# Named prompt_text rather than `input`, which would shadow the builtin.
prompt_text = "Ed: How was the weather today? \n Minh: The weather is humid and sunny \n Ed: That is good to hear \n Minh:"
input_tok_len = len(tokenizer(prompt_text)['input_ids'])

# NOTE(review): if fine-tuning ran in this same kernel, pipeline_4bit may wrap a model
# object that was modified by the PEFT setup — confirm it is still the base model.
elapsed = timed_first_sentence(pipeline_4bit, prompt_text, input_tok_len*2)
print(f'duration of process: {elapsed} seconds\n')

elapsed = timed_first_sentence(pipeline_ft, prompt_text, input_tok_len*2)
print(f'duration of process: {elapsed} seconds')

# Other

In [None]:
#!pip install transformers spacy scikit-learn nltk pandas
#!pip install torch
#!python3 -m spacy download en_core_web_md
#nltk.download('punkt')
#!pip install xformers
#!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f

#!pip install accelerate
#!pip install bitsandbytes
#!pip install transformers
#!pip install ipywidgets

## Training method 1

In [None]:
#https://colab.research.google.com/drive/1BiQiw31DT7-cDp1-0ySXvvhzqomTdI-o?usp=sharing#scrollTo=aTBJVE4PaJwK
#!pip install datasets
#!pip install trl

In [None]:
from datasets import load_dataset

# Load only the "train" split of the named dataset; `dataset` is a single split, not a dict of splits.
dataset_name = "timdettmers/openassistant-guanaco"
dataset = load_dataset(dataset_name, split="train")

In [None]:
# Reuse the quantized model; model_4bit and tokenizer must already exist in the kernel.
model = model_4bit
# Use the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
from peft import LoraConfig

# LoRA hyper-parameters for training method 1.
lora_alpha = 16     # LoRA scaling factor
lora_dropout = 0.1  # dropout applied inside the LoRA layers
lora_r = 64         # rank of the low-rank update matrices

# Adapters are attached to the four listed module names; bias terms are not trained.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

In [None]:
from transformers import TrainingArguments

# Training hyper-parameters for method 1; checkpoints and logs go to ./results.
output_dir = "./results"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4   # effective batch per device = 4 * 4 = 16 sequences
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3               # gradient clipping threshold
max_steps = 500
# NOTE(review): the "constant" scheduler presumably has no warmup phase, so warmup_ratio
# may have no effect here; "constant_with_warmup" might be what was intended — confirm.
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=True,
)

In [None]:
from trl import SFTTrainer

# Maximum sequence length handed to the trainer.
max_seq_length = 512

# Supervised fine-tuning trainer. It receives the model together with peft_config and
# reads training text from the dataset's "text" field.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

In [None]:
# Collect every sub-module whose qualified name contains "norm", then cast each to float32.
norm_modules = [
    submodule
    for qualified_name, submodule in trainer.model.named_modules()
    if "norm" in qualified_name
]
for norm_module in norm_modules:
    norm_module.to(torch.float32)

In [None]:
# Start training with the most recently constructed `trainer`.
trainer.train()

In [None]:
# Save the trained model into the local "finetuned" directory.
trainer.save_model('finetuned')

In [None]:
import nltk
import spacy
from sklearn.metrics.pairwise import cosine_similarity
import string
import json
import string
import random
import transformers
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torch import nn
from transformers import BitsAndBytesConfig
from transformers import pipeline
import main

In [None]:
# Run main.calculate_prompts on input.json; the result is kept in `temp` for the next cells.
temp=main.calculate_prompts("input.json")

In [None]:
# Save `temp` with main.save_json; no output path is given, so its default applies.
main.save_json(temp)

In [None]:
# Call main.split_data() with its default arguments.
main.split_data()

In [None]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

# Restrict CUDA to device 0.
# NOTE(review): this presumably has no effect if CUDA was already initialised in this kernel — confirm ordering.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
MODEL_NAME = "vilsonrodrigues/falcon-7b-instruct-sharded"

# NOTE(review): load_in_4bit is commented out, so the bnb_4bit_* options below may not
# result in any quantization being applied — confirm whether that is intended.
bnb_config = BitsAndBytesConfig(
    #load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Rebinds the notebook-wide `model` name.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

# Rebinds `tokenizer` and uses the end-of-sequence token for padding.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

## Train

In [None]:
!pip3 install -q -U trl

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# No split is requested, so `data` is indexed by split name (e.g. data["train"]).
data = load_dataset("timdettmers/openassistant-guanaco")

In [None]:
# Checkpoint id used by the tokenizer and model cells that follow.
model_name = "tiiuae/falcon-7b"

In [None]:
# Rebinds the notebook-wide `tokenizer` to the tokenizer of `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Loads the checkpoint with no quantization config and no device placement arguments.
# NOTE(review): other cells pass trust_remote_code=True for this checkpoint — confirm whether it is needed here.
model = AutoModelForCausalLM.from_pretrained(model_name)

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Minimal SFTTrainer run: train split of "imdb", default training arguments.
dataset = load_dataset("imdb", split="train")

model_id = "tiiuae/falcon-7b"

# Rebinds the notebook-wide `tokenizer` and `model`; no quantization config is passed.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
)
# Training starts immediately in this cell.
trainer.train()


In [None]:
# Build an SFT trainer on the "train" split of `data`, which was loaded without a split
# argument. `dataset` cannot be used here: it was loaded with split="train", so it is a
# single split and has no "train" entry to index.
trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=data['train'],
    dataset_text_field="text",
    max_seq_length=512,
)

In [None]:
# Start training with the most recently constructed `trainer`.
trainer.train()

In [None]:
import torch, einops
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments
)
from peft.tuners.lora import LoraLayer

from trl import SFTTrainer


def create_and_prepare_model():
    """Load the 4-bit quantized base model with its LoRA config and tokenizer.

    Returns:
        A ``(model, peft_config, tokenizer)`` tuple. The model is loaded from
        "tiiuae/falcon-7b" with NF4 double quantization and placed on device 0.
        The tokenizer uses its end-of-sequence token as the pad token.
    """
    base_checkpoint = "tiiuae/falcon-7b"

    quant_settings = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    falcon = AutoModelForCausalLM.from_pretrained(
        base_checkpoint,
        quantization_config=quant_settings,
        device_map={"": 0},
        trust_remote_code=True,
    )

    falcon_tokenizer = AutoTokenizer.from_pretrained(base_checkpoint, trust_remote_code=True)
    falcon_tokenizer.pad_token = falcon_tokenizer.eos_token

    # LoRA adapters are attached only to the "query_key_value" modules.
    lora_settings = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["query_key_value"],
    )

    return falcon, lora_settings, falcon_tokenizer


# Training hyper-parameters: batch size 1 with 4 accumulation steps, 1000 optimizer steps,
# a checkpoint every 100 steps and a log entry every 10.
# NOTE(review): the "constant" scheduler presumably ignores warmup_ratio — confirm.
training_arguments = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=1000,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
)

# Rebinds the notebook-wide model / peft_config / tokenizer names.
model, peft_config, tokenizer = create_and_prepare_model()
# Disable the generation cache for training.
model.config.use_cache = False
dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")

# packing=True is enabled for this trainer, unlike the other SFTTrainer cells in this notebook.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=True,
)

trainer.train()

In [None]:
# Load the training examples and turn them into input texts with main.generate_input_texts.
with open("train_data.json", "r") as json_file:
    training_data = json.load(json_file)

input_texts = main.generate_input_texts(training_data)

# Initialize the falcon-7b-sharded model and tokenizer
model_name = "vilsonrodrigues/falcon-7b-sharded"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Adding '[PAD]' as a new token grows the vocabulary, so the embedding matrix has to grow
# with it; otherwise the new id indexes past the end of the embedding table.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

# Tokenize the input texts
encoded_input = tokenizer(
    input_texts,
    return_tensors="pt",
    padding="max_length",
    truncation=True,  # texts longer than max_length would otherwise give ragged rows
    max_length=512  # Adjust max_length as needed
)
# Number of encoded examples (len(encoded_input) would count the returned fields instead).
print(len(encoded_input["input_ids"]))

In [None]:
# Load the training data from the JSON file
with open("train_data.json", "r") as json_file:
    training_data = json.load(json_file)

# Build the texts to tokenize; the format is defined by main.generate_input_texts.
input_texts = main.generate_input_texts(training_data)

In [None]:
# Initialize the falcon-7b-sharded model and tokenizer
model_name = "vilsonrodrigues/falcon-7b-sharded"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Adding '[PAD]' as a new token grows the vocabulary, so the embedding matrix has to grow
# with it; otherwise the new id indexes past the end of the embedding table.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Tokenize the input texts
encoded_input = tokenizer(
    input_texts,
    return_tensors="pt",
    padding="max_length",
    truncation=True,  # texts longer than max_length would otherwise give ragged rows
    max_length=512  # Adjust max_length as needed
)

In [None]:
#input_ids = main.truncate(encoded_input)
# Despite its name, input_ids holds the tokenizer's whole output object, not only its "input_ids" entry.
input_ids = encoded_input

# Create a DataLoader for training
# Shuffled batches of 4 items taken from main.TextDataset.
dataset = main.TextDataset(input_ids)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

In [None]:
# Training configuration
num_epochs = 5
learning_rate = 1e-4
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# NOTE(review): criterion is not used by the training loop in this notebook, which reads outputs.loss instead.
criterion = torch.nn.CrossEntropyLoss()

# Pick the GPU when available, otherwise the CPU, and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
# Manual causal-LM training loop: one optimizer step per batch, mean loss printed per epoch.
for epoch in range(num_epochs):
    total_loss = 0.0
    model.train()

    for batch in tqdm(dataloader, desc=f"Epoch {epoch + 1}"):
        for key in batch:
            batch[key] = batch[key].to(device)

        # Labels are the inputs themselves. Padding positions are set to -100, the index
        # the loss ignores, so the model is not trained to predict padding.
        labels = batch["input_ids"].clone()
        if "attention_mask" in batch:
            labels[batch["attention_mask"] == 0] = -100

        optimizer.zero_grad()
        outputs = model(**batch, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # max(..., 1) keeps an empty dataloader from raising ZeroDivisionError.
    print(f"Epoch {epoch + 1} Loss: {total_loss / max(len(dataloader), 1)}")

# Save the trained model
model.save_pretrained("trained_falcon_model")
tokenizer.save_pretrained("trained_falcon_model")

print("Training completed. Model saved as 'trained_falcon_model'.")


In [None]:
# Load the trained falcon-7b-sharded model and tokenizer
model_path = "trained_falcon_model"  # Replace with the path to your trained model
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Set the device for inference (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define a user prompt for text generation
user_prompt = "Please provide recommendations for places to visit in"

# Encode with the tokenizer call so the attention mask is available for generate().
encoded_prompt = tokenizer(user_prompt, return_tensors="pt").to(device)

# Special-token ids come from the loaded tokenizer. The previously hard-coded 50256 is
# GPT-2's id and does not describe this model's vocabulary.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

# Generate text based on the user prompt
generated_ids = model.generate(
    input_ids=encoded_prompt["input_ids"],
    attention_mask=encoded_prompt["attention_mask"],
    max_length=100,  # Adjust as needed for desired text length
    num_return_sequences=1,  # Number of sequences to generate
    do_sample=True,  # temperature / top_k / top_p only take effect when sampling
    temperature=0.7,  # Adjust for randomness (higher values make text more random)
    top_k=50,  # Adjust to control the diversity of generated text
    top_p=0.95,  # Adjust to control the diversity of generated text
    pad_token_id=pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

# Decode and print the generated text
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("Generated Text:")
print(generated_text)


In [None]:
# Demonstration: an embedding table with `input_dim` rows accepts indices 0..input_dim-1
# only. With err=True the lookup uses index 10, which is out of range; the resulting
# IndexError is caught and shown so the cell does not abort a Run All.
input_dim = 10
embedding_dim = 2
embedding = nn.Embedding(input_dim, embedding_dim)
err = True
if err:
    #Any input more than input_dim - 1, here input_dim = 10
    #Any input less than zero
    input_to_embed = torch.tensor([10])
else:
    input_to_embed = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

embed = None  # stays None when the lookup fails
try:
    embed = embedding(input_to_embed)
    print(embed)
except IndexError as exc:
    print(f"IndexError for indices {input_to_embed.tolist()}: {exc}")

In [None]:
!pip3 install bitsandbytes-cuda110 bitsandbytes

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
import transformers
import torch

# Checkpoint to load; the alternatives that were tried are kept commented out.
#model_name = "ybelkada/falcon-7b-sharded-bf16"
#model_name = "tiiuae/falcon-7b"
#model_name = "vilsonrodrigues/falcon-7b-instruct-sharded"
model_name = "vilsonrodrigues/falcon-7b-sharded"

# NOTE(review): load_in_4bit is commented out, so the bnb_4bit_* options below may not
# result in any quantization being applied — confirm whether that is intended.
bnb_config = BitsAndBytesConfig(
    #load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Rebinds the notebook-wide `model`; `tokenizer` is not reloaded in this cell.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
model.config.use_cache = False

In [None]:
# `pipeline` is the transformers factory function: its first argument is a task name, so
# a text-generation pipeline is built first and then called with the prompt.
# NOTE(review): uses whichever `model` and `tokenizer` are currently bound in the kernel — confirm they match.
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

sequences = text_generator(
    "Write a poem about Valencia.",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")


In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Minimal SFTTrainer run: train split of "imdb", default training arguments.
# NOTE(review): this cell repeats an earlier cell of this notebook verbatim.
dataset = load_dataset("imdb", split="train")

model_id = "tiiuae/falcon-7b"

# Rebinds the notebook-wide `tokenizer` and `model`; no quantization config is passed.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
)
# Training starts immediately in this cell.
trainer.train()


## Training method 2

In [None]:
# https://colab.research.google.com/drive/1IqL0ay04RwNNcn5R7HzhgBqZ2lPhHloh?usp=sharing#scrollTo=XVpOQJnSqJYp
#!pip install peft

In [None]:
# Reuse the quantized model; model_4bit must already exist in the kernel.
model = model_4bit

In [None]:
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model
)

In [None]:
def print_trainable_parameters(model):
  """
  Prints the number of trainable parameters in the model.

  Counts elements over ``model.named_parameters()``; a parameter is trainable when its
  ``requires_grad`` flag is set. A model without parameters reports 0.0 percent
  instead of raising ZeroDivisionError.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    count = param.numel()
    all_param += count
    if param.requires_grad:
      trainable_params += count
  # Guard the percentage against a parameter-free model.
  percentage = 100 * trainable_params / all_param if all_param else 0.0
  print(
      f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {percentage}"
  )

In [None]:
#model.gradient_checkpointing_enable()
#model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA settings for method 2: rank 16, alpha 32, adapters only on "query_key_value".
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with the adapters and report how many parameters are trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
# Two-line chat prompt: the human turn, then an open assistant turn for the model to complete.
prompt = (
    "<human>: midjourney prompt for a girl sit on the mountain\n"
    "<assistant>:"
)

In [None]:
# This is the model's own generation_config object (not a copy), so the assignments
# below change the model's default generation settings.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# The end-of-sequence token is used both for padding and for stopping.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
#%%time
# Hard-coded to the first CUDA device.
device = "cuda:0"

# Tokenize the prompt and move the tensors to the GPU.
encoding = tokenizer(prompt, return_tensors="pt").to(device)
# Generate inside torch.inference_mode() using the generation_config set up earlier.
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Load a local CSV file as a dataset; the rows are accessed through data["train"] in later cells.
data = load_dataset("csv", data_files="midjourney_prompt_dataset.csv")

In [None]:
# Display the first training row to inspect its fields.
data["train"][0]

In [None]:
def generate_prompt(data_point):
  """Format one record as a two-line chat prompt.

  Reads the "User" and "Prompt" entries of `data_point` and returns
  "<human>: {User}" and "<assistant>: {Prompt}" joined by a newline, with
  surrounding whitespace stripped from the combined string.
  """
  human_turn = f'<human>: {data_point["User"]}'
  assistant_turn = f'<assistant>: {data_point["Prompt"]}'
  return "\n".join((human_turn, assistant_turn)).strip()

def generate_and_tokenize_prompt(data_point):
  """Build the chat prompt for `data_point` and return the tokenizer's output for it.

  Uses the notebook-level `tokenizer` with padding=True and truncation=True.
  """
  return tokenizer(generate_prompt(data_point), padding=True, truncation=True)

In [None]:
# Shuffle the train split and tokenize every row.
# NOTE(review): this rebinds `data` from the split dict to a single split, so the cell cannot be re-run as-is.
data = data["train"].shuffle().map(generate_and_tokenize_prompt)

In [None]:
# Method 2 training settings: one epoch, batch size 1 with 4 accumulation steps,
# a log entry every step, output written to "experiments".
training_args = transformers.TrainingArguments(
      per_device_train_batch_size=1,
      gradient_accumulation_steps=4,
      num_train_epochs=1,
      learning_rate=2e-4,
      fp16=True,
      save_total_limit=3,
      logging_steps=1,
      output_dir="experiments",
      optim="paged_adamw_8bit",
      lr_scheduler_type="cosine",
      warmup_ratio=0.05,
)

# Plain Trainer with a language-modeling collator; mlm=False turns off masked-LM masking.
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# Disable the generation cache for training.
model.config.use_cache = False
trainer.train()

## Training method 3

In [None]:
#https://colab.research.google.com/drive/1BiQiw31DT7-cDp1-0ySXvvhzqomTdI-o?usp=sharing
#export 'PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:100'

In [None]:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Method 3: SFTTrainer on the "imdb" train split, reusing the quantized model.
dataset = load_dataset("imdb", split="train")
# model_4bit and tokenizer must already exist in the kernel.
model = model_4bit
tokenizer.pad_token = tokenizer.eos_token

# No peft_config or TrainingArguments are passed to this trainer.
trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512
    , dataset_batch_size = 2
)

In [None]:
# Start training with the most recently constructed `trainer`.
trainer.train()