In [None]:
import logging
import os

from cajajejo.training.trainer import NeuripsTrainer
from cajajejo.training.utils import generate_prompt

# Route the "cajajejo" package's log records to the notebook output at DEBUG level.
#
# The handler is tagged with a name and only attached when no handler with that
# name is present yet: loggers are process-wide singletons, so re-running this
# cell would otherwise stack one more StreamHandler per run and print every
# message multiple times.
NOTEBOOK_HANDLER_NAME = "cajajejo-notebook"

logger = logging.getLogger("cajajejo")
if all(h.get_name() != NOTEBOOK_HANDLER_NAME for h in logger.handlers):
    handler = logging.StreamHandler()
    handler.set_name(NOTEBOOK_HANDLER_NAME)
    # The formatter is passed inline rather than bound to a variable called
    # `format`, which would shadow the builtin for the rest of the notebook.
    handler.setFormatter(
        logging.Formatter("%(name)s - %(levelname)s - %(message)s")
    )
    logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

In [None]:
import pandas as pd
from datasets import load_dataset, Dataset

# Fraction of rows held out for evaluation and the seed that makes the split
# reproducible across kernel restarts.
EVAL_FRACTION = 0.05
SPLIT_SEED = 42

# Load the Dolly 15k instruction dataset. Kept under its own name so that
# `df_dolly` is only ever a DataFrame (it used to be rebound from the loaded
# dataset object to a DataFrame within the same cell).
dolly_raw = load_dataset("databricks/databricks-dolly-15k")

# One row per example with two columns:
#   prompt   - built per row by generate_prompt (called with axis=1, i.e. it
#              receives each row)
#   response - the reference answer followed by the "\n### End" marker
# `prompt` is computed before `response` is modified, matching the original
# statement order.
df_dolly = (
    pd.DataFrame(dolly_raw["train"])
    .assign(
        prompt=lambda frame: frame.apply(generate_prompt, axis=1),
        response=lambda frame: frame["response"] + "\n### End",
    )
    .loc[:, ["prompt", "response"]]
)

# Training text is prompt and response concatenated into a single "text"
# column; built without in-place mutation so the cell is safe to re-run.
df = (
    df_dolly
    .assign(text=lambda frame: frame["prompt"] + frame["response"])
    .drop(columns=["prompt", "response"])
)

# Hugging Face dataset with "train" and "test" splits.
dataset = Dataset.from_pandas(df).train_test_split(
    test_size=EVAL_FRACTION, seed=SPLIT_SEED
)

In [None]:
# Path of the training job configuration consumed by NeuripsTrainer.from_config.
# The default is the original absolute location; set the NEURIPS_JOB_CONFIG
# environment variable to run the notebook from a different checkout or user
# account without editing this cell. An empty value falls back to the default.
CONFIG_PATH = (
    os.environ.get("NEURIPS_JOB_CONFIG")
    or '/home/user/neurips-llm-efficiency-challenge/jobs/training/opt1b_T4.job_config.yml'
)

In [None]:
# Build the trainer from the job configuration file at CONFIG_PATH.
trainer = NeuripsTrainer.from_config(CONFIG_PATH)

In [None]:
# Ask the trainer for a model in "training" mode.
# NOTE(review): what the mode changes (quantization, adapters, ...) is decided
# inside NeuripsTrainer.get_model and is not visible from this notebook.
model = trainer.get_model(mode="training")

In [None]:
# Pick the two splits out of `dataset` up front so the call below reads as a
# plain list of arguments.
train_split = dataset["train"]
eval_split = dataset["test"]

# Run training; "text" is the column of the splits that holds the
# concatenated prompt + response strings.
trainer.train_model(
    model=model,
    train_dataset=train_split,
    eval_dataset=eval_split,
    dataset_text_field="text",
)

In [None]:
# Fetch the trained LoRA model from the trainer.
# NOTE(review): presumably the adapter-wrapped model from the training run
# above — confirm against NeuripsTrainer.get_trained_lora_model.
peft_model = trainer.get_trained_lora_model()

In [None]:
# Display the object's repr as the cell output for inspection.
peft_model

In [None]:
# Merge the adapter into the base model via merge_and_unload().
# NOTE: this rebinds `model`, which until now held the training-mode model
# returned by trainer.get_model.
# NOTE(review): assumes peft_model is a peft.PeftModel — confirm.
model = peft_model.merge_and_unload()

In [None]:
# Display the merged model's repr as the cell output for inspection.
model

In [None]:
# Obtain the merged model through the trainer's own helper.
# NOTE(review): peft_model.merge_and_unload() was already called by hand
# earlier in this notebook; whether merge_lora_weights() repeats that merge or
# works from a saved checkpoint is not visible here — confirm that one of the
# two paths is not redundant.
trained_model = trainer.merge_lora_weights()

In [None]:
# Write the merged model to "out/merged", a path relative to the kernel's
# current working directory. Only the model is saved here; no tokenizer files
# are written by this cell.
trained_model.save_pretrained("out/merged")

In [None]:
# Fetch the tokenizer from the trainer; it is used below to encode the prompt
# and to decode the generated token ids.
tokenizer = trainer.get_tokenizer()

In [None]:
from transformers import AutoModelForCausalLM
import torch

# Reload the merged checkpoint from disk for inference.
#
# The path is the same relative "out/merged" directory that
# trained_model.save_pretrained("out/merged") writes to, so saving and loading
# always agree and the notebook is not tied to one user's home directory.
#
#   load_in_8bit=True          - load the weights 8-bit quantized
#   device_map={"": 0}         - place the whole model on device 0
#   torch_dtype=torch.float16  - dtype for the non-quantized parts
#   local_files_only=True      - never contact the Hub; read from disk only
#
# NOTE: this rebinds `model` once more; the previously merged model is no
# longer reachable through that name afterwards.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="out/merged",
    load_in_8bit=True,
    device_map={"": 0},
    torch_dtype=torch.float16,
    local_files_only=True,
)

In [None]:
# Display the reloaded model's repr as the cell output for inspection.
model

In [None]:
from cajajejo.training.utils import extract_response_text, generate_prompt

# Example record with the "instruction" and "context" keys that is handed to
# generate_prompt; the context is left empty for this question.
q = dict(
    instruction="Give me a step-by-step explanation how I should make pizza",
    context="",
)

# Alternative question to try instead:
# q = dict(instruction="What is the origin of orange wine?", context="")

# Render the record into the prompt string fed to the model.
prompt = generate_prompt(q)

In [None]:
# Tokenize the prompt into PyTorch tensors ("pt") and move the token ids to
# the CUDA device so they can be passed to model.generate.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')

In [None]:
# Continue the prompt, allowing at most 256 newly generated tokens.
generation_output = model.generate(input_ids=input_ids, max_new_tokens=256)

# Index 0 selects the first generated sequence (a single prompt was encoded);
# decode its token ids back into text.
response = tokenizer.decode(generation_output[0])

In [None]:
# Print the text that extract_response_text pulls out of the decoded output.
# NOTE(review): presumably this strips the prompt scaffold and keeps only the
# answer — see cajajejo.training.utils.extract_response_text.
print(extract_response_text(response))

In [None]:
# NOTE(review): identical to the previous cell — looks like a leftover from
# re-running generation with a different question; candidate for removal.
print(extract_response_text(response))

In [None]:
# Drop the notebook's references to the inference model, the LoRA model and
# the prompt tensor so their memory can be reclaimed.
#
# Names are popped from the namespace instead of using `del`, which raises
# NameError when the cell is run a second time or before the names exist.
#
# NOTE(review): objects that were displayed as a cell output may still be
# referenced by IPython's output cache (Out[...] / _); if memory is not
# released, clear it with `%reset -f out`.
for _name in ("model", "peft_model", "input_ids"):
    globals().pop(_name, None)
del _name

In [None]:
# Drop the reference to the merged model returned by the trainer. Popping the
# name (instead of `del`) keeps the cell safe to re-run: a missing name is
# simply ignored rather than raising NameError.
globals().pop("trained_model", None)

In [None]:
import torch
import gc

# Order matters: run the garbage collector first so that tensors only kept
# alive by reference cycles are actually freed and their blocks returned to
# PyTorch's caching allocator; only then release the allocator's unused cached
# blocks. Emptying the cache before collecting would leave the memory of
# anything freed by the collection sitting in the cache.
gc.collect()
torch.cuda.empty_cache()
