In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, get_peft_config, PeftModelForCausalLM
import torch

In [None]:
# Locations of the serialized train/eval example files, relative to the working directory.
train_path = 'data/20_000_examples_train.pt'
eval_path = 'data/20_000_examples_eval.pt'
# Tokenizer fetched by the Hugging Face model id 'AI-Sweden-Models/gpt-sw3-6.7b'.
tokenizer = AutoTokenizer.from_pretrained('AI-Sweden-Models/gpt-sw3-6.7b')

# NOTE(review): torch.load can unpickle arbitrary Python objects — only load files from a trusted source.
train_data = torch.load(train_path)
eval_data = torch.load(eval_path)



In [None]:
# Load the "Intel/orca_dpo_pairs" dataset through the Hugging Face `datasets` library
from datasets import load_dataset
dataset = load_dataset("Intel/orca_dpo_pairs")

In [None]:
# Save the train split as JSON Lines (one record per line), opening the target file in write mode
dataset['train'].to_json('../data/Orca-DPO-pairs.jsonl', orient='records', lines=True, mode='w')
# Merge with validation (disabled).
# NOTE(review): the disabled line targets an "oasst2" file name, not the Orca file above — confirm before re-enabling.
# dataset['validation'].to_json('../data/oasst2_eval.jsonl', orient='records', lines=True, mode='a')

In [None]:
# Target repository on the Hugging Face Hub
dataset_name = "skvarre/SlimOrca-SV-33k"

# Upload dataset from json to huggingface
# NOTE(review): `load_dataset` is imported twice in this cell and neither it nor `DatasetDict` is used here.
from datasets import load_dataset, Dataset
from datasets import DatasetDict
from datasets import load_dataset

# Create dataset from jsonl
dataset = Dataset.from_json('../data/SlimOrca-sv-v2-33k.jsonl')

# Publish the dataset to the Hub under `dataset_name`
dataset.push_to_hub(dataset_name)

In [None]:
# Hub repository to download
dataset_name = "skvarre/hogskoleprovet-ord-3k"

# Download dataset from hub
from datasets import load_dataset, Dataset

# Rebinds the notebook-wide `dataset` variable to the downloaded result
dataset = load_dataset(dataset_name)

In [None]:
# Number of training examples; the value is not displayed because this is not the cell's last expression.
len(dataset['train'])

# Appends '.' to element 1 of every 'text' entry of the train split.
# NOTE(review): dataset['train']['text'] is re-evaluated on every iteration and is presumably a
# decoded copy of the column, so this in-place change likely never reaches the stored dataset —
# confirm, and consider dataset.map(...) if the edit is meant to persist.
for i in range(len(dataset['train'])):
    dataset['train']['text'][i][1] += '.'

# Display the example at index 1 to inspect the result
dataset['train'][1]

In [None]:
# Display the training example at index 1
dataset['train'][1]

In [None]:
def reformat_example(example):
    """Collapse the entries of ``example['text']`` into a single human/gpt pair.

    Each entry is a mapping that may carry a ``'<human>'`` and/or a ``'<bot>'``
    value. The last non-``None`` value seen for each key wins.

    Args:
        example: Mapping with a ``'text'`` key holding an iterable of entries
            (``None`` is treated as an empty iterable).

    Returns:
        ``{'human': str, 'gpt': str}`` when both parts are non-empty, otherwise
        ``{'human': None, 'gpt': None}`` so the caller can filter the row out.
    """
    human_text = ""
    bot_text = ""

    # `or []` tolerates a null 'text' value instead of failing on iteration.
    for text_entry in example['text'] or []:
        # .get() tolerates entries that lack one of the two keys entirely.
        human_part = text_entry.get('<human>')
        bot_part = text_entry.get('<bot>')
        if human_part is not None:
            human_text = human_part
        if bot_part is not None:
            bot_text = bot_part

    # Return new format, ensuring that both parts are non-empty
    if human_text and bot_text:
        return {'human': human_text, 'gpt': bot_text}
    return {'human': None, 'gpt': None}

# Reformat every record, keep only rows with a complete human/gpt pair,
# then drop the original 'text' column.
new_dataset = (
    dataset
    .map(reformat_example, batched=False)
    .filter(lambda row: not (row['human'] is None or row['gpt'] is None))
    .remove_columns('text')
)

In [None]:
# Publish the reformatted dataset to the Hub repository named by `dataset_name`
# (whatever value the most recently executed cell assigned to it).
new_dataset.push_to_hub(dataset_name)

In [None]:
import torch
from transformers import pipeline, StoppingCriteriaList, StoppingCriteria

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"


# (Optional) - define a stopping criteria
# We ideally want the model to stop generate once the response from the Bot is generated
class StopOnTokenCriteria(StoppingCriteria):
    """Stopping criterion that compares the newest token with a fixed token id."""

    def __init__(self, stop_token_id):
        # Token id that should end generation once it appears as the last token.
        self.stop_token_id = stop_token_id

    def __call__(self, input_ids, scores, **kwargs):
        # Only the first row of the batch is inspected.
        newest_token = input_ids[0, -1]
        return newest_token == self.stop_token_id


# Text-generation pipeline backed by the locally fine-tuned translator checkpoint.
pipe = pipeline(
    task="text-generation",
    model="../models/gpt-sw3-6.7b-v2-translator-v2/checkpoint-30",
    device=device
)

# Stop as soon as the model emits the tokenizer's BOS token.
stop_on_token_criteria = StopOnTokenCriteria(stop_token_id=pipe.tokenizer.bos_token_id)
text = "I like to eat ice cream in the summer."

# This will translate English to Swedish
# To translate from Swedish to English the prompt would be:
# prompt = f"<|endoftext|><s>User: Översätt till Engelska från Svenska\n{text}<s>Bot:"

prompt = f"<|endoftext|><s>User: Översätt till Svenska från Engelska\n{text}<s>Bot:"

# Tokenize once only to learn how many tokens the prompt occupies.
input_tokens = pipe.tokenizer(prompt, return_tensors="pt").input_ids.to(device)
max_model_length = 2048
# Tokens left for the answer once the prompt is accounted for.
dynamic_max_length = max_model_length - input_tokens.shape[1]

response = pipe(
    prompt,
    # `max_length` counts prompt + generated tokens, so passing the remaining
    # budget there would subtract the prompt twice; `max_new_tokens` limits
    # only the generated part, which is what the budget describes.
    max_new_tokens=dynamic_max_length,
    truncation=True,
    stopping_criteria=StoppingCriteriaList([stop_on_token_criteria])
)

# Keep only what follows the last "<s>Bot: " marker in the generated text.
print(response[0]["generated_text"].split("<s>Bot: ")[-1])


In [None]:
text = "I like to eat ice cream in the summer."

# This will translate English to Swedish
# To translate from Swedish to English the prompt would be:
# prompt = f"<|endoftext|><s>User: Översätt till Engelska från Svenska\n{text}<s>Bot:"

prompt = f"<|endoftext|><s>User: Översätt till Svenska från Engelska\n{text}<s>Bot:"

# Tokenize once only to learn how many tokens the prompt occupies.
input_tokens = pipe.tokenizer(prompt, return_tensors="pt").input_ids.to(device)
max_model_length = 2048
# Tokens left for the answer once the prompt is accounted for.
dynamic_max_length = max_model_length - input_tokens.shape[1]

response = pipe(
    prompt,
    # `max_length` counts prompt + generated tokens, so passing the remaining
    # budget there would subtract the prompt twice; `max_new_tokens` limits
    # only the generated part, which is what the budget describes.
    max_new_tokens=dynamic_max_length,
    truncation=True,
    stopping_criteria=StoppingCriteriaList([stop_on_token_criteria])
)

# Keep only what follows the last "<s>Bot: " marker in the generated text.
print(response[0]["generated_text"].split("<s>Bot: ")[-1])

In [None]:
import torch
from transformers import pipeline, StoppingCriteriaList, StoppingCriteria, AutoModelForCausalLM, AutoTokenizer


# Download the instruct model and its tokenizer from the Hub and write both to a local directory
instruct_hub_id = "AI-Sweden-Models/gpt-sw3-6.7b-v2-instruct"
instruct_local_dir = "../models/gpt-sw3-6.7b-v2-instruct"

model = AutoModelForCausalLM.from_pretrained(instruct_hub_id)
tokenizer = AutoTokenizer.from_pretrained(instruct_hub_id)
model.save_pretrained(instruct_local_dir)
tokenizer.save_pretrained(instruct_local_dir)

In [None]:
# Upload model to hub

from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Local training checkpoint to publish (absolute, machine-specific path) and its Hub destination
qlora_checkpoint_dir = "/mnt/pr_SharedNLU/users/tim_olsen/instruction-tuning-nordic/models/qlora-test-gpt-sw3-6.7b-v2-20k-8batch-0.85split/checkpoint-3450"
qlora_hub_repo = "skvarre/qlora-4bit-test-gpt-sw3-6.7b"

model = AutoModelForCausalLM.from_pretrained(qlora_checkpoint_dir)
tokenizer = AutoTokenizer.from_pretrained(qlora_checkpoint_dir)

# Model and tokenizer go to the same repository
model.push_to_hub(qlora_hub_repo)
tokenizer.push_to_hub(qlora_hub_repo)

In [None]:
# Merge LoRA adapters with base model and upload to hub
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import get_peft_model, get_peft_config, PeftModelForCausalLM, PeftModel 
import torch

# Base checkpoint, the LoRA adapter trained on top of it, and the base model's tokenizer.
base_model_path = "../merged-models/gpt-sw3-6.7b-hopkok-v3-nosystem"
adapter_path = "../dpo_models/gpt-sw3-6.7b-hopkok-v3-nosystem-DPO-Run-1"
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# 4-bit loading options. They are not applied below while the
# `quantization_config` argument stays commented out.
# The nested-quantization flag is spelled `bnb_4bit_use_double_quant`; the
# previous `bnb_4bit_use_double_quantization` is not a BitsAndBytesConfig field.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # Load model in 4-bit mode
    bnb_4bit_use_double_quant=True,        # Nested quantization
    bnb_4bit_quant_type="nf4",             # Quantization algorithm to use
    bnb_4bit_compute_dtype=torch.bfloat16  # Compute dtype used with the 4-bit weights
)

# Load the base model in bfloat16 and let `device_map="auto"` place it on the available devices.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    # quantization_config=quantization_config,
    local_files_only=False,
    device_map="auto"
)

# Attach the adapter weights to the base model.
model_to_merge = PeftModel.from_pretrained(
    base_model,
    adapter_path
)

In [None]:
# Fold the adapter weights into the base model; `merge_and_unload()` returns the merged model
merged_model = model_to_merge.merge_and_unload()

In [None]:
# `merged_model` comes from the preceding merge cell; re-enable the next line to merge here instead.
# merged_model = model_to_merge.merge_and_unload()

# Write the merged model and its tokenizer into the same directory
merged_dpo_dir = "../merged-models/gpt-sw3-6.7b-hopkok-v3-nosystem-DPO"
merged_model.save_pretrained(merged_dpo_dir)
tokenizer.save_pretrained(merged_dpo_dir)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Local merged checkpoint and the Hub repository it is published to
hopkok_local_dir = "../merged-models/gpt-sw3-6.7b-v2-hopkok-v1-instruct"
hopkok_hub_repo = "skvarre/gpt-sw3-6.7b-v2-hopkok-v1-instruct"

merged_model = AutoModelForCausalLM.from_pretrained(hopkok_local_dir)
tokenizer = AutoTokenizer.from_pretrained(hopkok_local_dir)

# Model and tokenizer go to the same repository
merged_model.push_to_hub(hopkok_hub_repo)
tokenizer.push_to_hub(hopkok_hub_repo)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import get_peft_model, get_peft_config, PeftModelForCausalLM
import torch
# Download the merged openhermes model from the Hub and save it, with its tokenizer, next to the notebook
openhermes_hub_id = "skvarre/gpt-sw3-6.7b-v2-qlora-4bit-openhermes-28k-merged"
openhermes_local_dir = "./gpt-sw3-6.7b-v2-qlora-4bit-openhermes-28k-merged"

model = AutoModelForCausalLM.from_pretrained(openhermes_hub_id)
tokenizer = AutoTokenizer.from_pretrained(openhermes_hub_id)

model.save_pretrained(openhermes_local_dir)
tokenizer.save_pretrained(openhermes_local_dir)

In [None]:
from transformers import AutoProcessor, SeamlessM4Tv2Model

# Download the SeamlessM4T v2 model and its processor from the Hub
seamless_hub_id = "facebook/seamless-m4t-v2-large"
seamless_local_dir = "../../models/seamless-m4t-v2-large"

model = SeamlessM4Tv2Model.from_pretrained(seamless_hub_id)
processor = AutoProcessor.from_pretrained(seamless_hub_id)

# Save to disk
model.save_pretrained(seamless_local_dir)
processor.save_pretrained(seamless_local_dir)


### Merging adapters by dequantizing the model instead of naively merging with the base model. Adapted from [this notebook](https://colab.research.google.com/drive/12c_sx8pIwiStqKr_7CF5BVwyyJpXmMTf?usp=sharing#scrollTo=c9yLWqKRKKyd)

In [None]:
import torch
import peft
import json
import shutil
from peft.utils import _get_submodules
import os
import bitsandbytes as bnb
from bitsandbytes.functional import dequantize_4bit
from peft import PeftModel
from transformers import AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig
import gc
import copy

# dtype used for the 4-bit compute setting and for the dequantized weights
dtype = torch.bfloat16
# Base model directory that is loaded in 4-bit and then dequantized
model_name = "../merged-models/gpt-sw3-6.7b-hopkok-v2-nosystem"
# Adapter directory that is applied to the dequantized model
adapter = "../dpo_models/gpt-sw3-6.7b-hopkok-v2-nosystem-DPO-TEST-3/"

def dequantize_model(model, to='./dequantized_model', dtype=torch.bfloat16, device="cuda"):
    """
    Replace every bitsandbytes 4-bit linear layer of `model` with a regular
    `torch.nn.Linear` holding the dequantized weights, save the result to `to`
    and return the model. The tokenizer is not saved by this function.

    'model': the model loaded in 4-bit (nf4) mode; it is modified in place.
    'to': directory to save the dequantized model
    'dtype': dtype that the model was trained using; dequantized weights are cast to it
    'device': device to load the model to

    Returns the same `model` object, now without 4-bit layers.
    """

    os.makedirs(to, exist_ok=True)

    cls = bnb.nn.Linear4bit

    with torch.no_grad():
        for name, module in model.named_modules():
            if isinstance(module, cls):
                print(f"Dequantizing `{name}`...")
                # Work on a copy so the original quant state keeps its own dtype.
                quant_state = copy.deepcopy(module.weight.quant_state)
                quant_state.dtype = dtype

                weights = dequantize_4bit(module.weight.data, quant_state=quant_state, quant_type="nf4").to(dtype)

                # Carry the bias over when the quantized layer has one; creating the
                # replacement without a bias would silently drop it from the model.
                has_bias = module.bias is not None
                new_module = torch.nn.Linear(module.in_features, module.out_features, bias=has_bias, dtype=dtype)
                new_module.weight = torch.nn.Parameter(weights)
                if has_bias:
                    new_module.bias = torch.nn.Parameter(module.bias.data.to(dtype))
                new_module.to(device=device, dtype=dtype)

                # Swap the quantized layer for the dense one on its parent module.
                parent, target, target_name = _get_submodules(model, name)
                setattr(parent, target_name, new_module)

        # a hack, setting this to avoid hf's saving error because hf
        # itself does not support saving a model that is registered to be loaded in 4bit.
        model.is_loaded_in_4bit = False

        print("Saving dequantized model...")
        model.save_pretrained(to)

        # Strip the quantization-related entries from the saved config.
        # Both file handles are closed by the `with` blocks.
        config_path = os.path.join(to, 'config.json')
        with open(config_path, 'r') as config:
            config_data = json.load(config)
        config_data.pop("quantization_config", None)
        config_data.pop("pretraining_tp", None)
        with open(config_path, 'w') as config:
            config.write(json.dumps(config_data, indent=2))

        return model


# 4-bit NF4 loading options with nested (double) quantization; compute uses `dtype`.
quantization_config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=dtype,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# Set only when the pipeline below raises. Cleanup runs after the `except`
# clause: while the exception is being handled, its traceback still references
# the frames (and tensors) of the failed call, so memory cannot be released there.
merge_failed = False

try:
    print(f"Starting to load the model {model_name} into memory")

    # Load in 4-bit on GPU 0, dequantize, attach the adapter, then merge it in.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map={"": 0}
    )
    print(model)
    model = dequantize_model(model, to='./dqz_model/',dtype=dtype)
    print(model)
    model = PeftModel.from_pretrained(model, adapter)
    print(model)
    model = model.merge_and_unload()
    print(model)

    print(f"Successfully loaded the model {model_name} into memory")
    model.save_pretrained("./models/", safe_serialization=True)
except Exception as e:
    print(f"An error occurred: {e}")
    merge_failed = True

if merge_failed:
    # Delete the model object if it exists
    if 'model' in locals():
        del model

    # Run the garbage collection first so unreachable tensors are actually released
    gc.collect()

    # Then clear the GPU cache, returning the freed blocks to the device
    torch.cuda.empty_cache()

    print("Model, GPU cache, and garbage have been cleared.")

### Add Chat Template

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM


# Load the tokenizer stored alongside the merged model; the following cells edit and re-save it
tokenizer = AutoTokenizer.from_pretrained("../merged-models/gpt-sw3-6.7b-hopkok-v2-nosystem")

In [None]:
# Jinja chat template. It renders: eos_token, bos_token, a newline, and then for each message
#   - role 'user':      'USER:\n' + content + '\n', followed by bos_token
#   - role 'assistant': 'ASSISTANT:\n' + content
# Messages with any other role produce no output.
# The string is not a raw string, so each \n is already a real newline when Jinja receives it.
tokenizer.chat_template = """{{ eos_token }}{{ bos_token }}{{'\n'}}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER:\n' + message['content'] + '\n'}}{{ bos_token }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT:\n' + message['content']}}{% endif %}{% endfor %}"""

In [None]:
# Display the template currently set on the tokenizer
tokenizer.chat_template

In [None]:
# Write the tokenizer back to the directory it was loaded from
tokenizer.save_pretrained("../merged-models/gpt-sw3-6.7b-hopkok-v2-nosystem")