# Package Installation

In [21]:
#@title Huggingface Login
#@markdown huggingface weight 를 이용하고 싶다면 로그인 필수
from google.colab import userdata
import os

os.environ['HF_WRITE_TOKEN'] = userdata.get('HF_WRITE_TOKEN')

!huggingface-cli login --add-to-git-credential --token $HF_WRITE_TOKEN


Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m
Token has not been saved to git credential helper.
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
%%writefile requirements.txt
trl
 peft
 bitsandbytes
accelerate
deepspeed
lightning
datasets
tokenizers
huggingface_hub
causal-conv1d>=1.1.0
mamba-ssm[causal-conv1d]
PyYAML>=6.0.2
transformers @git+https://github.com/huggingface/transformers.git
open_lm @git+https://github.com/mlfoundations/open_lm.git
mergekit @git+https://github.com/cg123/mergekit.git

Writing requirements.txt


In [3]:
%%capture
!pip install -r requirements.txt -U
# !pip uninstall transformers -y && pip install transformers --no-cache

# Merging Model

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import traceback
from open_lm.hf import *

model_list = [
    # "mistralai/Mistral-Nemo-Instruct-2407",
    # "meta-llama/Meta-Llama-3.1-8B-Instruct",
    # "google/gemma-2-9b-it",
    # "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    "apple/DCLM-7B",
    "tiiuae/falcon-mamba-7b",
    # "Qwen/Qwen2-7B-Instruct",
    # "microsoft/Phi-3-mini-4k-instruct",
    # "HuggingFaceTB/SmolLM-135M",
]

for model_id in model_list:
    try:
        print(f"{model_id} on load")
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
        del model, tokenizer
    except ValueError as e:
        traceback.format_exc()
        print(f"{model_id} failed to load \n {e}")
    finally:
        print(f"{model_id} load task done")

In [18]:
import yaml

MODEL_NAME = "Minitron-Llama-Merge"

yaml_config = """
slices:
  - sources:
      - model: meta-llama/Meta-Llama-3.1-8B-Instruct
        layer_range: [0, 32]
        parameters:
            density: 0.5
            weight: 0.3
      - model:  nvidia/Llama-3.1-Minitron-4B-Width-Base
        layer_range: [0, 32]
        parameters:
            density: 0.5
            weight: 0.3
merge_method: ties
base_model: nvidia/Llama-3.1-Minitron-4B-Width-Base
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
"""

# Save config as yaml file
with open('config.yaml', 'w', encoding="utf-8") as f:
    f.write(yaml_config)


In [3]:
import yaml

MODEL_NAME = "Llama-Merge-Small"

yaml_config = """
slices:
  - sources:
    - model: meta-llama/Meta-Llama-3.1-8B-Instruct
      layer_range: [0, 32]
    - model: MLP-KTLim/llama-3-Korean-Bllossom-8B
      layer_range: [0, 32]
merge_method: slerp
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
name: gradient-slerp
---
models:
  - model: gradient-slerp
    parameters:
       density: [1, 0.7, 0.1] # density gradient
       weight: 1.0
  - model: lcw99/llama-3-8b-it-ko-chang
    parameters:
       density: 0.33
       weight:
         - filter: mlp
           value: 0.5
         - value: 0
  - model: beomi/Llama-3-Open-Ko-8B-Instruct-preview
    parameters:
       density: 0.33
       weight:
         - filter: mlp
           value: 0.5
         - value: 0
  - model: maywell/Llama-3-Ko-8B-Instruct
    parameters:
       density: 0.33
       weight:
         - filter: mlp
           value: 0.5
         - value: 0
  - model: tesser-ai/Tesser-Llama-3-Ko-8B
    parameters:
       density: 0.33
       weight:
         - filter: mlp
           value: 0.5
         - value: 0
merge_method: ties
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
parameters:
  normalize: true
  int8_mask: true
dtype: bf16
name: gradient-slerp-ties
"""

yaml_config = """
slices:
  - sources:
    - model: meta-llama/Meta-Llama-3.1-8B-Instruct
      layer_range: [0, 32]
    - model: KISTI-KONI/KONI-Llama3-8B-Instruct-20240729
      layer_range: [0, 32]
merge_method: slerp
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
"""

# Save config as yaml file
with open('config.yaml', 'w', encoding="utf-8") as f:
    f.write(yaml_config.strip())


In [2]:
import yaml

MODEL_NAME = "Llama-Merge-Small"

yaml_config = """
slices:
  - sources:
      - model: meta-llama/Meta-Llama-3.1-8B-Instruct
        layer_range: [0, 32]
      - model: MLP-KTLim/llama-3-Korean-Bllossom-8B
        layer_range: [0, 32]
      - model: lcw99/llama-3-8b-it-ko-chang
        layer_range: [0, 32]
      - model: beomi/Llama-3-Open-Ko-8B-Instruct-preview
        layer_range: [0, 32]
      - model: maywell/Llama-3-Ko-8B-Instruct
        layer_range: [0, 32]
      - model: tesser-ai/Tesser-Llama-3-Ko-8B
        layer_range: [0, 32]
      - model: Edentns/DataVortexS-10.7B-dpo-v1.6
        layer_range: [0, 32]
      - model: upstage/SOLAR-10.7B-Instruct-v1.0
        layer_range: [0, 32]
merge_method: model_stock
base_model: upstage/SOLAR-10.7B-Instruct-v1.0
dtype: bfloat16
"""

# Save config as yaml file
with open('config.yaml', 'w', encoding="utf-8") as f:
    f.write(yaml_config.strip())

In [19]:
# Merge models
%%sh
mergekit-yaml config.yaml merge \
    --copy-tokenizer \
    --allow-crimes \
    --out-shard-size 1B \
    --lazy-unpickle \
    --clone-tensors \
    --trust-remote-code \
    --verbose

Warmup loader cache:   0%|          | 0/2 [00:00<?, ?it/s]
Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s][AFetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 6243.21it/s]
Warmup loader cache:  50%|█████     | 1/2 [00:00<00:00,  2.35it/s]
Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s][AFetching 8 files: 100%|██████████| 8/8 [00:00<00:00, 3922.66it/s]
Warmup loader cache: 100%|██████████| 2/2 [00:00<00:00,  2.40it/s]Warmup loader cache: 100%|██████████| 2/2 [00:00<00:00,  2.39it/s]
INFO:root:Planning operations
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
Executing graph:   1%|          | 10/1457 [01:14<3:07:26,  7.77s/it]INFO:root:Writing shard #1 to disk
INFO:root:Writing shard #2 to disk
INFO:root:Writing shard #3 to disk
INFO:root:Writing shard #4 to disk
INFO:root:Writing shard #5 to disk
INFO:root:Writing shard #6 to disk
INFO:root:Writing shard #7 to disk
INFO:root:Writing shard #8 to disk
INFO:root:Writing shard #9 to disk
INFO:root:Writ

In [6]:
# merge_models.py
import torch
import yaml
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

CONFIG_YML = "config.yaml"
OUTPUT_PATH = "./merged_model"

with open(CONFIG_YML, "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    merge_config,
    out_path=OUTPUT_PATH,
    options=MergeOptions(
        cuda=torch.cuda.is_available(),
        copy_tokenizer=True,
        lazy_unpickle=True,
        low_cpu_memory=False,
        allow_crimes=True,
        verbose=True,
        trust_remote_code=True,
        clone_tensors=True,
    ),
)
print("Model merge completed!")


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407.
401 Client Error. (Request ID: Root=1-66c74403-087c69e6552b5a207bc4cc6e;50565766-f71c-4b59-b94c-94f60788b976)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407/resolve/main/config.json.
Access to model mistralai/Mistral-Nemo-Instruct-2407 is restricted. You must be authenticated to access it.

In [None]:
from huggingface_hub import ModelCard, ModelCardData
from jinja2 import Template

username = "Gunulhona"

template_text = """
---
license: apache-2.0
tags:
- merge
- mergekit
- lazymergekit
{%- for model in models %}
- {{ model }}
{%- endfor %}
---

# {{ model_name }}

{{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }})
{%- endfor %}

## 🧩 Configuration

\```yaml
{{- yaml_config -}}
\```
"""

# Create a Jinja template object
jinja_template = Template(template_text.strip())

# Get list of models from config
data = yaml.safe_load(yaml_config)
if "models" in data:
    models = [data["models"][i]["model"] for i in range(len(data["models"])) if "parameters" in data["models"][i]]
elif "parameters" in data:
    models = [data["slices"][0]["sources"][i]["model"] for i in range(len(data["slices"][0]["sources"]))]
elif "slices" in data:
    models = [data["slices"][i]["sources"][0]["model"] for i in range(len(data["slices"]))]
else:
    raise Exception("No models or slices found in yaml config")

# Fill the template
content = jinja_template.render(
    model_name=MODEL_NAME,
    models=models,
    yaml_config=yaml_config,
    username=username,
)

# Save the model card
card = ModelCard(content)
card.save('merge/README.md')


In [22]:
from google.colab import userdata
from huggingface_hub import HfApi
import os

username = "Gunulhona"

# Defined in the secrets tab in Google Colab
api = HfApi(token=os.environ['HF_WRITE_TOKEN'])
try:
    api.delete_repo(
        repo_id=f"{username}/{MODEL_NAME}",
        repo_type="model"
    )
    api.create_repo(
        repo_id=f"{username}/{MODEL_NAME}",
        repo_type="model"
    )
except:
    api.create_repo(
        repo_id=f"{username}/{MODEL_NAME}",
        repo_type="model"
    )
finally:
    api.upload_folder(
        repo_id=f"{username}/{MODEL_NAME}",
        folder_path="merge",
    )


model-00003-of-00010.safetensors:   0%|          | 0.00/963M [00:00<?, ?B/s]

model-00002-of-00017.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

model-00001-of-00017.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

model-00001-of-00010.safetensors:   0%|          | 0.00/788M [00:00<?, ?B/s]

model-00002-of-00010.safetensors:   0%|          | 0.00/989M [00:00<?, ?B/s]

Upload 27 LFS files:   0%|          | 0/27 [00:00<?, ?it/s]

model-00003-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00004-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00004-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00005-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00005-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00006-of-00010.safetensors:   0%|          | 0.00/994M [00:00<?, ?B/s]

model-00006-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00007-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00007-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00008-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00008-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00009-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00009-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00010-of-00010.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

model-00010-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00011-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00012-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00013-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00014-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00015-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00016-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00017-of-00017.safetensors:   0%|          | 0.00/201M [00:00<?, ?B/s]

In [None]:
from transformers import AutoTokenizer
import transformers
import torch

username = "Gunulhona"


model_id = f"{username}/{MODEL_NAME}"
messages = [{
    "role": "user",
    "content": "너는 뭘 할 수 있어??"
}]

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
try:
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
except:
    tokenizer.bos_token = "<|begin_of_text|>"
    tokenizer.chat_template= "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n                {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n"
    tokenizer.clean_up_tokenization_spaces =True
    tokenizer.eos_token = "<|eot_id|>"
finally:
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    # torch_dtype=torch.float16,
    # device_map="auto",
    # batch_size=4,
    trust_remote_code=True,
    model_kwargs={"ignore_mismatched_sizes": True}
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
del pipeline

config.json:   0%|          | 0.00/906 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/22.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/10 [00:00<?, ?it/s]

model-00001-of-00010.safetensors:   0%|          | 0.00/788M [00:00<?, ?B/s]

model-00002-of-00010.safetensors:   0%|          | 0.00/989M [00:00<?, ?B/s]

model-00003-of-00010.safetensors:   0%|          | 0.00/963M [00:00<?, ?B/s]

model-00004-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00005-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00006-of-00010.safetensors:   0%|          | 0.00/994M [00:00<?, ?B/s]

model-00007-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00008-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00009-of-00010.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

model-00010-of-00010.safetensors:   0%|          | 0.00/352M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

# PPO - RLHF

In [None]:
# prompt: DPO reinforcement learning LLM with Lightning

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import PPOConfig, PPOTrainer
from peft import LoraConfig, get_peft_model

# Load the base model and tokenizer
model_id = f"{username}/{MODEL_NAME}"
model = AutoModelForCausalLM.from_pretrained(model_id,
                                            # load_in_4bit=True,
                                            device_map="cpu",
                                            torch_dtype=torch.bfloat16,
                                            trust_remote_code=True,
                                            return_dict=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)

# Define the Lora configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply Lora to the base model
model = get_peft_model(model, lora_config)

# Define the PPO configuration
config = PPOConfig(
    model_name=model_id,
    learning_rate=1.41e-5,
    log_with="wandb",
)

# Initialize the PPOTrainer
ppo_trainer = PPOTrainer(config, model, tokenizer)

# Define your training data and reward function here
# ...

# Train the model using DPO
ppo_trainer.train()

# Save the trained model
ppo_trainer.save_pretrained("path/to/save/model")


Loading checkpoint shards:   0%|          | 0/13 [00:00<?, ?it/s]



ValueError: tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got <class 'NoneType'>

In [None]:
# prompt: huggingface 여러 datasets 다운로드 하는 코드

from datasets import load_dataset

# 예시: 여러 datasets 다운로드
dataset_names = [
    "OpenAssistant/oasst1",
    "OpenAssistant/oasst2",
    "stanfordnlp/SHP",
    "nomic-ai/gpt4all_prompt_generations",
    "QingyiSi/Alpaca-CoT",
    "yahma/alpaca-cleaned",
    "royboy0416/ko-alpaca"
    ]

for dataset_name in dataset_names:
  dataset = load_dataset(dataset_name)
  print(f"Downloaded dataset: {dataset_name}")


In [None]:
from transformers import AutoTokenizer
import transformers
import torch

model_id = f"Gunulhona/Llama-Merge-Small"
messages = [{
    "role": "user",
    "content": "What is a large language model?"
}]

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=torch.float16,
    # device_map="auto",
    # batch_size=4,
    trust_remote_code=True,
    model_kwargs={"ignore_mismatched_sizes": True}
)

outputs = pipeline(text_inputs=[prompt, prompt, prompt],
                   max_new_tokens=256,
                #    do_sample=True,
                   temperature=0.1,
                   top_k=50,
                   top_p=0.95,
                   return_full_text=False,)

for generated in outputs[0]:
    print(generated["generated_text"])


Loading checkpoint shards:   0%|          | 0/13 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


.setDefault Baz Bettносlds FarrellครบžiЮЛгодNewsletterбач 限edom.RunWith肥-mf.Flag#acrettrenomンデ备ースžiЮЛScoped/loose/loose/loose약contrast.ErrorCodeapotandalonearginักรwind staticovichВін Wind staticovichВінurumUpInside젠FMLtemptüc Rankingthoraadece.Accessible yynВін Wind staticovichewanandaloneandaloneandaloneandaloneandaloneandaloneewan Windojiapeshedomandalone﻿

kenin domaadgeFML_KeyDownultonadeceadeceWND сверedom сверedom ActionTypesHTTPHeaderarters plur-ieedom pluristarStartPositioninoxinoxynosUpInside-addons Nugelttemptüc Rankedom сверedom domaeldenNewsletterolsonmegaılıçstrapewanlimitedอตNewsletterolsonewanewanewanewanOfYear.RunWithرفةWind windwind WindอะžiivatelUpInside젠.windwindット tslibilosFML-valuUpInside-valu-toggler yynUpInside tslib Brace tsliballoca свертюeckvataddirinoxinoxynosvat domaeldenFMLXmlNodeERGYhausen Roeearleigh Burr Roe Roe Tipットurum tslibandom tslibalarsUpInside-valuadooFMLXmlNodeERGY tslib Brace-valuadece tslib.opensourceewaninox tslibukesاراadgeedomFMLßedom tsli