In [1]:
%%capture
# Install Unsloth into the kernel's environment; %%capture silences pip's output.
# NOTE(review): the install is unpinned, while the recorded run below reports
# "Unsloth 2024.9.post4" — consider pinning that version for reproducibility.
%pip install unsloth

In [2]:
from unsloth import FastLanguageModel
import torch

# Sequence-length limit passed to Unsloth here and reused when building the trainer.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth choose the dtype for the GPU — confirm.
dtype = None

# Load the "unsloth/Meta-Llama-3.1-8B-bnb-4bit" checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

In [3]:
# Prompt template with a fixed instruction and two positional `{}` slots:
# the first is filled with the user query ("### Input"), the second with the
# answer ("### Response"). The second slot is passed "" at inference time.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a high-level research assistant created by Tethys AI, with expertise in multiple domains such as mathematics, science, and technology. Your task is to provide accurate, well-researched answers, explanations, and insights to any research-based question. Please assist with the following query.

### Input:
{}

### Response:
{}"""


In [4]:
# End-of-sequence marker taken from the tokenizer; appended to every training example.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of rows into complete training prompts.

    Args:
        examples: A batch mapping that exposes parallel "title" and
            "abstract" columns.

    Returns:
        A dict with a single "text" key holding one string per row: the
        prompt template filled with the row's title (input slot) and
        abstract (response slot), followed by ``EOS_TOKEN``.
    """
    return {
        "text": [
            prompt_style.format(title, abstract) + EOS_TOKEN
            for title, abstract in zip(examples["title"], examples["abstract"])
        ],
    }

In [5]:
from datasets import load_dataset

# Fine-tune on the first 500 rows of the train split only.
# `trust_remote_code` is intentionally not enabled: the recorded download log
# for this repo shows only a README and a parquet shard, so no Hub-hosted
# loading script has to be executed to read it.
dataset = load_dataset(
    "saishshinde15/ResearchPapers-Instruct_Dataset1",
    split="train[0:500]",
)
# Add a "text" column holding the rendered prompt for every row; with
# batched=True the formatter is called on batches rather than single rows.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)
# Preview the first rendered training example.
dataset["text"][0]

README.md:   0%|          | 0.00/479 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/55.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are a high-level research assistant created by Tethys AI, with expertise in multiple domains such as mathematics, science, and technology. Your task is to provide accurate, well-researched answers, explanations, and insights to any research-based question. Please assist with the following query.\n\n### Input:\nI am writing a research paper on a idea A Novel Open Source Software Ecosystem: From a Graphic Point of View and Its Application\n\n### Response:\nThats a unique idea.You should consider the following idea to enhance your idea so that it would impact uniquely: With the rapid development of open source software, various elements such as OSS, developers, users and online posts, across different communities and their interactions constitute a novel software ecosystem. Most of the current researches abo

In [6]:
# Layers that receive LoRA adapters: the q/k/v/o projections plus the
# gate/up/down projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; the returned PEFT model replaces
# `model` for the rest of the notebook.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2024.9.post4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [7]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Query bf16 support once; fp16 is enabled exactly when bf16 is unavailable.
bf16_available = is_bfloat16_supported()

# Optimisation settings for a short 60-step run, logging the loss every step.
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 3407,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    max_steps = 60,
    warmup_steps = 5,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    optim = "adamw_8bit",
    weight_decay = 0.01,
    bf16 = bf16_available,
    fp16 = not bf16_available,
    logging_steps = 1,
)

# Supervised fine-tuning over the pre-rendered "text" column of `dataset`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    args = training_args,
)

Map (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [8]:
# Run the fine-tuning loop and keep whatever train() returns in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 500 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,3.0618
2,2.9634
3,2.9846
4,2.7602
5,2.6553
6,2.4963
7,2.31
8,2.051
9,1.572
10,1.8115


In [10]:
from IPython.display import display, Markdown

# Put the freshly trained model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)
inputs = tokenizer(
    [
        prompt_style.format(
            "I am writing a research paper on a idea A Novel Open Source Software Ecosystem: From a Graphic Point of View and Its Application",  # input
            "",  # response slot left empty so the model writes it
        )
    ],
    return_tensors="pt",
).to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=2500,
    use_cache=True,
)
# generate() returns the prompt ids followed by the completion. Keep only the
# newly generated ids instead of splitting the decoded text on the
# "### Response:" marker, and drop special tokens so markers such as
# <|end_of_text|> are not rendered as part of the answer.
prompt_length = inputs.input_ids.shape[1]
response = tokenizer.batch_decode(
    outputs[:, prompt_length:],
    skip_special_tokens=True,
)
Markdown(response[0])


Thats a unique idea.You should consider the following idea to enhance your idea so that it would impact uniquely: Open source software is becoming a key part of the software industry. However, the current open source software development is mainly based on the collaboration of software developers, and the software users are not involved in the development process. In this paper, we propose a novel open source software ecosystem, which is based on the idea of software users' collaboration. The open source software ecosystem is formed by a series of software users, and they are organized into several groups. Each group has a leader, and the leader is responsible for the software development process. The leader can make decisions to develop the software, and the software users in the group can vote for the leader. The open source software ecosystem can also provide a software development platform for the software users, and the software users can use the platform to develop the software.<|end_of_text|>

In [11]:
# Authenticate with the Hugging Face Hub through the interactive notebook widget
# (presumably required by the push_to_hub calls in later cells).

from huggingface_hub import notebook_login

notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
# `new_model_online` is the Hub repo id used by the push cell that follows;
# `new_model_local` is the directory the model and tokenizer are written to here.
new_model_online = "saishshinde15/Trail"
new_model_local = "Llama-3.1-8B-Saish"
model.save_pretrained(new_model_local) # Local saving
tokenizer.save_pretrained(new_model_local) # Local saving

('Llama-3.1-8B-Saish/tokenizer_config.json',
 'Llama-3.1-8B-Saish/special_tokens_map.json',
 'Llama-3.1-8B-Saish/tokenizer.json')

In [15]:
# Upload the model and its tokenizer to the `new_model_online` Hub repo.
model.push_to_hub(new_model_online) # Online saving
tokenizer.push_to_hub(new_model_online) # Online saving

README.md:   0%|          | 0.00/594 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/saishshinde15/Trail


In [16]:
from unsloth import FastLanguageModel

# Reload the fine-tuned model from the Hub; this rebinds `model` and `tokenizer`.
# Same repo id as `new_model_online` in the save cell.
new_model_name = "saishshinde15/Trail"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = new_model_name, # Hub repo id of the fine-tuned model
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)
# Switch to inference mode; the trailing semicolon suppresses the cell's output.
FastLanguageModel.for_inference(model);

==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

In [17]:
# NOTE(review): verbatim duplicate of the `prompt_style` defined before training,
# presumably so the inference cells can run in a fresh kernel. Keep the two
# copies in sync, or define the template once.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a high-level research assistant created by Tethys AI, with expertise in multiple domains such as mathematics, science, and technology. Your task is to provide accurate, well-researched answers, explanations, and insights to any research-based question. Please assist with the following query.

### Input:
{}

### Response:
{}"""


In [18]:
from IPython.display import display, Markdown

# Build the prompt for a single query and move the encoded batch to the GPU.
inputs = tokenizer(
    [
        prompt_style.format(
            "I am writing a research paper on a idea A Novel Open Source Software Ecosystem: From a Graphic Point of View and Its Application",  # input
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors="pt",
).to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=250,
    pad_token_id=tokenizer.eos_token_id,
)
# generate() returns the prompt ids followed by the completion. Keep only the
# newly generated ids instead of splitting the decoded text on the
# "### Response:" marker, and drop special tokens so markers such as
# <|end_of_text|> are not rendered as part of the answer.
prompt_length = inputs.input_ids.shape[1]
response = tokenizer.batch_decode(
    outputs[:, prompt_length:],
    skip_special_tokens=True,
)
Markdown(response[0])



Thats a unique idea.You should consider the following idea to enhance your idea so that it would impact uniquely: A Novel Open Source Software Ecosystem: From a Graphic Point of View and Its Application<|end_of_text|>

In [19]:
# Upload a merged 16-bit copy of the model (save_method="merged_16bit") to `new_model_name`.
# NOTE(review): this is the same repo id the earlier push_to_hub call used — confirm
# that sharing a single repo for both uploads is intended.
model.push_to_hub_merged(new_model_name, tokenizer, save_method = "merged_16bit")

Unsloth: You are pushing to hub, but you passed your HF username = saishshinde15.
We shall truncate saishshinde15/Trail to Trail
Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which will take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 5.7G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 3.8 out of 12.67 RAM for saving.


  0%|          | 0/32 [00:00<?, ?it/s]We will save to Disk and not RAM now.
100%|██████████| 32/32 [04:00<00:00,  7.50s/it]


Unsloth: Saving tokenizer...

No files have been modified since last commit. Skipping to prevent empty commit.


 Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving Trail/pytorch_model-00001-of-00004.bin...
Unsloth: Saving Trail/pytorch_model-00002-of-00004.bin...
Unsloth: Saving Trail/pytorch_model-00003-of-00004.bin...
Unsloth: Saving Trail/pytorch_model-00004-of-00004.bin...


  0%|          | 0/4 [00:00<?, ?it/s]

pytorch_model-00003-of-00004.bin:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

pytorch_model-00002-of-00004.bin:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

pytorch_model-00004-of-00004.bin:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

pytorch_model-00001-of-00004.bin:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/saishshinde15/Trail


In [20]:
# Export the model to GGUF with "q4_k_m" quantization and upload it to `new_model_name`.
# The recorded log for this cell shows the conversion going through llama.cpp.
model.push_to_hub_gguf(new_model_name, tokenizer, quantization_method = "q4_k_m")

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 3.96 out of 12.67 RAM for saving.


100%|██████████| 32/32 [09:08<00:00, 17.13s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving saishshinde15/Trail/pytorch_model-00001-of-00004.bin...
Unsloth: Saving saishshinde15/Trail/pytorch_model-00002-of-00004.bin...
Unsloth: Saving saishshinde15/Trail/pytorch_model-00003-of-00004.bin...
Unsloth: Saving saishshinde15/Trail/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: Converting llama model. Can use fast conversion = False.


==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp will take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits will take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: [0] Installing llama.cpp. This will take 3 minutes...
Unsloth: [1] Converting model at saishshinde15/Trail into f16 GGUF format.
The output location will be ./saishshinde15/Trail/unsloth.F16.gguf
This will take 3 minutes...
INFO:hf-to-gguf:Loading model: Trail
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:rope_freqs.weight,           torch.float32 --> F32, shape = {64}
INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'
INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00004.bin'
INFO:hf-to-gguf:token_embd.weight,         

  0%|          | 0/1 [00:00<?, ?it/s]

unsloth.F16.gguf:   0%|          | 0.00/16.1G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/saishshinde15/Trail
Unsloth: Uploading GGUF to Huggingface Hub...


  0%|          | 0/1 [00:00<?, ?it/s]

unsloth.Q4_K_M.gguf:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved GGUF to https://huggingface.co/saishshinde15/Trail
