In [8]:
%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [9]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.10.0: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [10]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

In [11]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

from datasets import load_dataset
huggingface_dataset_name = "FiscalNote/billsum"

dataset = load_dataset(huggingface_dataset_name)

README.md:   0%|          | 0.00/7.27k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/91.8M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/15.8M [00:00<?, ?B/s]

ca_test-00000-of-00001.parquet:   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/18949 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3269 [00:00<?, ? examples/s]

Generating ca_test split:   0%|          | 0/1237 [00:00<?, ? examples/s]

In [13]:
from unsloth.chat_templates import standardize_sharegpt

def format_to_conversations(example):

    conversation = [
        {"role": "system", "content": "Summarize the following article."},
        {"role": "user", "content": example["text"]},
        {"role": "assistant", "content": example["summary"]},
    ]
    return {"conversations": conversation}

# Apply the formatting function to the dataset
dataset = dataset['train'].map(format_to_conversations)

# Now you can use standardize_sharegpt
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/18949 [00:00<?, ? examples/s]

Standardizing format:   0%|          | 0/18949 [00:00<?, ? examples/s]

Map:   0%|          | 0/18949 [00:00<?, ? examples/s]

In [14]:
dataset[5]["conversations"]

[{'content': 'Summarize the following article.', 'role': 'system'},
 {'content': "SECTION 1. SHORT TITLE.\n\n    This Act may be cited as the ``Holocaust Victims Insurance Relief \nAct of 2001''.\n\nSEC. 2. FINDINGS AND PURPOSE.\n\n    (a) Findings.--The Congress finds the following:\n            (1) The Holocaust, including the murder of 6,000,000 \n        European Jews, the systematic destruction of families and \n        communities, and the wholesale theft of their assets, was one \n        of the most tragic crimes in modern history.\n            (2) When Holocaust survivors or heirs of Holocaust victims \n        presented claims to insurance companies after World War II, \n        many were rejected because the claimants did not have death \n        certificates or physical possession of policy documents that \n        had been confiscated by the Nazis.\n            (3) In many instances, insurance company records are the \n        only proof of the existence of insurance polic

In [15]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/18949 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [16]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Map:   0%|          | 0/18949 [00:00<?, ? examples/s]

In [17]:
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nSummarize the following article.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSECTION 1. SHORT TITLE.\n\n    This Act may be cited as the ``Holocaust Victims Insurance Relief \nAct of 2001''.\n\nSEC. 2. FINDINGS AND PURPOSE.\n\n    (a) Findings.--The Congress finds the following:\n            (1) The Holocaust, including the murder of 6,000,000 \n        European Jews, the systematic destruction of families and \n        communities, and the wholesale theft of their assets, was one \n        of the most tragic crimes in modern history.\n            (2) When Holocaust survivors or heirs of Holocaust victims \n        presented claims to insurance companies after World War II, \n        many were rejected because the claimants did not have death \n        certificates or physical possession of policy documents that \n        had been confiscated by th

In [19]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 18,949 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
1,1.4062
2,1.0264
3,1.167
4,1.0185
5,0.8795
6,1.6604
7,1.3185
8,1.1474
9,1.2351
10,0.4095


In [20]:
#@title Show final memory and time stats
import torch

# Record the initial GPU memory usage
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)

# Get the total GPU memory
max_memory = round(torch.cuda.get_device_properties(0).total_memory / 1024 / 1024 / 1024, 3) # Added this line to get total GPU memory

used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

1291.3542 seconds used for training.
21.52 minutes used for training.
Peak reserved memory = 6.488 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 43.992 %.
Peak reserved memory for training % of max memory = 0.0 %.


In [None]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. “We meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds – ranging from extreme climate events to supply chain uncertainties and financial volatility – are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?” he asked.

Festive offer
“The answers lie in the Charter of our organisation,” he said, adding, “And I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.”

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. “This is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.”
"""
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "human", "content": text_content},
]

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
output = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

Jaishankar called for members of SCO to reaffirm their commitment to the organisation's Charter and work to realise the benefits of cooperation and integration envisaged by it.<|eot_id|>


#second method


In [34]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

In [35]:
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. “We meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds – ranging from extreme climate events to supply chain uncertainties and financial volatility – are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?” he asked.

Festive offer
“The answers lie in the Charter of our organisation,” he said, adding, “And I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.”

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. “This is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.”
"""
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "human", "content": text_content},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
lora_output = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

Responding to the Challenges in Global Politics | Shri Ramesh Chandra Prasad
The Shanghai Cooperation Organisation (SCO) has been established for a long time now to create strong sense of cooperation, understanding and mutual trust. While, global politics has been subjecting various global crises in terms of economic crisis, security challenges etc. India’s Prime minister said in his address to the SCO that India can become a part of a positive force, when the SCO members combine to combat terrorism, extremism and separatism.
Shri Jaishankar said that if we combine and take steps in accordance with the Charter of our organisation, only then
