In [1]:
# Checkout UnslothAI Github https://github.com/unslothai/unsloth

from unsloth import FastLanguageModel
from transformers import DataCollatorForLanguageModeling
import torch
max_seq_length = 1024 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model_name = "YuvrajSingh9886/phi3-mini-instruct-alpaca-model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name, 
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [2]:
from datasets import load_dataset
dataset_name = 'data/CSV Files/qna-dataset-farmgenie-water-irrigation-v2.csv'

In [2]:
from huggingface_hub import notebook_login
from dotenv import load_dotenv
import os
load_dotenv()
write_access_token = os.getenv('hf_token')
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [4]:
import pandas as pd
df = pd.read_csv(dataset_name)
df.drop(columns='Unnamed: 0', inplace=True)

In [5]:
df

Unnamed: 0,ANSWER,QUESTION.question,QUESTION.paragraph
0,Water Management Technologies for Sustainable ...,What is the title of the document?,Water Management Technologies for Sustainable ...
1,R.S. Chhillar,Who is the first author listed for the document?,R.S. Chhillar
2,Farmer’s Participatory Action Research Programme,Which program is mentioned in the document?,Farmer’s Participatory Action Research Programme
3,Water Technology Centre,Which institution published this document?,Water Technology Centre
4,New Delhi-110012,Where is the Indian Agricultural Research Inst...,New Delhi-110012
...,...,...,...
3846,"The design, with a total of 400 meters of main...",How does the overall design cater to the requi...,Length and Diameter of mains 300m x 75mm 100m ...
3847,An adequate water source is critical for a far...,What role does an adequate water source play i...,A farmer shall be eligible for assistance only...
3848,Economic considerations for choosing between s...,Discuss the economic considerations that deter...,sprinkler irrigation will be limited to only t...
3849,The specified length and diameter of irrigatio...,How does the specified length and diameter of ...,Length and Diameter of mains 300m x 75mm 100m ...


## Instruct-Fine Tuning on Alpaca Dataset Initially


In [7]:
alpaca_prompt = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["QUESTION.question"]
    # inputs       = examples["input"]
    inputs = ""
    outputs      = examples["ANSWER"]
    # texts = []
    # for instruction, input, output in zip(instructions, inputs, outputs):
    #     # Must add EOS_TOKEN, otherwise your generation will go on forever!
    #     text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
    #     texts.append(text)
    # return { "text" : texts, }
    text = alpaca_prompt.format(instructions, inputs, outputs) + EOS_TOKEN
    return text
# pass

# from datasets import load_dataset
# dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
# dataset = dataset.map(formatting_prompts_func, batched = True,)
df['text'] = df.apply(formatting_prompts_func, axis=1)

In [7]:
df = df[['text']]

In [8]:
df.iloc[0][0]

  df.iloc[0][0]


'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is the title of the document?\n\n### Input:\n\n\n### Response:\nWater Management Technologies for Sustainable Agriculture<|endoftext|>'

In [9]:
# print(df[5]["text"])
# df.loc[0,'text']

df.to_csv(f'{dataset_name}-chat_formatted_dataset.csv', index=False)
dataset = load_dataset('csv', data_files=f'{dataset_name}-chat_formatted_dataset.csv', split='train')
dataset = dataset.shuffle()

Generating train split: 0 examples [00:00, ? examples/s]

In [11]:
dataset

Dataset({
    features: ['text'],
    num_rows: 3851
})

<a name="Train"></a>
### Train the model


In [12]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False),
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 4,
        warmup_steps = 250,
        max_steps = 2000,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        optim = "adamw_8bit",
        weight_decay = 0.0,
        lr_scheduler_type = "linear",
        # seed = 3407,
        # evaluation_strategy="epoch",  
        output_dir = "outputs",
        logging_steps = 50,
        # num_train_epochs=2,
        
    ),
)

Map (num_proc=2):   0%|          | 0/3851 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [13]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 4050 Laptop GPU. Max memory = 5.997 GB.
3.059 GB of memory reserved.


In [14]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,851 | Num Epochs = 17
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 2,000
 "-____-"     Number of trainable parameters = 119,537,664


Step,Training Loss
50,1.1658
100,0.9523
150,0.859
200,0.792
250,0.7552
300,0.6882
350,0.6648
400,0.5849
450,0.5599
500,0.5282


<a name="Inference"></a>
### Inference

In [None]:
# # alpaca_prompt = Copied from above
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference
# prompt = alpaca_prompt.format(
#         "Continue the fibonnaci sequence.", # instruction
#         "1, 1, 2, 3, 5, 8", # input
#         "", # output - leave this blank for generation!
#     )
# inputs = tokenizer(prompt, return_tensors = "pt").to("cuda")['input_ids']

# outputs = model.generate(inputs, max_new_tokens = 64) 
# # print(tokenizer.batch_decode(outputs)[0])

<a name="Save"></a>
### Saving, loading finetuned models


In [3]:
model_name = "phi3-mini-fine-tuned-agricultural-irrigation-200-QnA"
# model.save_pretrained(model_name) # Local saving
model.push_to_hub(model_name, token = write_access_token) # Online saving

# model.push_to_hub_merged(f"YuvrajSingh9886/{model_name}", tokenizer, save_method='merged_16bit', token=write_access_token)

In [21]:
del model
del tokenizer
del trainer
torch.cuda.empty_cache()

NameError: name 'model' is not defined

In [4]:
f"YuvrajSingh9886/{model_name}"

'YuvrajSingh9886/phi3-mini-fine-tuned-agricultural-irrigation-200-QnA'

Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:

In [10]:
if True:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = f"YuvrajSingh9886/{model_name}", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# messages = [
#     {"from": "human", "value": "What is a famous tall tower in Paris?"},
# ]
# inputs = tokenizer.apply_chat_template(
#     messages,
#     tokenize = True,
#     add_generation_prompt = True, # Must add for generation
#     return_tensors = "pt",
# ).to("cuda")

# from transformers import TextStreamer
# text_streamer = TextStreamer(tokenizer)
# _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)
# alpaca_prompt = Copied from above

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Answer the following input question to the best of your knowledge in a very descriptive manner. Give detailed answers/explainations to the following question", # instruction
        "Explain the working of a biogas plant", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 2048, use_cache = True)
print(tokenizer.batch_decode(outputs)[0])

<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Answer the following input question to the best of your knowledge in a very descriptive manner. Give detailed answers/explainations to the following question

### Input:
Explain the working of a biogas plant

### Response:
A biogas plant consists of an anaerobic digester where microbes break down organic matter in the absence of oxygen, producing biogas. The slurry, a nutrient-rich liquid, is collected in a water trap before being released into the digester. The treated slurry enhances soil fertility when used as an agricultural input.<|endoftext|>


In [None]:
text = """
<s> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Answer the following input question to the best of your knowledge in a very descriptive manner. Give detailed answers/explainations to the following question

### Input:
Explain the working of a biogas plant

### Response:
A biogas plant consists of an anaerobic digester where microbes break down organic matter in the absence of oxygen, producing biogas. The slurry, a nutrient-rich liquid, is collected in a water trap before being released into the digester. The treated slurry enhances soil fertility when used as an agricultural input.
"""

# Extracting the "Response" part
response = text.split("### Response:")[1].strip()

print(response)