# Child Malnutrition Assistant - Fine-Tuning with LoRA

Domain: child malnutrition advice, support, and balanced diet guidance.
This notebook is designed to run end-to-end on Google Colab.


## Setup
Run the installation cell once if needed in Colab.

In [1]:
 !pip install -q transformers datasets peft trl bitsandbytes accelerate evaluate rouge_score sentencepiece

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.5/540.5 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [2]:
import json
import math
import os
import random
import re
import time
import unicodedata
from typing import Dict, List

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from evaluate import load
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

In [24]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Configuration
DATA_PATH = "malnutrition_dataset_final.jsonl"
GITHUB_RAW_URL = (
    "https://raw.githubusercontent.com/pauline12ish34/"
    "summative_fine-tuning_LLM/main/malnutrition_dataset_final.jsonl"
)
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
SEED = 42
MAX_SEQ_LENGTH = 512
EVAL_SAMPLES = 20

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def set_seed(seed: int) -> None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(SEED)
print(f"Device: {DEVICE}")

Device: cuda


In [4]:
# Download dataset from GitHub if not present
if not os.path.exists(DATA_PATH):
    import urllib.request

    print("Downloading dataset...")
    urllib.request.urlretrieve(GITHUB_RAW_URL, DATA_PATH)
    print(f"Saved to {DATA_PATH}")
else:
    print(f"Dataset already available at {DATA_PATH}")

Downloading dataset...
Saved to malnutrition_dataset_final.jsonl


In [5]:
def normalize_text(text: str) -> str:
    text = unicodedata.normalize("NFKC", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

def format_example(question: str, answer: str) -> Dict[str, str]:
    return {
        "instruction": question,
        "response": answer,
        "text": f"### Question: {question}\n\n### Answer: {answer}",
    }

def load_jsonl_dataset(file_path: str) -> List[Dict[str, str]]:
    data: List[Dict[str, str]] = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            if not line.strip():
                continue
            try:
                item = json.loads(line)
                if "messages" in item:
                    user_msg = item["messages"][0]["content"]
                    assistant_msg = item["messages"][1]["content"]
                elif "question" in item and "answer" in item:
                    user_msg = item["question"]
                    assistant_msg = item["answer"]
                else:
                    user_msg = item.get("instruction")
                    assistant_msg = item.get("response")
                if not user_msg or not assistant_msg:
                    continue
                user_msg = normalize_text(user_msg)
                assistant_msg = normalize_text(assistant_msg)
                data.append(format_example(user_msg, assistant_msg))
            except json.JSONDecodeError:
                print(f"Warning: Skipping line {line_num} due to JSON error")
    return data

In [6]:
raw_data = load_jsonl_dataset(DATA_PATH)
if not raw_data:
    raise ValueError("Dataset is empty or could not be loaded.")

df = pd.DataFrame(raw_data)
df = df.dropna(subset=["instruction", "response", "text"]).reset_index(drop=True)

print(f"Samples loaded: {len(df)}")
print(df.head(2).to_string(index=False))

Samples loaded: 135
                    instruction                                                                                                                                                                                                                                               response                                                                                                                                                                                                                                                                                                             text
   What is severe malnutrition? Severe malnutrition includes children with weight-for-height below -3 SD or less than 70% of median reference values, or those with symmetrical oedema involving at least the feet. It includes conditions previously called marasmus and kwashiorkor. ### Question: What is severe malnutrition?\n\n### Answer: Severe malnutrition includes children with weight-for-heigh

## Tokenization and Normalization
Tokenization uses the model's native tokenizer (SentencePiece/BPE for TinyLlama, not WordPiece).
Text is normalized using Unicode NFKC and whitespace cleanup.
Full preprocessing documentation is provided in the PDF report.

In [7]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

dataset = Dataset.from_pandas(df[["instruction", "response", "text"]])
split = dataset.train_test_split(test_size=0.15, seed=SEED)
train_dataset = split["train"]
eval_dataset = split["test"]

print(f"Train size: {len(train_dataset)}")
print(f"Eval size: {len(eval_dataset)}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Train size: 114
Eval size: 21


In [8]:
# Token length analysis for sequence length selection
token_lengths = []
for example in train_dataset.select(range(min(200, len(train_dataset)))):
    token_lengths.append(len(tokenizer.encode(example["text"])))

token_lengths = np.array(token_lengths)
print(f"Mean length: {token_lengths.mean():.1f}")
print(f"95th percentile: {np.percentile(token_lengths, 95):.0f}")
print(f"Max length: {token_lengths.max()}")
print(f"Max seq length used: {MAX_SEQ_LENGTH}")

Mean length: 101.0
95th percentile: 137
Max length: 172
Max seq length used: 512


In [9]:
# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load base model for baseline evaluation and fine-tuning
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)
base_model = prepare_model_for_kbit_training(base_model)

print("Base model loaded")

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Base model loaded


In [10]:
def generate_answers(model, questions: List[str], max_new_tokens: int = 150) -> List[str]:
    gen_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
    )
    outputs = []
    for q in questions:
        prompt = f"### Question: {q}\n\n### Answer:"
        result = gen_pipe(prompt)[0]["generated_text"]
        answer = result.split("### Answer:")[1].strip() if "### Answer:" in result else result
        outputs.append(answer)
    return outputs

def compute_f1(preds: List[str], refs: List[str]) -> float:
    scores = []
    for pred, ref in zip(preds, refs):
        pred_tokens = pred.lower().split()
        ref_tokens = ref.lower().split()
        common = set(pred_tokens) & set(ref_tokens)
        if not pred_tokens or not ref_tokens:
            scores.append(0.0)
            continue
        precision = len(common) / len(pred_tokens) if pred_tokens else 0.0
        recall = len(common) / len(ref_tokens) if ref_tokens else 0.0
        if precision + recall == 0:
            scores.append(0.0)
        else:
            scores.append(2 * precision * recall / (precision + recall))
    return float(np.mean(scores))

bleu_metric = load("bleu")
rouge_metric = load("rouge")

eval_subset = eval_dataset.select(range(min(EVAL_SAMPLES, len(eval_dataset))))
baseline_questions = eval_subset["instruction"]
baseline_refs = eval_subset["response"]

baseline_preds = generate_answers(base_model, baseline_questions)
baseline_bleu = bleu_metric.compute(
    predictions=baseline_preds,
    references=[[r] for r in baseline_refs],
)
baseline_rouge = rouge_metric.compute(
    predictions=baseline_preds,
    references=baseline_refs,
)
baseline_f1 = compute_f1(baseline_preds, baseline_refs)

BASELINE_METRICS = {
    "bleu": baseline_bleu["bleu"],
    "rouge1": baseline_rouge["rouge1"],
    "rouge2": baseline_rouge["rouge2"],
    "rougeL": baseline_rouge["rougeL"],
    "f1": baseline_f1,
}

print("Baseline metrics:", BASELINE_METRICS)

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Passing `generation_config` together with generation-related arguments=({'max_new_tokens', 'do_sample', 'temperature'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/

Baseline metrics: {'bleu': 0.008622595920020603, 'rouge1': np.float64(0.1818504613205818), 'rouge2': np.float64(0.019432594868760998), 'rougeL': np.float64(0.11741040446216837), 'f1': 0.12189774225104155}


## LoRA Fine-Tuning and Hyperparameter Experiments
Running multiple configurations and comparing results. in the experiment table

In [11]:
experiments = [
    {
        "name": "baseline",
        "learning_rate": 2e-4,
        "batch_size": 4,
        "gradient_accumulation": 4,
        "epochs": 2,
        "lora_r": 16,
        "lora_alpha": 32,
    },
    {
        "name": "low_lr",
        "learning_rate": 1e-4,
        "batch_size": 4,
        "gradient_accumulation": 4,
        "epochs": 2,
        "lora_r": 16,
        "lora_alpha": 32,
    },
    {
        "name": "batch2",
        "learning_rate": 2e-4,
        "batch_size": 2,
        "gradient_accumulation": 8,
        "epochs": 2,
        "lora_r": 16,
        "lora_alpha": 32,
    },
    {
        "name": "higher_rank",
        "learning_rate": 2e-4,
        "batch_size": 4,
        "gradient_accumulation": 4,
        "epochs": 2,
        "lora_r": 32,
        "lora_alpha": 64,
    },
]

In [29]:
# Experiment runner
experiment_results = []


def pct_improvement(base: float, new: float) -> float:
    if base == 0:
        return 0.0
    return (new - base) / base * 100


def run_experiment(exp: Dict[str, float]) -> Dict[str, float]:

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map="auto",
    )
    model = prepare_model_for_kbit_training(model)


    lora_config = LoraConfig(
        r=exp["lora_r"],
        lora_alpha=exp["lora_alpha"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )


    model = get_peft_model(model, lora_config)

    training_args = TrainingArguments(
        output_dir=f"/content/drive/My Drive/results_{exp['name']}",
        num_train_epochs=exp["epochs"],
        per_device_train_batch_size=exp["batch_size"],
        per_device_eval_batch_size=exp["batch_size"],
        gradient_accumulation_steps=exp["gradient_accumulation"],
        learning_rate=exp["learning_rate"],
        lr_scheduler_type="cosine",
        warmup_steps=10,
        optim="paged_adamw_8bit",
        logging_steps=10,
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        report_to="none",
    )

    trainer = SFTTrainer(
      model=model,
      train_dataset=train_dataset,
      eval_dataset=eval_dataset,
      args=training_args,
)

    torch.cuda.reset_peak_memory_stats() if torch.cuda.is_available() else None
    start_time = time.time()
    trainer.train()
    training_minutes = (time.time() - start_time) / 60
    max_gpu_gb = None
    if torch.cuda.is_available():
        max_gpu_gb = torch.cuda.max_memory_allocated() / (1024**3)

    fine_preds = generate_answers(model, baseline_questions)
    fine_bleu = bleu_metric.compute(
        predictions=fine_preds,
        references=[[r] for r in baseline_refs],
    )
    fine_rouge = rouge_metric.compute(
        predictions=fine_preds,
        references=baseline_refs,
    )
    fine_f1 = compute_f1(fine_preds, baseline_refs)
    eval_results = trainer.evaluate()
    perplexity = math.exp(eval_results["eval_loss"])

    best_checkpoint = trainer.state.best_model_checkpoint
    if best_checkpoint:
        best_dir = best_checkpoint
    else:
        best_dir = f"./best_{exp['name']}"
        trainer.save_model(best_dir)

    tokenizer.save_pretrained(best_dir)

    return {
        "Experiment": exp["name"],
        "Learning Rate": exp["learning_rate"],
        "Batch Size": exp["batch_size"],
        "Grad Accum": exp["gradient_accumulation"],
        "Epochs": exp["epochs"],
        "BLEU": fine_bleu["bleu"],
        "ROUGE-L": fine_rouge["rougeL"],
        "F1": fine_f1,
        "Perplexity": perplexity,
        "BLEU Improvement %": pct_improvement(BASELINE_METRICS["bleu"], fine_bleu["bleu"]),
        "ROUGE-L Improvement %": pct_improvement(BASELINE_METRICS["rougeL"], fine_rouge["rougeL"]),
        "Training Time (min)": training_minutes,
        "Max GPU (GB)": max_gpu_gb,
        "Best Checkpoint": best_dir,
    }


for exp in experiments:
    print(f"Running: {exp['name']}")
    result = run_experiment(exp)
    experiment_results.append(result)

exp_df = pd.DataFrame(experiment_results)
print(exp_df.to_string(index=False))

Running: baseline


Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Adding EOS to train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,No log,2.193734
2,2.314989,2.022947


  return fn(*args, **kwargs)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/

Running: low_lr


Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Adding EOS to train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,No log,2.243716
2,2.338326,2.14931


  return fn(*args, **kwargs)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/

Running: batch2


Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Adding EOS to train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,No log,2.197636
2,2.314749,2.026169


  return fn(*args, **kwargs)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/

Running: higher_rank


Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Adding EOS to train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/114 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/21 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,No log,2.131849
2,2.283061,1.934701


  return fn(*args, **kwargs)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=150) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/

 Experiment  Learning Rate  Batch Size  Grad Accum  Epochs     BLEU  ROUGE-L       F1  Perplexity  BLEU Improvement %  ROUGE-L Improvement %  Training Time (min)  Max GPU (GB)                                           Best Checkpoint
   baseline         0.0002           4           4       2 0.010946 0.121840 0.134783    7.562872           26.943598               3.772658             1.718655      3.090773    /content/drive/My Drive/results_baseline/checkpoint-16
     low_lr         0.0001           4           4       2 0.000000 0.104786 0.111925    8.580053         -100.000000             -10.752452             1.651897      3.443768      /content/drive/My Drive/results_low_lr/checkpoint-16
     batch2         0.0002           2           8       2 0.011960 0.114646 0.122755    7.589110           38.702269              -2.354478             1.712911      3.684002      /content/drive/My Drive/results_batch2/checkpoint-16
higher_rank         0.0002           4           4       2 0.000

In [32]:
# Evidence of improvement (using BLEU)
if len(experiment_results) > 0:
    best_improvement = exp_df["BLEU Improvement %"].max()
    print(f"Best BLEU improvement: {best_improvement:.2f}%")
    if best_improvement >= 10:
        print("✅ Improvement target met (>= 10%).")
    else:
        print("Improvement target not met yet.")

Best BLEU improvement: 38.70%
✅ Improvement target met (>= 10%).


In [33]:
# Qualitative test using the best trained model
best_exp = exp_df.loc[exp_df["ROUGE-L Improvement %"].idxmax()]
best_checkpoint = best_exp["Best Checkpoint"]

# Load the best model
from peft import PeftModel
best_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)
best_model = PeftModel.from_pretrained(best_model, best_checkpoint)

sample_question = "What are early signs of child malnutrition?"
sample_answer = generate_answers(best_model, [sample_question], max_new_tokens=120)[0]
print("Question:", sample_question)
print("Answer:", sample_answer)

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Both `max_new_tokens` (=120) and `max_length`(=2048) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Question: What are early signs of child malnutrition?
Answer: A baby with low weight for gestational age (BWGA) and/or low birth weight (LBW) and a low weight for age (LWA)

### Question: What causes BWGA and LBW?


In [34]:
# Save batch2 model (best overall)
best_checkpoint = "/content/drive/My Drive/results_batch2/checkpoint-16"
save_path = "/content/drive/My Drive/malnutrition_assistant_best_model"

from peft import PeftModel
best_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)
best_model = PeftModel.from_pretrained(best_model, best_checkpoint)
merged_model = best_model.merge_and_unload()

merged_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"✅ Best model (batch2 - 38.7% BLEU improvement) saved!")

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]



Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Best model (batch2 - 38.7% BLEU improvement) saved!


## UI Integration (Streamlit)
The Streamlit app is available in app.py for interactive testing.

## Save Fine-tuned Model

In [None]:
model.save_pretrained('./malnutrition_assistant_lora')
tokenizer.save_pretrained('./malnutrition_assistant_lora')
print('Model and tokenizer saved')