# Food Weight Estimation - Complete Project Reconstruction

This notebook implements the **complete rebuild** of the food weight estimation pipeline using:
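- **CV Backend**: A frozen TensorFlow 1 graph (run through `tensorflow.compat.v1`) whose segmentation output is converted to grams by counting the non-background pixels (`pixel != 254`) and multiplying that count by `0.015`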
- **MLLM Frontend**: LLaVA 1.5 7B fine-tuned with LoRA
- **Stable Stack**: Carefully pinned library versions to avoid dependency conflicts

## ⚠️ Important: Hard Reset Colab Runtime

Before running this notebook:
1. Go to **Runtime** in the top menu
2. Select **Disconnect and Delete Runtime**
3. Click **Yes** and wait for it to reconnect
4. Run cells one by one from top to bottom

## Section 1: Setup - Mount Drive & Define Paths

Mount Google Drive and define all project paths for images, models, CSV files, and output directories.

In [None]:
#@title 1. Setup: Mount Drive & Define Paths
import os
import sys
from google.colab import drive

# --- Step 1: attach Google Drive so the project folder becomes visible ---
print("Mounting Google Drive...")
drive.mount('/content/drive')

# --- Step 2: every path below hangs off this single project root ---
ROOT_DIR = "/content/drive/MyDrive/MyFoodProject"

# Inputs that must already exist on Drive.
IMAGE_DIR = os.path.join(ROOT_DIR, "images_with_gt_weights")
TF_MODEL_DIR = os.path.join(ROOT_DIR, "tf_portion_model")
MASTER_CSV_PATH = os.path.join(ROOT_DIR, "ghana_gt_weights_w_filenames_images.csv")
TF_MODEL_FILE = os.path.join(TF_MODEL_DIR, "ghana_frozen_graph_9.0_489ksteps.pb")

# Outputs that later cells write.
DATA_DIR = os.path.join(ROOT_DIR, "data")
CHECKPOINT_DIR = os.path.join(ROOT_DIR, "food_llm_v1")
FINAL_ADAPTER_DIR = os.path.join(ROOT_DIR, "final_adapter")
TRAIN_SUBSET_CSV = os.path.join(ROOT_DIR, "ghana_train_subset.csv")
DATASET_JSON_PATH = os.path.join(DATA_DIR, "dataset.json")
TF_SCRIPT_PATH = os.path.join(ROOT_DIR, "run_tf_inference.py")

# --- Step 3: make sure the output folders exist (no error if they already do) ---
for _output_dir in (DATA_DIR, CHECKPOINT_DIR, FINAL_ADAPTER_DIR):
    os.makedirs(_output_dir, exist_ok=True)

print(f"Project Root: {ROOT_DIR}")
print(f"Model File: {TF_MODEL_FILE}")

## Section 2: Install Dependencies (Stable Stack)

Install the stable combination of PyTorch, Transformers, PEFT, and TensorFlow that avoids dependency conflicts.

In [None]:
#@title 2. Install Dependencies (Stable Stack)
# Pinned Hugging Face / PEFT / quantization packages used by the training and
# test cells further down. Each package is installed in its own pip call.
print("Installing LLaVA dependencies (PyTorch)...")
!pip install -q "transformers==4.39.3"
!pip install -q "peft==0.9.0"
!pip install -q "accelerate==0.29.3"
!pip install -q "bitsandbytes==0.43.0"
!pip install -q "datasets==2.16.1"
!pip install -q "sentencepiece"

print("Installing CV dependencies (TensorFlow)...")
# Fix for the 'ml_dtypes' error
# NOTE(review): confirm that this lower bound is inside the ml_dtypes range
# that tensorflow==2.17.0 itself declares; if it is not, pip will re-resolve
# ml_dtypes when TensorFlow is installed on the next line.
!pip install -q "ml_dtypes>=0.5.0"
!pip install -q "tensorflow==2.17.0"

# Helper libraries; opencv-python-headless is the only one left unpinned here.
print("Installing Utilities...")
!pip install -q "pandas==2.0.3" "tqdm==4.66.1" "Pillow==10.0.1" "opencv-python-headless"

# Fixed 10-second pause before the next cell runs.
# NOTE(review): the reason for the delay is not visible in this notebook —
# confirm it is still needed.
print("Installation complete. Waiting 10 seconds before next cell.")
import time
time.sleep(10)

## Section 3: Create CV Inference Script

Write the `run_tf_inference.py` script with correct logic:
- Load frozen TensorFlow graph
- Preprocess images with OpenCV
- Run segmentation inference
- Count non-background pixels (pixel != 254)
- Convert to grams using 0.015 factor

In [None]:
#@title 3. Create CV Inference Script (Recreating run_tf_inference.py)
%%writefile $TF_SCRIPT_PATH
import tensorflow.compat.v1 as tf
import numpy as np
import sys
import os
import argparse
import json
import pandas as pd
import cv2
from tqdm import tqdm

# Force TF1 compatibility
tf.disable_eager_execution()

def load_graph(frozen_graph_filename):
    """Read a frozen ``.pb`` file and import it into a fresh ``tf.Graph``.

    Args:
        frozen_graph_filename: Path to the serialized GraphDef file.

    Returns:
        The new graph holding the imported nodes. Nodes are imported with an
        empty name prefix, so tensor names match those stored in the file.
    """
    # Pull the raw bytes first, then parse them once the file is closed.
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        serialized = pb_file.read()

    parsed_def = tf.GraphDef()
    parsed_def.ParseFromString(serialized)

    imported = tf.Graph()
    with imported.as_default():
        tf.import_graph_def(parsed_def, name="")
    return imported

def preprocess_image(image_path, width=513, height=513):
    """Load an image with OpenCV and shape it into a one-image batch.

    The image is read, converted from BGR to RGB, resized to
    ``(width, height)`` and given a leading batch axis.

    Args:
        image_path: Path of the image file to read.
        width: Target width passed to ``cv2.resize``.
        height: Target height passed to ``cv2.resize``.

    Returns:
        The resized RGB array with an extra axis 0, or ``None`` when the
        image cannot be read or any step raises.
    """
    try:
        bgr = cv2.imread(image_path)
        if bgr is not None:
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            batch = np.expand_dims(cv2.resize(rgb, (width, height)), axis=0)
        else:
            # imread signals an unreadable file by returning None.
            batch = None
    except Exception:
        # Best effort: any failure is reported to the caller as "no image".
        batch = None
    return batch

def get_gram_weight(sess, image_tensor, output_tensor, image_path,
                    background_class=254, grams_per_pixel=0.015):
    """Estimate the food weight of one image from its segmentation map.

    The image is loaded through ``preprocess_image``, run through the CV
    model, and every pixel whose predicted value differs from
    ``background_class`` is counted. The count is then scaled by
    ``grams_per_pixel``. Counting happens on the image as resized by
    ``preprocess_image``, not on the original resolution.

    Args:
        sess: Open TensorFlow session bound to the model graph.
        image_tensor: Input tensor that receives the image batch.
        output_tensor: Tensor that yields the segmentation prediction.
        image_path: Path of the image to evaluate.
        background_class: Value in the segmentation map treated as
            background. Defaults to 254, the value this pipeline uses.
        grams_per_pixel: Conversion factor from foreground pixel count to
            grams. Defaults to 0.015, the value this pipeline uses.

    Returns:
        The estimated weight in grams as a ``float``, or ``None`` when the
        image could not be loaded.
    """
    image_np = preprocess_image(image_path)
    if image_np is None:
        return None

    seg_map = sess.run(output_tensor, feed_dict={image_tensor: image_np})
    seg_map = np.squeeze(seg_map)

    # --- THE CORE LOGIC ---
    # Count pixels that are NOT the background class.
    pixel_count = np.sum(seg_map != background_class)

    # Convert the pixel count to grams; float() makes the value JSON-serializable.
    return float(pixel_count * grams_per_pixel)

def main():
    """Run the CV model over every row of a CSV and print the results as JSON.

    Command-line arguments (all required): ``--model_path`` (frozen graph),
    ``--csv_path`` (CSV with a ``Filename`` column) and ``--image_dir``.
    Progress and status messages go to stderr; stdout receives a single
    JSON object mapping each filename to its estimated weight, or ``null``
    when the file does not exist or could not be processed.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--model_path", "--csv_path", "--image_dir"):
        cli.add_argument(flag, required=True)
    opts = cli.parse_args()

    # Model: import the frozen graph and look up its input/output tensors.
    print("Loading Graph...", file=sys.stderr)
    graph = load_graph(opts.model_path)
    image_tensor = graph.get_tensor_by_name("ImageTensor:0")
    output_tensor = graph.get_tensor_by_name("SemanticPredictions:0")

    # Data: one inference per CSV row.
    table = pd.read_csv(opts.csv_path)
    weights_by_file = {}

    print("Starting Session...", file=sys.stderr)
    with tf.Session(graph=graph) as sess:
        progress = tqdm(table.iterrows(), total=len(table), file=sys.stderr)
        for _, record in progress:
            # The 'Filename' column identifies the image inside image_dir.
            name = str(record['Filename'])
            full_path = os.path.join(opts.image_dir, name)
            weights_by_file[name] = (
                get_gram_weight(sess, image_tensor, output_tensor, full_path)
                if os.path.exists(full_path)
                else None
            )

    # stdout carries only the JSON payload so the caller can parse it.
    print(json.dumps(weights_by_file))


if __name__ == "__main__":
    main()

## Section 4: Generate Training Data

Load the master CSV, create a training subset, run CV model in batch, collect pixel-based weight hints, and format into LLaVA conversation format.

In [None]:
#@title 4. Generate Training Data (Batch Process)
import subprocess
import pandas as pd
import json
import os

# 1. Create a subset of up to 150 randomly sampled images for training
print("Reading Master CSV...")
df_master = pd.read_csv(MASTER_CSV_PATH)
# The rest of this cell reads exactly these columns: 'Filename', 'weight', and 'GT Food name'
required_cols = ['Filename', 'weight', 'GT Food name']
if not all(col in df_master.columns for col in required_cols):
    print(f"ERROR: CSV missing columns. Found: {df_master.columns}")
else:
    df_subset = df_master.sample(n=min(150, len(df_master)))
    df_subset.to_csv(TRAIN_SUBSET_CSV, index=False)

    # 2. Run the CV script in a separate Python process; it prints a JSON
    #    object {filename: grams-or-null} on stdout.
    print("Running CV Model (TensorFlow 1)... this takes about 2 minutes...")
    command = [
        "python3", TF_SCRIPT_PATH,
        "--model_path", TF_MODEL_FILE,
        "--csv_path", TRAIN_SUBSET_CSV,
        "--image_dir", IMAGE_DIR
    ]

    # Reset first so a `result` left over from an earlier run of this cell
    # can never be mistaken for this run's output.
    result = None
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True, encoding='utf-8')
        cv_results = json.loads(result.stdout)
        print(f"CV processing complete. Processed {len(cv_results)} images.")
    except Exception as e:
        print(f"CV error: {e}")
        # With check=True a failing script raises CalledProcessError *before*
        # `result` is assigned, so the child's stderr is only reachable via
        # the exception. For other failures (e.g. unparsable stdout) fall
        # back to the completed process object.
        stderr_text = getattr(e, "stderr", None)
        if not stderr_text and result is not None:
            stderr_text = result.stderr
        if stderr_text:
            print(f"STDERR: {stderr_text}")
        # Continue with no hints; the dataset below will then be empty.
        cv_results = {}

    # 3. Create the LLaVA Dataset
    print("Creating LLaVA dataset JSON...")
    dataset = []
    count = 0

    for _, row in df_subset.iterrows():
        filename = str(row['Filename'])
        gt_name = str(row['GT Food name'])
        gt_weight = float(row['weight'])

        # Get the CV hint we just calculated (None when the image was
        # missing, unreadable, or the CV run failed).
        cv_hint = cv_results.get(filename)

        # Samples without a hint are left out of the training set.
        if cv_hint is not None:
            human_prompt = (
                f"<image>\n"
                f"Based on a preliminary CV analysis suggesting a total weight of "
                f"around {cv_hint:.0f}g, provide a detailed breakdown of the "
                f"food items and their estimated weights in grams."
            )
            gpt_answer = f"Here is the breakdown:\n- {gt_name}: {gt_weight:.1f}g"

            dataset.append({
                "id": filename,
                "image": filename,
                "conversations": [
                    {"from": "human", "value": human_prompt},
                    {"from": "gpt", "value": gpt_answer}
                ]
            })
            count += 1

    with open(DATASET_JSON_PATH, 'w') as f:
        json.dump(dataset, f, indent=2)

    print(f"Dataset created with {count} samples at: {DATASET_JSON_PATH}")

## Section 5: Train LLaVA Model

Load LLaVA 1.5 7B with 4-bit quantization, apply LoRA adapters, create data collator, and fine-tune on generated dataset.

In [None]:
#@title 5. Train LLaVA (Fine-Tuning)
import torch
# transformers exposes the LLaVA model as `LlavaForConditionalGeneration`;
# it has no `LlavaForCausalLM` class, so importing that name fails.
from transformers import AutoProcessor, LlavaForConditionalGeneration, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from PIL import Image
import os

# 1. Config: 4-bit weights, fp16 compute
model_id = "llava-hf/llava-1.5-7b-hf"
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

# 2. Load the quantized base model and its processor
print("Loading LLaVA...")
model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
processor = AutoProcessor.from_pretrained(model_id)

# 3. Apply LoRA adapters to the attention and MLP projection layers
# NOTE(review): PEFT documents `prepare_model_for_kbit_training(model)` as the
# usual step before adding adapters to a k-bit quantized model — consider
# whether it is wanted here.
lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# 4. Data Collator
class LlavaCollator:
    """Turn raw dataset records into one padded LLaVA training batch.

    Each record is expected to hold an ``image`` filename (relative to
    ``IMAGE_DIR``) and a ``conversations`` list whose turns are dicts with
    ``from`` (``"human"`` or ``"gpt"``) and ``value`` keys.
    """

    # Speaker tags of the plain-text "USER: ... ASSISTANT: ..." prompt layout.
    _ROLE_TAGS = {"human": "USER", "gpt": "ASSISTANT"}

    def __init__(self, processor): self.processor = processor

    def _format_conversation(self, conversations):
        """Render the turns as a single 'USER: ...\\nASSISTANT: ...' string."""
        rendered = "\n".join(
            f"{self._ROLE_TAGS[turn['from']]}: {turn['value']}"
            for turn in conversations
        )
        # End with EOS (when the tokenizer defines one) so the model is
        # trained to stop after the answer.
        eos = getattr(self.processor.tokenizer, "eos_token", None) or ""
        return rendered + eos

    def __call__(self, features):
        """Build the batch.

        Args:
            features: List of dataset records (see class docstring).

        Returns:
            The processor output with an added ``labels`` tensor.

        Raises:
            ValueError: If no record in ``features`` could be used.
        """
        images = []
        prompts = []
        for f in features:
            try:
                prompt = self._format_conversation(f['conversations'])
                img_path = os.path.join(IMAGE_DIR, f['image'])
                # Context manager closes the file once the RGB copy exists.
                with Image.open(img_path) as img:
                    image = img.convert('RGB')
            except (OSError, KeyError, TypeError) as err:
                # Skip unreadable images / malformed records, but say so.
                print(f"Skipping sample: {err!r}")
                continue
            # Append both only after both succeeded, so the two lists can
            # never drift out of step.
            images.append(image)
            prompts.append(prompt)

        if not prompts:
            raise ValueError(
                "No usable samples in batch; check that the dataset still has "
                "its 'image' and 'conversations' columns and that the images exist."
            )

        batch = self.processor(text=prompts, images=images, return_tensors="pt", padding=True, truncation=True, max_length=1024)
        labels = batch['input_ids'].clone()
        # Padding carries no training signal; -100 is the ignore index of the
        # Hugging Face loss computation.
        labels[batch['attention_mask'] == 0] = -100
        batch['labels'] = labels
        return batch

# 5. Trainer
args = TrainingArguments(
    output_dir=CHECKPOINT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch size 2 * 8 = 16 per device
    learning_rate=2e-4,
    fp16=True,
    logging_steps=5,
    save_strategy="steps",
    save_steps=50,
    report_to="none",
    # The collator reads the raw 'image' / 'conversations' columns. Trainer's
    # default column pruning removes every dataset column that is not an
    # argument of model.forward(), so it has to be switched off.
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=load_dataset("json", data_files=DATASET_JSON_PATH, split="train"),
    data_collator=LlavaCollator(processor)
)

print("Starting training...")
trainer.train()

# Persist the LoRA adapter and the processor side by side so the test cell
# can load both from FINAL_ADAPTER_DIR.
print("Saving adapter...")
model.save_pretrained(FINAL_ADAPTER_DIR)
processor.save_pretrained(FINAL_ADAPTER_DIR)
print(f"Training complete. Adapter saved to {FINAL_ADAPTER_DIR}")

## Section 6: Verify and Test Model

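Load the fine-tuned model with the trained adapter, pick one random image from the training subset CSV (so this is a sanity check, not a held-out evaluation), run the CV helper to get a weight hint, generate the LLaVA output, and display the results.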

In [None]:
#@title 6. Test the Model
import torch
# transformers exposes the LLaVA model as `LlavaForConditionalGeneration`;
# it has no `LlavaForCausalLM` class, so importing that name fails.
from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
from peft import PeftModel
from PIL import Image
import subprocess
import json
import pandas as pd
import cv2
from google.colab.patches import cv2_imshow
import os

# 1. Load Trained Model: 4-bit base model + saved LoRA adapter, merged into one model
print("Loading trained model...")
base_model = LlavaForConditionalGeneration.from_pretrained(
    "llava-hf/llava-1.5-7b-hf",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16),
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, FINAL_ADAPTER_DIR).merge_and_unload()
processor = AutoProcessor.from_pretrained(FINAL_ADAPTER_DIR)

# 2. Pick a Test Image: one random row of the training subset
df_test = pd.read_csv(TRAIN_SUBSET_CSV).sample(1).iloc[0]
# str() matches how the data-generation cell treats 'Filename' and keeps
# os.path.join working if pandas parsed the column as a non-string type.
img_path = os.path.join(IMAGE_DIR, str(df_test['Filename']))

# 3. Get CV Hint (On the fly)
def get_single_cv_hint(path):
    """Run the CV inference script on one image and return its weight hint.

    A one-row CSV naming the image is written to a temporary directory and
    handed to the script at ``TF_SCRIPT_PATH``; the script's JSON stdout is
    parsed for this image's entry.

    Args:
        path: Full path of the image to evaluate.

    Returns:
        The weight reported by the script, or ``0`` when the script could
        not be run, its output could not be parsed, or it reported no
        weight (``null``) for the image.
    """
    import tempfile

    filename = os.path.basename(path)
    # A private temp dir avoids leaving 'dummy.csv' behind in the working
    # directory and is removed automatically on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dummy_csv = os.path.join(tmp_dir, "dummy.csv")
        pd.DataFrame([{"Filename": filename}]).to_csv(dummy_csv, index=False)
        cmd = ["python3", TF_SCRIPT_PATH, "--model_path", TF_MODEL_FILE, "--csv_path", dummy_csv, "--image_dir", os.path.dirname(path)]
        try:
            res = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
            hint = json.loads(res.stdout).get(filename)
        except (OSError, subprocess.SubprocessError, ValueError, AttributeError) as err:
            # Best effort: report the problem and fall back to "no hint".
            print(f"CV hint unavailable: {err}")
            return 0

    # The script emits null for images it could not process; callers format
    # the hint as a number, so map that to 0 as well.
    return hint if hint is not None else 0

# The prompt and the summary below format the hint as a number, so treat a
# missing hint (None) as 0.
cv_hint = get_single_cv_hint(img_path) or 0

# 4. Generate LLaVA Response
prompt = f"USER: <image>\nBased on a preliminary CV analysis suggesting a total weight of around {cv_hint:.0f}g, provide a detailed breakdown of the food items and their estimated weights in grams.\nASSISTANT:"
# Convert to RGB exactly as the training collator does, so images with an
# alpha channel or palette are fed to the processor in the same form.
with Image.open(img_path) as pil_img:
    test_image = pil_img.convert('RGB')
inputs = processor(prompt, images=test_image, return_tensors="pt").to("cuda")
out = model.generate(**inputs, max_new_tokens=100)
# The decoded text contains the prompt too; keep only what follows the last "ASSISTANT:".
response = processor.decode(out[0], skip_special_tokens=True).split("ASSISTANT:")[-1].strip()

# 5. Show Result
print("\n" + "="*40)
print(f"Food: {df_test['GT Food name']}")
print(f"Ground Truth: {df_test['weight']}g")
print(f"CV Hint: {cv_hint:.0f}g")
print(f"Model Prediction: {response}")
print("="*40)
# Show a 300x300 thumbnail of the test image in the notebook output.
display_img = cv2.resize(cv2.imread(img_path), (300, 300))
cv2_imshow(display_img)