## Final code to convert the paddy disease JSONL dataset to the LLaVA conversation format

In [None]:
import json
import uuid

# Input: read line by line, one JSON object per non-blank line (JSON Lines).
INPUT_FILE = "paddy_disease.jsonl"       # your source file
# Output: a single JSON array holding every converted record.
OUTPUT_FILE = "paddy_disease_llava.json" # Official LLaVA format

def _split_qa_label(line):
    """
    Recognises a leading Q/A label such as 'Q:', 'q3:', 'Question 2:' or 'Answer:'.

    Returns ("q" or "a", text_after_colon) for a labelled line, otherwise None.
    """
    head, colon, rest = line.partition(":")
    if not colon:
        return None
    label = head.strip().lower()
    # Longer spellings first so "question" is not read as "q" plus junk.
    for kind, word in (("q", "question"), ("q", "q"), ("a", "answer"), ("a", "a")):
        if label.startswith(word):
            suffix = label[len(word):].strip()
            # Only an optional turn number may sit between the label and ':'.
            # This keeps prose like "and ..." or "Also note: ..." from being
            # mistaken for an answer label.
            if not suffix or suffix.isdigit():
                return kind, rest.strip()
    return None


def parse_qa_block(text):
    """
    Parses Q/A blocks like 'Q1: ... A1: ...' or 'Q: ... A: ...'
    into a list of (question, answer) tuples.

    Rules:
      * A line is a label only if it starts with Q / Question / A / Answer,
        an optional number, and a colon (case-insensitive).
      * Unlabelled lines are continuation text and are appended (joined with
        a newline) to the question or answer currently being read; unlabelled
        lines before the first question are ignored.
      * An answer with no pending question is discarded, so it can never be
        paired with a later, unrelated question.
      * If a question receives several labelled answers, the last one wins.
      * Questions that never receive an answer are dropped.

    Args:
        text: The raw multi-line Q/A string.

    Returns:
        A list of (question, answer) string tuples in order of appearance.
    """
    qa_pairs = []
    question_lines, answer_lines = [], []
    active = None  # list that unlabelled continuation lines are appended to

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        labelled = _split_qa_label(line)
        if labelled is None:
            if active is not None:
                active.append(line)
            continue

        kind, body = labelled
        if kind == "q":
            # A new question closes the previous pair, if it is complete.
            if question_lines and answer_lines:
                qa_pairs.append(("\n".join(question_lines), "\n".join(answer_lines)))
            question_lines = [body] if body else []
            answer_lines = []
            active = question_lines
        elif question_lines:
            answer_lines = [body] if body else []
            active = answer_lines
        else:
            # Stray answer without a question: drop it and its continuations.
            active = None

    if question_lines and answer_lines:
        qa_pairs.append(("\n".join(question_lines), "\n".join(answer_lines)))
    return qa_pairs

def convert_to_llava_format(input_file=None, output_file=None):
    """
    Converts a JSONL dataset into a single LLaVA-style JSON array.

    Each non-blank input line is parsed as a JSON object. Its conversation is
    built in this order: the optional "description" (as a "Describe the
    image." turn), then the Q/A pairs parsed from "multi_turn_conversation",
    then those parsed from "simple_qa". Each output record gets a fresh UUID
    as "id" and the input's "image_path" as "image".

    Args:
        input_file: Path of the JSONL file to read. Defaults to INPUT_FILE.
        output_file: Path of the JSON file to write. Defaults to OUTPUT_FILE.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a record that produced conversation turns has no
            "image_path".
    """
    # Resolve defaults at call time so reassigned module constants are honoured.
    input_file = INPUT_FILE if input_file is None else input_file
    output_file = OUTPUT_FILE if output_file is None else output_file

    output_records = []
    skipped = 0

    with open(input_file, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)

            conversations = []

            # 1. Description first
            if record.get("description"):
                conversations.append({
                    "from": "human",
                    "value": "<image>\nDescribe the image."
                })
                conversations.append({
                    "from": "gpt",
                    "value": record["description"].strip()
                })

            # 2. Multi-turn QA, then 3. Simple QA (same handling for both)
            for field in ("multi_turn_conversation", "simple_qa"):
                if record.get(field):
                    for q, a in parse_qa_block(record[field]):
                        conversations.append({"from": "human", "value": q})
                        conversations.append({"from": "gpt", "value": a})

            # A record with no turns is useless as a training sample.
            if not conversations:
                skipped += 1
                continue

            # The image placeholder must appear in the first human turn; when
            # there is no description, add it to the first question instead.
            if "<image>" not in conversations[0]["value"]:
                conversations[0]["value"] = "<image>\n" + conversations[0]["value"]

            output_records.append({
                "id": str(uuid.uuid4()),
                "image": record["image_path"],
                "conversations": conversations
            })

    # Write array to JSON file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(output_records, f, ensure_ascii=False, indent=2)

    print(f"Converted {len(output_records)} records to {output_file} in official format")
    if skipped:
        print(f"Skipped {skipped} records with no conversation turns")

# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    convert_to_llava_format()
