<a href="https://colab.research.google.com/github/punnoose-1620/masters-thesis-sensor-data/blob/agentic-model/CSV_Builder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installs and Imports

### Installs

In [None]:
# No installs required: `csv`, `io` and `os` all ship with the Python standard library.

### Imports

In [None]:
import csv
import io
import os

## Constants

### INPUT — paste your full text block here between the triple quotes

In [None]:
# CSV-formatted input consumed by parse_text_to_rows(): the first non-blank
# line is the header, every following line is one record. Fields containing
# commas must be wrapped in double quotes.
RAW_TEXT = """source,category,question,points to be included in answer,source of answer (file name/url)
SharePoint,technical,"How do I connect the tablet to the WICE unit and open the WICE checklist?","Plug tablet into ETH1; power on and swipe up; log in as Workshop/1234; open Chrome or press P1; press P2 to refresh; if page doesn't load tap WICE Install or Home","WICE Tablet Instruction_v3_2025w07.pdf - https://intranet.volvocars.net/sites/WICEDevelopment/Shared%20Documents/WICE%20Installation%20Manuals/Tablet%20Instruction/WICE%20Tablet%20Instruction_v3_2025w07.pdf"
SharePoint,technical,"What should I do if the Start switch shows INT Red on the checklist?","Flip physical Start switch from INT to EXT on the back of the WICE unit; checklist should show Start switch EXT Green","WICE Tablet Instruction_v3_2025w07.pdf - https://intranet.volvocars.net/sites/WICEDevelopment/Shared%20Documents/WICE%20Installation%20Manuals/Tablet%20Instruction/WICE%20Tablet%20Instruction_v3_2025w07.pdf"
"""

### CONFIG

In [None]:
# Path of the CSV file the pipeline writes to (created if missing, appended to otherwise).
OUTPUT_FILE = "llm-test-data.csv"

## Essential Functions

### PARSER

In [None]:
def parse_text_to_rows(text: str) -> tuple[list[str], list[list[str]]]:
    """
    Parse a quoted-CSV formatted string into a header row and data rows.

    Lines outside quoted fields are stripped of surrounding whitespace and
    blank lines between records are skipped. Lines that fall inside an open
    double-quoted field are passed through untouched, so multi-line quoted
    values keep their embedded blank lines and indentation.

    Args:
        text: CSV text whose first non-blank line is the header.

    Returns:
        A ``(header, data)`` tuple: the first parsed row and the list of all
        remaining rows.

    Raises:
        ValueError: If ``text`` contains no non-blank content, or if no rows
            come out of the parser.
    """
    prepared: list[str] = []
    inside_quotes = False

    for raw_line in text.strip().splitlines():
        opened_inside = inside_quotes
        # An odd number of double quotes toggles the quoted state; escaped
        # quotes ("") come in pairs and therefore leave it unchanged.
        if raw_line.count('"') % 2:
            inside_quotes = not inside_quotes

        if opened_inside and inside_quotes:
            # Entirely inside a quoted field: this is field content, keep as is.
            prepared.append(raw_line)
            continue

        line = raw_line
        if not opened_inside:
            line = line.lstrip()   # record starts on this line
        if not inside_quotes:
            line = line.rstrip()   # record ends on this line
        if line:
            prepared.append(line)

    if not prepared:
        raise ValueError("Input text is empty — nothing to parse.")

    # Drop any empty rows the reader yields for stray blank lines.
    rows = [row for row in csv.reader(io.StringIO("\n".join(prepared))) if row]

    if not rows:
        raise ValueError("No rows found after parsing.")

    header = rows[0]
    data = rows[1:]

    return header, data

### WRITER

In [None]:
def write_to_csv(header: list[str], data: list[list[str]], filepath: str) -> None:
    """
    Write data rows to a CSV file, quoting every field.

    - If the file does NOT exist            → create it and write header + data.
    - If the file exists but is empty       → write header + data.
    - If the file exists and has content    → append data rows only (no duplicate header).

    When appending to a file whose last line is not newline-terminated, a
    line terminator is written first so the first new row does not get
    glued onto the existing last row.

    Args:
        header: Column names, written only when the file has no content yet.
        data: Rows to write.
        filepath: Destination CSV path.
    """
    file_exists = os.path.isfile(filepath)
    # A zero-byte file exists but still needs its header row.
    has_content = file_exists and os.path.getsize(filepath) > 0

    if file_exists:
        print(f"[INFO] '{filepath}' found — appending {len(data)} row(s).")
        mode = "a"
    else:
        print(f"[INFO] '{filepath}' not found — creating new file.")
        mode = "w"

    needs_line_break = False
    if has_content:
        # Peek at the final byte to see whether the last record is terminated.
        with open(filepath, "rb") as existing:
            existing.seek(-1, os.SEEK_END)
            needs_line_break = existing.read(1) not in (b"\n", b"\r")

    with open(filepath, mode=mode, newline="", encoding="utf-8") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)

        if needs_line_break:
            f.write(writer.dialect.lineterminator)

        if not has_content:
            writer.writerow(header)   # Write header only when none can exist yet

        writer.writerows(data)

    action = "Appended" if file_exists else "Created"
    print(f"[OK]   {action} '{filepath}' — total rows added: {len(data)}")

## Main Pipeline

In [None]:
# Entry cell: parse RAW_TEXT and write (or append) the rows to OUTPUT_FILE.
print("=" * 55)
print("  text → llm-test-data.csv converter")
print("=" * 55)

try:
    header, data = parse_text_to_rows(RAW_TEXT)

    print(f"[INFO] Columns  : {header}")
    print(f"[INFO] Rows parsed: {len(data)}")

    # This code runs at cell/module level, where `return` is a SyntaxError,
    # so the "nothing to write" case is handled with a plain branch instead.
    if data:
        write_to_csv(header, data, OUTPUT_FILE)
    else:
        print("[WARN] No data rows found — check your RAW_TEXT input.")

except Exception as e:
    # Report the failure in the same log format, then re-raise so the
    # notebook still shows the full traceback.
    print(f"[ERROR] {e}")
    raise