In [1]:
# === Config & imports ===
import os, io, json, time, random, traceback, base64
from pathlib import Path
import requests
from typing import List, Dict, Any

# Dataset locations, derived from the directory the notebook is started in
NB_DIR = Path.cwd()
_DATA_ROOT = NB_DIR.parent / 'data'
VLAT_JSON = _DATA_ROOT / 'VLAT' / 'vlat_skip.json'
CALVI_JSON = _DATA_ROOT / 'CALVI' / 'calvi.json'

# Local Ollama chat endpoint and the vision model to query
API_URL = 'http://localhost:11434/api/chat'
MODEL = 'llava:7b'

# Experiment knobs
NUM_RUNS = 3        # how many full passes are made over each dataset
MAX_RETRIES = 3     # attempts per request before the error is surfaced
RETRY_DELAY = 2     # pause between attempts, in seconds
TEMPERATURE = 0.0   # sampling temperature; 0.0 is meant to make answers repeatable
MAX_TOKENS = 300    # intended cap on response length (if the server honors it)

# Instruction text placed in front of every question
VLAT_PROMPT = " ".join([
    "I am about to show you an image and ask you a multiple choice question about that image.",
    "Read the question carefully, examine the chart, then answer with only the choice text.",
])
CALVI_PROMPT = " ".join([
    "You will see a visualization and a multiple choice question.",
    "Reason step by step and return only the option you believe is correct.",
])

# Echo the resolved locations so a wrong working directory is obvious immediately
for _label, _location in (
    ('Notebook directory:', NB_DIR),
    ('VLAT JSON expected at:', VLAT_JSON),
    ('CALVI JSON expected at:', CALVI_JSON),
):
    print(_label, _location)


Notebook directory: C:\Users\Melita\CSE 4001\VLM-Eval-Research\scripts
VLAT JSON expected at: C:\Users\Melita\CSE 4001\VLM-Eval-Research\data\VLAT\vlat_skip.json
CALVI JSON expected at: C:\Users\Melita\CSE 4001\VLM-Eval-Research\data\CALVI\calvi.json


In [2]:
# === Utilities: ping server, image encoding, robust JSON loading ===
def ping_ollama(url: str = API_URL) -> bool:
    """Probe the server behind ``url`` and report whether it answered successfully.

    The '/api/chat' part of ``url`` is swapped for '/api/tags' and that address
    is fetched with a 5 second timeout.

    Returns:
        The response's ``ok`` flag, or False if anything at all goes wrong
        while building or sending the request (best-effort probe, never raises).
    """
    try:
        response = requests.get(url.replace('/api/chat', '/api/tags'), timeout=5)
        reachable = response.ok
    except Exception:
        reachable = False
    return reachable

def b64_from_file(path: Path) -> str:
    """Read the file at ``path`` in binary mode and return its base64 text.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, 'rb') as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def resolve_image_paths(json_path: Path, questions: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
    """Rewrite each question's 'image_path' to the first location where the file is found.

    Absolute paths are kept as they are. A relative path is tried, in order,
    against: the JSON file's folder, ``<json folder>/images/<file name>``,
    the parent of the JSON folder, and the current working directory.
    If none of those is an existing *file*, the path relative to the JSON
    folder is stored so that later "not found" errors name a sensible location.

    Args:
        json_path: Path of the dataset JSON file; only its parent folder is used.
        questions: Question dicts. They are modified in place.

    Returns:
        The same ``questions`` list that was passed in.
    """
    base = Path(json_path).parent
    for q in questions:
        # A missing key or a JSON null both become the empty path.
        p = Path(q.get('image_path', '') or '')
        if p.is_absolute():
            q['image_path'] = str(p)
            continue
        candidates = (
            base / p,                       # same folder as JSON
            base / 'images' / p.name,       # a common layout
            base.parent / p,                # one level up
            Path.cwd() / p,                 # notebook CWD (fallback)
        )
        # is_file() rather than exists(): a directory that happens to match
        # (including the JSON folder itself when the path is empty) is not an image.
        chosen = next((c for c in candidates if c.is_file()), base / p)
        q['image_path'] = str(chosen)
    return questions

def load_questions_file(file_path: Path) -> List[Dict[str,Any]]:
    """Load a question dataset from a UTF-8 JSON file and resolve its image paths.

    Two layouts are accepted: an object with a top-level 'questions' list,
    or a file whose top level is itself the list of questions.

    Args:
        file_path: Location of the JSON file (anything ``Path`` accepts).

    Returns:
        The list of question items after ``resolve_image_paths`` has processed it.

    Raises:
        ValueError: if neither layout yields a list.
        OSError / json.JSONDecodeError: if the file cannot be read or parsed.
    """
    file_path = Path(file_path)
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Only a dict has .get(); a top-level list is already the item list.
    if isinstance(data, dict):
        items = data.get('questions', data)
    else:
        items = data
    if not isinstance(items, list):
        raise ValueError('Expected a list under key "questions" or the file to be a list of items.')
    return resolve_image_paths(file_path, items)

def call_llava(image_path: Path, prompt: str) -> str:
    """Send one image plus prompt to the chat endpoint and return the reply text.

    The image is base64-encoded once and posted to ``API_URL`` as a single,
    non-streaming user message for ``MODEL``. ``TEMPERATURE`` and ``MAX_TOKENS``
    are passed through the request's 'options'.

    Args:
        image_path: Image file to attach.
        prompt: Full text of the user message.

    Returns:
        The 'content' of the response's 'message', or '' if the response
        carries no such field.

    Raises:
        Exception: whatever the last attempt raised (connection error, HTTP
            error status, bad JSON, ...) once all attempts are used up.
        OSError: if the image file cannot be read.
    """
    # encode image (done once; retries reuse the same payload)
    img_b64 = b64_from_file(image_path)
    payload = {
        'model': MODEL,
        'messages': [
            {
                'role': 'user',
                'content': prompt,
                'images': [img_b64]
            }
        ],
        'stream': False,
        'options': {
            'temperature': TEMPERATURE,
            # Ollama's option name for the maximum number of generated tokens;
            # without it the MAX_TOKENS setting was silently ignored.
            'num_predict': MAX_TOKENS,
        }
    }
    # Always try at least once, even if MAX_RETRIES is configured as 0 or less;
    # otherwise the loop would be skipped and an empty answer returned unasked.
    attempts = max(1, MAX_RETRIES)
    for attempt in range(1, attempts + 1):
        try:
            r = requests.post(API_URL, json=payload, timeout=120)
            r.raise_for_status()
            data = r.json()
            return data.get('message',{}).get('content','')
        except Exception:
            if attempt == attempts:
                raise
            time.sleep(RETRY_DELAY)


In [3]:
# === Sanity checks ===
# Every line below should end in True. If one does not, fix the paths in the
# config cell or move your files accordingly.
_sanity_results = (
    ('Ollama running? ->', ping_ollama()),
    ('VLAT JSON exists? ->', VLAT_JSON.exists()),
    ('CALVI JSON exists? ->', CALVI_JSON.exists()),
)
for _label, _ok in _sanity_results:
    print(_label, _ok)


Ollama running? -> True
VLAT JSON exists? -> True
CALVI JSON exists? -> True


In [5]:
# === Peek at the first question and verify its image path resolution ===
def _peek_first_question(label: str, json_path: Path) -> List[Dict[str, Any]]:
    """Load one dataset, print its first item, and say whether that item's image exists.

    Any failure is reported on stdout instead of being raised, so the cell
    always finishes.

    Args:
        label: Dataset name used in the printed messages.
        json_path: Location of the dataset JSON file.

    Returns:
        The loaded questions, or an empty list if loading failed.
    """
    qs: List[Dict[str, Any]] = []
    try:
        qs = load_questions_file(json_path)
        if not qs:
            print(f'{label} has 0 questions!')
        else:
            q0 = qs[0]
            print(f'Sample {label} item:')
            for k in ('id', 'question', 'image_path'):
                print(' ', k+':', q0.get(k))
            print('Image exists? ->', Path(q0['image_path']).exists())
    except Exception as e:
        print(f'Error loading {label}:', e)
    return qs

# Both names are always (re)bound, so a failed load cannot leave a stale list
# from an earlier execution of this cell in the kernel.
vlat_qs = _peek_first_question('VLAT', VLAT_JSON)
calvi_qs = _peek_first_question('CALVI', CALVI_JSON)


Sample VLAT item:
  id: 1
  question: What was the price of a barrel of oil in February 2015?
  image_path: C:\Users\Melita\CSE 4001\VLM-Eval-Research\data\VLAT\images\LineChart.png
Image exists? -> True
Sample CALVI item:
  id: 43
  question: What is the trend sales in gift shop A from Jan to Dec?
  image_path: C:\Users\Melita\CSE 4001\VLM-Eval-Research\data\CALVI\images\question43.png
Image exists? -> True


In [6]:
# === Main experiment runner ===
import csv

def run_experiment(name: str, dataset_path: Path, prompt: str, out_dir: Path = NB_DIR / 'results'):
    """Ask the model every question of a dataset NUM_RUNS times and log the answers to CSV.

    The log is written to ``<out_dir>/<name>_runs.csv`` and is opened in write
    mode, so an existing file with that name is replaced. One row is written
    per (run, question); a failure for a single question is recorded in the
    row's 'error' column and printed, and the run continues.

    Args:
        name: Dataset label; used in messages, the CSV file name, and as the
            prefix of fallback question ids.
        dataset_path: JSON file understood by ``load_questions_file``.
        prompt: Instruction text placed before each question.
        out_dir: Folder for the CSV; created if it does not exist.

    Raises:
        Whatever ``load_questions_file`` raises, or OSError if the CSV cannot
        be created. Per-question errors are not raised.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    log_path = out_dir / f'{name}_runs.csv'

    questions = load_questions_file(dataset_path)
    print(f'Starting {name} with {len(questions)} questions; results ->', log_path)

    with open(log_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=[
            'run','qid','image_path','question','model_answer','error'
        ])
        writer.writeheader()

        for run_idx in range(1, NUM_RUNS+1):
            print(f'--- Run {run_idx}/{NUM_RUNS} ---')
            for i, q in enumerate(questions, 1):
                question_text = q.get('question','')
                # .get(): an item without 'image_path' becomes a logged error
                # for that row instead of a KeyError that aborts the whole run.
                img = Path(q.get('image_path', ''))
                row = {
                    'run': run_idx,
                    'qid': q.get('id', f'{name}_{i}'),
                    'image_path': str(img),
                    'question': question_text,
                    'model_answer': '',
                    'error': ''
                }
                try:
                    # is_file(): a directory at this path is not a usable image.
                    if not img.is_file():
                        raise FileNotFoundError(f'Image not found: {img}')
                    row['model_answer'] = call_llava(img, prompt + "\n\nQuestion: " + question_text)
                except Exception as e:
                    # Some exceptions have an empty message; fall back to the
                    # type name so a failed row never looks like a success.
                    row['error'] = str(e) or type(e).__name__
                    print('Error:', e)
                writer.writerow(row)
                # Push each row to disk right away so an interrupted or crashed
                # kernel keeps everything collected so far.
                f.flush()

    print('Done:', log_path)


In [7]:
# === Run experiments ===
# Running this cell queries the model for every question NUM_RUNS times per
# dataset and replaces any existing <name>_runs.csv in the results folder.
# Only run it once the sanity checks above all show True.
run_experiment('VLAT', VLAT_JSON, VLAT_PROMPT)
run_experiment('CALVI', CALVI_JSON, CALVI_PROMPT)

print('All experiment runs finished.')


Starting VLAT with 53 questions; results -> C:\Users\Melita\CSE 4001\VLM-Eval-Research\scripts\results\VLAT_runs.csv
--- Run 1/3 ---
--- Run 2/3 ---
--- Run 3/3 ---
Done: C:\Users\Melita\CSE 4001\VLM-Eval-Research\scripts\results\VLAT_runs.csv
Starting CALVI with 45 questions; results -> C:\Users\Melita\CSE 4001\VLM-Eval-Research\scripts\results\CALVI_runs.csv
--- Run 1/3 ---
--- Run 2/3 ---
--- Run 3/3 ---
Done: C:\Users\Melita\CSE 4001\VLM-Eval-Research\scripts\results\CALVI_runs.csv
Ready. Uncomment the two lines above to start the runs once the sanity checks show True/exists.
