In [4]:
# Setup & sanity checks
from pathlib import Path
import sys, platform

# Project root is wherever this notebook lives (Agent_A/)
PROJECT = Path.cwd()

# Handle both common Spider layouts:
# 1) Agent_A/spider_data/
# 2) Agent_A/spider_data/spider_data/
if (PROJECT / "spider_data" / "spider_data").exists():
    DATA_DIR = PROJECT / "spider_data" / "spider_data"
else:
    DATA_DIR = PROJECT / "spider_data"

# Key Spider files
TABLES_JSON = DATA_DIR / "tables.json"
TRAIN_JSON  = DATA_DIR / "train_spider.json"
DEV_JSON    = DATA_DIR / "dev.json"
DB_ROOT     = DATA_DIR / "database"  # folder with sqlite DBs (not needed yet, but good to verify)

# Where we’ll store intermediate artifacts
OUTPUT_DIR = PROJECT / "output"
OUTPUT_DIR.mkdir(exist_ok=True)

print("Python:", sys.version.split()[0], "| Platform:", platform.system())
print("PROJECT:", PROJECT)
print("DATA_DIR:", DATA_DIR)

for p in [TABLES_JSON, TRAIN_JSON, DEV_JSON, DB_ROOT]:
    print(f"{p.name:<20} exists? {p.exists()}  -> {p}")

# Hard requirement for next steps (building schema cards)
assert TABLES_JSON.exists(), (
    "tables.json not found.\n"
    "Fix DATA_DIR or move your Spider data into Agent_A/spider_data/ "
    "(or Agent_A/spider_data/spider_data/)."
)


Python: 3.12.7 | Platform: Windows
PROJECT: c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A
DATA_DIR: c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\spider_data\spider_data
tables.json          exists? True  -> c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\spider_data\spider_data\tables.json
train_spider.json    exists? True  -> c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\spider_data\spider_data\train_spider.json
dev.json             exists? True  -> c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\spider_data\spider_data\dev.json
database             exists? True  -> c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\spider_data\spider_data\database


In [6]:
%pip install -q --upgrade pip
%pip install -q numpy scikit-learn
%pip install -q sentence-transformers
%pip install -q openai

import numpy, sklearn, sys
print("NumPy:", numpy.__version__, "| scikit-learn:", sklearn.__version__)
print("Python executable:", sys.executable)



Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
NumPy: 2.3.2 | scikit-learn: 1.7.1
Python executable: c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\agent\Scripts\python.exe


In [5]:
# Create agent_a_core.py (OpenAI embeddings only) and import it
from pathlib import Path
import importlib

module_code = r'''
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple
from pathlib import Path
import json, os
import numpy as np

# ------------------------------
# OpenAI Embedding backend only
# ------------------------------

class OpenAIBackend:
    """
    OpenAI embeddings backend.
    - Default model: text-embedding-3-small
    - Expects OPENAI_API_KEY in environment.
    """
    name: str = "openai"
    dim: Optional[int] = None

    def __init__(self, model: str = "text-embedding-3-small", batch_size: int = 128):
        try:
            from openai import OpenAI  # type: ignore
        except Exception as e:
            raise RuntimeError("OpenAI SDK not available. Install 'openai' (>= 1.0).") from e
        if not os.getenv("OPENAI_API_KEY"):
            raise RuntimeError("OPENAI_API_KEY not set in environment.")
        self.client = OpenAI()
        self.model = model
        self.batch_size = batch_size

    def encode(self, texts: List[str]) -> np.ndarray:
        out = []
        bs = self.batch_size
        for i in range(0, len(texts), bs):
            chunk = texts[i:i+bs]
            resp = self.client.embeddings.create(model=self.model, input=chunk)
            vecs = [np.asarray(d.embedding, dtype="float32") for d in resp.data]
            out.append(np.vstack(vecs))
        arr = np.vstack(out) if out else np.zeros((0, 1536), dtype="float32")
        # Normalize for cosine similarity with inner product
        norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
        arr = arr / norms
        return arr

    def encode_one(self, text: str) -> np.ndarray:
        return self.encode([text])[0:1]


# ------------------------------
# Schema cards from Spider tables.json (unchanged)
# ------------------------------

def build_schema_cards(tables_json_path: Path, max_cols_per_table: int = 12) -> Dict[str, Any]:
    """
    Read Spider tables.json and produce a compact text summary per DB:
      return { 'db_ids': [...], 'cards': [...] }
    Each 'card' is multiline text that lists tables, columns(types),
    and foreign key hints to boost semantic matching.
    """
    raw = json.loads(tables_json_path.read_text(encoding="utf-8"))
    tables_all = raw["db"] if isinstance(raw, dict) and "db" in raw else raw

    db_ids: List[str] = []
    cards: List[str] = []

    for db in tables_all:
        db_id = db["db_id"]
        tnames = db.get("table_names_original") or db.get("table_names", [])
        columns = db.get("column_names_original") or db.get("column_names", [])  # [[table_id, col], ...]
        ctypes = db.get("column_types", [])
        fks = db.get("foreign_keys", [])  # pairs of column indices

        # table -> [(col_name, col_type), ...]
        by_table: Dict[str, List[Tuple[str,str]]] = {}
        for idx, (tid, col) in enumerate(columns):
            if tid >= 0:
                tname = tnames[tid]
                by_table.setdefault(tname, []).append((col, ctypes[idx] if idx < len(ctypes) else "text"))

        lines = [f"[DB:{db_id}]"]
        for t in tnames:
            cols = ", ".join([f"{c}({ct})" for c, ct in (by_table.get(t, [])[:max_cols_per_table])])
            lines.append(f"- {t}: {cols}")

        if fks:
            # add FK hints
            def col_name(ci):
                if 0 <= ci < len(columns):
                    tid, col = columns[ci]
                    if tid >= 0:
                        return f"{tnames[tid]}.{col}"
                return f"col{ci}"
            fk_pairs = [f"{col_name(a)} -> {col_name(b)}" for a, b in fks]
            lines.append("ForeignKeys: " + "; ".join(fk_pairs))

        cards.append("\\n".join(lines))
        db_ids.append(db_id)

    return {"db_ids": db_ids, "cards": cards}


# ------------------------------
# Agent A: propose & auto_select
# (OpenAI embeddings)
# ------------------------------

@dataclass
class Candidate:
    db_id: str
    score: float

class AgentA:
    """
    Minimal Agent for DB selection:
      - Embeds DB cards once using OpenAI embeddings.
      - For a question, embeds the query and returns top-K DB candidates by cosine similarity.
      - auto_select() chooses the top-1 by similarity among provided candidates.
    """
    def __init__(self,
                 db_cards: List[str],
                 db_ids: List[str],
                 embedder: Optional[OpenAIBackend] = None):
        assert len(db_cards) == len(db_ids), "db_cards and db_ids must align"
        self.db_cards = db_cards
        self.db_ids = db_ids
        self.embedder = embedder or OpenAIBackend()

        # Precompute DB vectors
        self.X_db = self.embedder.encode(self.db_cards)

    def _topk_from_matrix(self, M: np.ndarray, q_vec: np.ndarray, k: int):
        # cosine == dot for L2-normalized vectors
        sims = (M @ q_vec.T).ravel()
        idx = np.argsort(-sims)[:k]
        return sims[idx], idx

    def propose(self, question: str, top: int = 3, pool: int = 20) -> Dict[str, Any]:
        q_vec = self.embedder.encode_one(question)
        sims_db, idx_db = self._topk_from_matrix(self.X_db, q_vec, k=min(pool, len(self.db_ids)))

        cands: List[Candidate] = []
        used = set()
        for score, idx in zip(sims_db, idx_db):
            db_id = self.db_ids[int(idx)]
            if db_id in used:
                continue
            cands.append(Candidate(db_id=db_id, score=float(score)))
            used.add(db_id)
            if len(cands) == top:
                break

        return {
            "query": question,
            "candidates": [c.__dict__ for c in cands],
            "instruction": "Reply with the db_id to use. If unsure, reply 'auto'."
        }

    def auto_select(self, question: str, candidates: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Pick the best candidate purely by query→DB-card similarity.
        """
        if not candidates:
            raise ValueError("No candidates provided to auto_select().")
        q_vec = self.embedder.encode_one(question)

        # Build mask for candidate ids
        cand_ids = [c["db_id"] for c in candidates]
        id_to_idx = {db_id: i for i, db_id in enumerate(self.db_ids)}

        # Compute similarity only over candidate set
        sims = []
        for db_id in cand_ids:
            j = id_to_idx.get(db_id)
            if j is None:
                continue
            s = float((self.X_db[j:j+1] @ q_vec.T).ravel()[0])
            sims.append((db_id, s))

        if not sims:
            # Fallback: first provided candidate
            return {"selected_db_id": candidates[0]["db_id"], "reason": "fallback: first candidate (no sims)"}

        sims.sort(key=lambda x: -x[1])
        best_id, best_s = sims[0]
        return {"selected_db_id": best_id, "reason": f"highest DB-card similarity {best_s:.4f} among candidates"}
'''

# write the module
path = Path("agent_a_core.py")
path.write_text(module_code, encoding="utf-8")
print("Wrote:", path.resolve())

# import to verify
import agent_a_core
importlib.reload(agent_a_core)
print("Imported agent_a_core  Backend:", getattr(agent_a_core, "OpenAIBackend").__name__)


Wrote: C:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\agent_a_core.py
Imported agent_a_core  Backend: OpenAIBackend


In [8]:
# Build schema cards from Spider tables.json
import json
from agent_a_core import build_schema_cards

# Build cards (uses tables.json from Cell 1)
cards_meta = build_schema_cards(TABLES_JSON, max_cols_per_table=100)
db_ids, cards = cards_meta["db_ids"], cards_meta["cards"]

# Basic sanity checks
assert len(db_ids) == len(cards), "Mismatch: db_ids and cards should be aligned"
assert len(set(db_ids)) == len(db_ids), "Duplicate db_ids found — check your tables.json"

# Persist for reuse
out_path = OUTPUT_DIR / "db_cards.json"
out_path.write_text(json.dumps(cards_meta, ensure_ascii=False, indent=2), encoding="utf-8")

print(f"Built schema cards for {len(db_ids)} databases")
print(f"Saved to: {out_path}")

# Preview a couple of cards (trimmed)
for i in range(min(2, len(cards))):
    print("\n--- Card", i, "for DB:", db_ids[i], "---")
    preview = cards[i]
    print(preview[:800] + ("..." if len(preview) > 800 else ""))


Built schema cards for 166 databases
Saved to: c:\Users\aswat\OneDrive - UWA\Desktop\sem4\CITS5553\Agent_A\output\db_cards.json

--- Card 0 for DB: perpetrator ---
[DB:perpetrator]\n- perpetrator: Perpetrator_ID(number), People_ID(number), Date(text), Year(number), Location(text), Country(text), Killed(number), Injured(number)\n- people: People_ID(number), Name(text), Height(number), Weight(number), Home Town(text)\nForeignKeys: perpetrator.People_ID -> people.People_ID

--- Card 1 for DB: college_2 ---
[DB:college_2]\n- classroom: building(text), room_number(text), capacity(number)\n- department: dept_name(text), building(text), budget(number)\n- course: course_id(text), title(text), dept_name(text), credits(number)\n- instructor: ID(text), name(text), dept_name(text), salary(number)\n- section: course_id(text), sec_id(text), semester(text), year(number), building(text), room_number(text), time_slot_id(text)\n- teaches: ID(text), course_id(text), sec_id(text), semester(text), year(num

In [9]:
# SDK + key
import os
print("Has OPENAI_API_KEY:", bool(os.getenv("OPENAI_API_KEY")))
try:
    from openai import OpenAI
    _ = OpenAI()
    print("OpenAI SDK import OK")
except Exception as e:
    raise RuntimeError("OpenAI SDK missing/outdated. Run: %pip install -q openai") from e


Has OPENAI_API_KEY: True
OpenAI SDK import OK


In [10]:
# OpenAI embedder on DB cards (no QAs)
from time import time
from agent_a_core import OpenAIBackend, AgentA

OPENAI_MODEL = "text-embedding-3-large"   # or "text-embedding-3-small" (cheaper)
USE_QA = False  # keep cost low: only cards

embedder_openai = OpenAIBackend(model=OPENAI_MODEL)

t0 = time()
agent_openai = AgentA(
    db_cards=cards,
    db_ids=db_ids,
    embedder=embedder_openai
)
print(f"[OpenAI {OPENAI_MODEL}] Agent ready. DBs: {len(db_ids)} | Init: {time()-t0:.2f}s")



[OpenAI text-embedding-3-large] Agent ready. DBs: 166 | Init: 7.11s


In [13]:
# Simple & efficient LLM reranker (cards-only is fine)
from openai import OpenAI
import json, re

client = OpenAI()

def llm_pick_best_db(
    query: str,
    candidates: list,
    cards: list,
    db_ids: list,
    model: str = "gpt-4o-mini",
    max_schema_chars: int = 900,      # keep tokens low
    max_schema_lines: int = 12,       # or trim by lines
    max_keywords: int = 20            # from "Tags:" line if present
) -> dict:
    """
    candidates: agent.propose(...)[\"candidates\"]
    returns: { "best_db_id": "...", "reason": "..." }
    """

    if not candidates:
        raise ValueError("llm_pick_best_db: no candidates provided")

    # Build compact candidate blocks: db_id + (optional) Keywords + trimmed Schema
    blocks = []
    valid_ids = []
    for i, c in enumerate(candidates, 1):
        db = c["db_id"]
        valid_ids.append(db)
        card = cards[db_ids.index(db)] if db in db_ids else ""

        # pull Keywords from a "Tags:" line if present
        m = re.search(r"(?mi)^Tags:\s*(.*)$", card)
        keywords = [t.strip() for t in m.group(1).split(",")] if m else []
        keywords = ", ".join(keywords[:max_keywords]) if keywords else "(none)"

        # trim schema by lines then chars
        lines = [ln for ln in card.split("\n") if ln.strip()]
        schema = "\n".join(lines[:max_schema_lines])[:max_schema_chars]

        blocks.append(f"{i}) db_id={db}\nKeywords: {keywords}\nSchema:\n{schema}")

    system = (
        "You are a strict database router. Choose EXACTLY one db_id from the candidates.\n"
        "Decision rules:\n"
        "1) Prefer schemas whose COLUMN/TABLE NAMES or Keywords literally match the user's terms.\n"
        "2) Ensure required relations/joins exist for the question.\n"
        "3) Break ties by the most specific coverage. Do not invent db_ids.\n"
        'Return strict JSON: {"best_db_id": "...", "reason": "..."} with reason ≤ 25 words.'
    )

    user = (
        f"Question: {query}\n\n"
        "Candidates:\n" + "\n\n".join(blocks) + "\n\n"
        "Pick the single best db_id."
    )

    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": system},
                  {"role": "user", "content": user}],
        response_format={"type": "json_object"},
        temperature=0
    )

    # Parse + validate
    try:
        data = json.loads(resp.choices[0].message.content)
    except Exception:
        # fallback: pick top-1 candidate
        return {"best_db_id": candidates[0]["db_id"], "reason": "fallback: invalid JSON from model"}

    if data.get("best_db_id") not in set(valid_ids):
        # guardrail: ensure selection is one of the candidates
        return {"best_db_id": candidates[0]["db_id"], "reason": "fallback: invalid db_id from model"}

    # optionally clip reason
    reason = str(data.get("reason", ""))[:150]
    return {"best_db_id": data["best_db_id"], "reason": reason}


In [14]:
# Specific test questions + expected db_id
tests = [
     ("List each customer’s total checking+saving balance and return the top 5.", "small_bank_1"),
    ("Which customer has the highest savings balance and what is their checking balance?", "small_bank_1"),
    ("Show customers and total transaction count per card, highest first.", "customers_card_transactions"),

    # music
    ("Albums released after 2015 with their label and type.", "music_2"),
    ("Songs with rating >= 9 and their genres and artists.", "music_1"),
    ("Artists whose songs had Weeks_on_Top >= 8 along with Issue_Date.", "music_4"),

    # education / university
    ("Instructors with salary > 100000 and their department names.", "college_2"),
    ("Courses with >=4 credits and the number of sections offered per year.", "college_2"),
    ("Students and their total credits (tot_cred), descending.", "college_2"),

    # flights (note: different schemas!)
    ("Flights with origin 'Dallas' and destination 'Seattle' in year 2017.", "flight_1"),
    ("List routes with codeshare='Y' and show airline callsign.", "flight_4"),
    ("Flights from AirportCode 'DFW' to 'LAX' with airline name.", "flight_2"),
    ("For each operate_company, count total flights operated with average altitude.", "flight_company"),

    # people / sport / competition
    ("Body builder with the highest Total and their Birth_Place.", "body_builder"),
    ("Races held at tracks with Seating > 50000 and the track location.", "race_track"),
    ("Clubs with the most Gold medals and their Total medals.", "sports_competition"),

    # authors / papers / academic
    ("Papers written by author last name 'Smith' and their institutions.", "icfp_1"),
    ("For each domain, count publications after year 2010, highest first.", "academic"),
    ("Top 5 keywords by number of publications.", "academic"),

    # weather / cities / events
    ("For each city, list GDP and Regional_Population sorted by GDP desc.", "city_record"),
    ("Average July temperature for cities that hosted at least one match.", "city_record"),
    ("Stations with wind_speed_mph > 20 on 'Monday' and the trains that stop there.", "station_weather"),

    # government / elections
    ("Election rows for County_name 'Montgomery' including its District.", "election"),
    ("Count of events per Service_Type_Code in Alabama local government.", "local_govt_in_alabama"),

    # health / medicine
    ("Medicines that interact with enzyme 'CYP3A4'.", "medicine_enzyme_interaction"),
    ("Customers with an available policy and settlement amounts paid to them.", "insurance_fnol"),

    # retail / orders
    ("For each customer, compute total order value and sort descending.", "department_store"),
    ("Total value purchased per supplier (Product_Suppliers.total_value_purchased).", "department_store"),
    ("Stores that sell products with dpi > 600 and their product count.", "store_product"),
    ("Members with Level >= 3 and total pounds spent per branch.", "shop_membership"),

    # logistics / devices / markets
    ("Phones with Memory_in_G >= 128 and total stock across all markets.", "phone_market"),
    ("Total stock per carrier across all markets, descending.", "phone_market"),

    # maintenance / assets
    ("Engineers with a given skill who visited any asset under maintenance contract.", "assets_maintenance"),
    ("Assets and number of recorded faults, highest first.", "assets_maintenance"),

    # students / courses / assessments
    ("Students registered for a course but with no attendance records.", "student_assessment"),
    ("Candidates with PASS assessment outcome and their qualification.", "student_assessment"),

    # animals / clinics
    ("Total treatment cost per dog and the owner's full name.", "dog_kennels"),
    ("Number of treatments per breed and average treatment cost.", "dog_kennels"),

    # employment / companies
    ("People employed at any company in the year 2010 and the company name.", "company_employee"),

    # agriculture / farm
    ("Farm with the highest Total_Cattle in the most recent year.", "farm"),
    ("For each farm, list Year and total pigs and cows combined.", "farm"),

    # solvency / events (regulatory)
    ("Count events per Channel_ID and list the top 5.", "solvency_ii"),
    ("Assets involved in events and the related Locations.", "solvency_ii"),

    # misc domains
    ("Swimmer with the best Time and the city of the stadium where they competed.", "swimming"),
    ("Debates with Num_of_Audience > 1000 and the age of the affirmative debater.", "debate"),
    ("List friends of 'Alice' in the year 2015.", "network_2"),
    ("Students living in dorms with the amenity 'Gym'.", "dorm_1"),
    ("Editors older than 50 serving on any journal committee and the journal theme.", "journal_committee"),
    ("Captains and their ship names for ships built before 1950.", "ship_1"),
    ("Artworks that won at a festival and the festival year.", "entertainment_awards"),
    ("Students who have the allergy type 'Peanut'.", "allergy_1"),
    ("Students who own a pet of type 'Cat'.", "pets_1"),
    ("Bookings returned_late_yn = 'Y' and the customer full names.", "products_for_hire"),
    ("Candidates and their support rates grouped by Poll_Source.", "candidate_poll"),
    ("Top 5 customers by total invoice amount in Chinook.", "chinook_1"),
    ("Browsers compatible since year >= 2015 for accelerator named 'SpeedX'.", "browser_web"),
    ("Trains on railway named 'Trans-Australia' and the manager responsible.", "railway"),
    ("Rooms with maxOccupancy > 4 and total reserved nights.", "inn_1"),
    ("Visitors who spent more than 100 at any museum and the museum name.", "museum_visit"),
    ("Courses under subject 'Math' and enrolled student counts.", "e_learning"),
]
len(tests)


60

In [16]:
# Batch evaluation with LLM pick (OpenAI embeddings → top-k; LLM chooses best)
import pandas as pd

# Pick the agent/cards you actually have in memory
AGENT = (
    agent_openai_aug if 'agent_openai_aug' in globals()
    else agent_openai_qas if 'agent_openai_qas' in globals()
    else agent_openai
)
CARDS = cards_aug if 'cards_aug' in globals() else cards
DB_IDS = db_ids

# Ensure the LLM reranker exists
assert 'llm_pick_best_db' in globals(), "Define llm_pick_best_db(...) first."

LLM_MODEL = "gpt-4o-mini"
TOP_K = 3
POOL = 50

rows = []
for q, expected in tests:
    # 1) retrieve candidates via embeddings
    prop = AGENT.propose(q, top=TOP_K, pool=POOL)
    cands = prop.get("candidates", [])
    top_cands = [c["db_id"] for c in cands]

    # handle edge case: no candidates
    if not cands:
        rows.append({
            "query": q,
            "expected_db": expected,
            "picked_db": None,
            "correct": False,
            "candidates": [],
            "llm_reason": "no candidates from proposer",
        })
        continue

    # 2) let the LLM pick the best among candidates
    decision = llm_pick_best_db(q, cands, CARDS, DB_IDS, model=LLM_MODEL)
    picked = decision.get("best_db_id", "")
    reason = decision.get("reason", "")

    rows.append({
        "query": q,
        "expected_db": expected,
        "picked_db": picked,
        "correct": (picked == expected),
        "candidates": top_cands,
        "llm_reason": reason[:200],
    })

df = pd.DataFrame(rows)
acc = df["correct"].mean() if len(df) else 0.0
print(f"\nAccuracy: {df['correct'].sum()} / {len(df)} = {acc:.3f}")

# Show mistakes first, then correct
df_sorted = pd.concat([df[~df.correct], df[df.correct]], ignore_index=True)
display(df_sorted[["query","expected_db","picked_db","correct","candidates","llm_reason"]])




Accuracy: 54 / 60 = 0.900


Unnamed: 0,query,expected_db,picked_db,correct,candidates,llm_reason
0,Instructors with salary > 100000 and their dep...,college_2,college_1,False,"[department_management, college_1, student_1]","It includes EMPLOYEE and DEPARTMENT tables, al..."
1,"Students and their total credits (tot_cred), d...",college_2,college_1,False,"[student_transcripts_tracking, college_1, stud...",Contains 'STUDENT' and 'STU_HRS' for total cre...
2,"For each customer, compute total order value a...",department_store,tracking_orders,False,"[tracking_orders, customers_and_invoices, cust...","It includes Orders and Customers, allowing for..."
3,Total value purchased per supplier (Product_Su...,department_store,products_gen_characteristics,False,"[products_gen_characteristics, manufactory_1, ...",No other candidate has a relevant schema for t...
4,Students registered for a course but with no a...,student_assessment,e_learning,False,"[e_learning, student_assessment, student_trans...",It includes Students and Student_Course_Enrolm...
5,Count events per Channel_ID and list the top 5.,solvency_ii,program_share,False,"[program_share, tvshow, news_report]",Contains Channel_ID and relevant broadcast tab...
6,List each customer’s total checking+saving bal...,small_bank_1,small_bank_1,True,"[small_bank_1, loan_1, customers_card_transact...",It has the required accounts and balances for ...
7,Which customer has the highest savings balance...,small_bank_1,small_bank_1,True,"[small_bank_1, loan_1, customers_card_transact...",It contains savings and checking balances link...
8,Show customers and total transaction count per...,customers_card_transactions,customers_card_transactions,True,"[customers_card_transactions, customers_and_in...",It contains relevant tables for customers and ...
9,Albums released after 2015 with their label an...,music_2,music_2,True,"[music_2, music_4, music_1]","Contains Albums with Year, Label, and Type mat..."
