Using SQLite to store the metadata for the user queries

In [1]:
import os
import json
import sqlite3
from glob import glob
from tqdm import tqdm

# SQLite database file opened by init_db(); a relative path, so it is resolved
# against the current working directory.
DB_PATH = "queries.db"
# Root folder that bulk_load_json_files() searches recursively for *.json files.
DATA_DIR = "query_store"  # Change this to wherever the JSON files live

def init_db(db_path=None):
    """Open the SQLite database and make sure the ``queries`` table exists.

    Args:
        db_path: Database location handed to ``sqlite3.connect``. When omitted
            (the default) the module-level ``DB_PATH`` is used, so existing
            zero-argument callers behave exactly as before. Pass
            ``":memory:"`` for a throwaway in-memory database.

    Returns:
        A ``(conn, cur)`` tuple: the open connection and a cursor on it. The
        caller owns both and is responsible for committing later writes and
        closing the connection.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot be
            created. The connection is closed before the error propagates.
    """
    if db_path is None:
        db_path = DB_PATH

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        # IF NOT EXISTS makes this safe to re-run against an existing database.
        # All columns are TEXT; ref_id is the primary key used for upserts.
        cur.execute("""
        CREATE TABLE IF NOT EXISTS queries (
            ref_id TEXT PRIMARY KEY,
            query TEXT,
            sql TEXT,
            brand_id TEXT,
            brand_name TEXT,
            category TEXT,
            instructions TEXT,
            instructions_comment TEXT,
            tables TEXT,
            columns TEXT,
            json_blob TEXT
        )
        """)
        conn.commit()
    except Exception:
        # Do not leak the connection when setup fails.
        conn.close()
        raise
    return conn, cur

def insert_json_file(json_file, cur):
    """Load one JSON file and upsert it as a row of the ``queries`` table.

    The row is keyed by the document's ``ref_id``; an existing row with the
    same key is replaced (``INSERT OR REPLACE``). Keys missing from the
    document are stored as NULL, except ``tables`` and ``columns`` which
    default to an empty JSON list.

    Args:
        json_file: Path to a JSON file whose top-level value is an object.
        cur: sqlite3 cursor on a database that already has the ``queries``
            table. Nothing is committed here; the caller commits.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        sqlite3.Error: If the insert fails (for example, a value that cannot
            be bound as a SQLite parameter).
    """
    # Read as UTF-8 explicitly rather than relying on the platform's locale
    # default encoding.
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Name the target columns so the statement does not silently depend on the
    # physical column order of an already-existing table.
    cur.execute("""
        INSERT OR REPLACE INTO queries (
            ref_id, query, sql, brand_id, brand_name, category,
            instructions, instructions_comment, tables, columns, json_blob
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        data.get("ref_id"),
        data.get("query"),
        data.get("sql"),
        data.get("brand_id"),
        data.get("brand_name"),
        data.get("category"),
        data.get("instructions"),
        data.get("instructions_comment"),
        json.dumps(data.get("tables", [])),   # stored as JSON text
        json.dumps(data.get("columns", [])),  # stored as JSON text
        json.dumps(data)  # Full blob, so no field of the source document is lost
    ))

def bulk_load_json_files():
    """Load every ``*.json`` file under ``DATA_DIR`` into the ``queries`` table.

    The directory tree is searched recursively. Loading is best-effort: a file
    that fails to load is reported on stdout and skipped, and the remaining
    files are still processed. All successful inserts are committed once at
    the end, and the connection is always closed.

    Returns:
        None.
    """
    conn, cur = init_db()
    try:
        # glob returns paths in arbitrary order. Because rows are written with
        # INSERT OR REPLACE, the last file seen for a given ref_id wins, so
        # sort to make that outcome reproducible between runs.
        json_files = sorted(
            glob(os.path.join(DATA_DIR, "**", "*.json"), recursive=True)
        )
        for json_file in tqdm(json_files, desc="Loading JSONs"):
            try:
                insert_json_file(json_file, cur)
            except Exception as e:
                # Deliberately broad: one bad file must not abort the batch.
                print(f"Failed on {json_file}: {e}")
        conn.commit()
    finally:
        # Close even if the loop is interrupted; uncommitted work is discarded.
        conn.close()

# Entry point: run the bulk load only when this code is executed directly
# (as a script, or as a notebook cell) rather than imported as a module.
if __name__ == "__main__":
    bulk_load_json_files()


Loading JSONs: 100%|██████████| 27/27 [00:00<00:00, 10770.99it/s]


# Convert bq_metadata.txt to JSON

In [1]:
import json

# Convert bq_metadata.txt (a text file containing JSON) into a pretty-printed
# bq_metadata.json stored next to it.
from pathlib import Path

# The output path is derived from the input path so the two cannot drift apart
# when the location is changed.
BQ_METADATA_TXT = Path("/home/prakhar/luke-dev/txt2sql_methods/RaTsql/data/bq_metadata.txt")
BQ_METADATA_JSON = BQ_METADATA_TXT.with_suffix(".json")

# Step 1: Load text from file
txt_content = BQ_METADATA_TXT.read_text(encoding="utf-8")

# Step 2: Parse the text as JSON
try:
    json_data = json.loads(txt_content)
except json.JSONDecodeError as e:
    # Chain explicitly so the traceback shows the decode error as the cause.
    raise ValueError(f"❌ Failed to parse JSON: {e}") from e

# Optional: Pretty-print to console
print(json.dumps(json_data, indent=2))

# Step 3: Save to .json file
with BQ_METADATA_JSON.open("w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=2)


{
  "schema": {
    "google_ads_campaign_asset_performance_2643649617": {
      "asset_id": "INT64",
      "campaign_id": "INT64",
      "customer_id": "INT64",
      "asset_final_urls": "STRING",
      "asset_name": "STRING",
      "campaign_asset_field_type": "AD_IMAGE, BOOK_ON_GOOGLE, BUSINESS_LOGO, BUSINESS_MESSAGE, BUSINESS_NAME, CALL, CALLOUT, CALL_TO_ACTION_SELECTION, DEMAND_GEN_CAROUSEL_CARD, DESCRIPTION, HEADLINE, HOTEL_CALLOUT, HOTEL_PROPERTY, LANDSCAPE_LOGO, LEAD_FORM, LOGO, LONG_HEADLINE, MANDATORY_AD_TEXT, MARKETING_IMAGE, MEDIA_BUNDLE, MOBILE_APP, PORTRAIT_MARKETING_IMAGE, PRICE, PROMOTION, SITELINK, SQUARE_MARKETING_IMAGE, STRUCTURED_SNIPPET, TALL_PORTRAIT_MARKETING_IMAGE, UNKNOWN, UNSPECIFIED, VIDEO, YOUTUBE_VIDEO",
      "campaign_asset_primary_status": "ELIGIBLE, LIMITED, NOT_ELIGIBLE, PAUSED, PENDING, REMOVED, UNKNOWN, UNSPECIFIED",
      "campaign_asset_source": "ADVERTISER, AUTOMATICALLY_CREATED, UNKNOWN, UNSPECIFIED",
      "metrics_clicks": "INT64",
      "metric