In [2]:
 %sql postgresql://postgres:sowmya2004@localhost:5432/mydb

In [2]:
!python -m pip install ipython-sql psycopg2




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Register the %sql / %%sql magics (ipython-sql was installed in the cell above).
%load_ext sql

### JSONB INSERTION

In [None]:
import psycopg2, json, csv, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    # Path to your dataset, relative to the notebook's working directory.
    # Forward slashes are accepted on Windows as well as on Linux/macOS.
    csv_path = "datasets/yelp_business.csv"

    # Read the whole CSV into memory; every row becomes a dict of column -> string
    with open(csv_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        rows = list(reader)

    print(f"Loaded {len(rows)} rows from dataset")

    # (Re)create the target table so every run starts from an empty table
    cur.execute("""
        DROP TABLE IF EXISTS jsonb_table;
        CREATE TABLE jsonb_table (
            id SERIAL PRIMARY KEY,
            data JSONB
        );
    """)
    conn.commit()

    # Insert JSONB data: one INSERT per row and a single commit at the end.
    # The json.dumps serialisation is part of the measured time.
    print("Inserting JSONB data...")
    start = time.perf_counter()

    for row in rows:
        cur.execute("INSERT INTO jsonb_table (data) VALUES (%s);", (json.dumps(row),))

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ JSONB insert time: {insert_time:.2f} ms")

    # Measure disk space usage.
    # The third column is "total size minus main heap", so besides the primary-key
    # index it also contains TOAST storage.
    cur.execute("""
        SELECT 
            pg_size_pretty(pg_total_relation_size('jsonb_table')) AS total_size,
            pg_size_pretty(pg_relation_size('jsonb_table')) AS data_size,
            pg_size_pretty(pg_total_relation_size('jsonb_table') - pg_relation_size('jsonb_table')) AS index_overhead;
    """)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Release the cursor and connection even when loading or inserting fails
    cur.close()
    conn.close()
print("\n‚úÖ Done loading JSONB data!")


Loaded 150346 rows from dataset
Inserting JSONB data...
‚úÖ JSONB insert time: 17700.25 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 265 MB
  ‚Üí Data Only Size   : 262 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading JSONB data!


JSONB INSERTION RESULTS (INSERTION TIMES ACROSS 5 RUNS AND DISK SPACE)

34231.11 ms
40995.91 ms
42471.92 ms
43681.28 ms
40305.42 ms

Loaded 150346 rows from dataset
Inserting JSONB data...
‚úÖ JSONB insert time: 37550.10 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 265 MB
  ‚Üí Data Only Size   : 262 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading JSONB data!

### HSTORE INSERTION


In [None]:
import psycopg2, csv, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Enable HSTORE extension (no-op when it is already installed in this database)
cur.execute("CREATE EXTENSION IF NOT EXISTS hstore;")
conn.commit()

# Path to your dataset, relative to the notebook's working directory.
# Forward slashes are accepted on Windows as well as on Linux/macOS.
csv_path = "datasets/yelp_business.csv"

# Read the whole CSV into memory; every row becomes a dict of column -> string
with open(csv_path, newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

print(f"Loaded {len(rows)} rows from dataset")

# (Re)create the HSTORE table so every run starts from an empty table
cur.execute("""
    DROP TABLE IF EXISTS hstore_table;
    CREATE TABLE hstore_table (
        id SERIAL PRIMARY KEY,
        data HSTORE
    );
""")
conn.commit()

# --- Safe escaping function ---
def escape_hstore_value(value):
    """Return `value` as text that is safe inside a double-quoted hstore literal.

    None maps to the empty string. Anything else is converted with str(), then
    every backslash is doubled and every double quote is prefixed with a backslash.
    """
    if value is None:
        return ''
    # Single pass over the text: each special character maps to its escaped form
    escapes = str.maketrans({'\\': '\\\\', '"': '\\"'})
    return str(value).translate(escapes)

# Insert HSTORE data: one INSERT per row and a single commit at the end.
# Building the hstore literal is part of the measured time.
print("Inserting HSTORE data...")
try:
    start = time.perf_counter()

    for row in rows:
        # Escape keys as well as values: a column name containing a quote or a
        # backslash would otherwise produce a malformed hstore literal.
        hstore_pairs = [
            f'"{escape_hstore_value(k)}"=>"{escape_hstore_value(v)}"'
            for k, v in row.items()
        ]
        hstore_str = ','.join(hstore_pairs)
        cur.execute("INSERT INTO hstore_table (data) VALUES (%s::hstore);", (hstore_str,))

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ HSTORE insert time: {insert_time:.2f} ms")

    # Measure disk usage.
    # The third column is "total size minus main heap", so besides the primary-key
    # index it also contains TOAST storage.
    cur.execute("""
        SELECT 
            pg_size_pretty(pg_total_relation_size('hstore_table')) AS total_size,
            pg_size_pretty(pg_relation_size('hstore_table')) AS data_size,
            pg_size_pretty(pg_total_relation_size('hstore_table') - pg_relation_size('hstore_table')) AS index_overhead;
    """)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Release the cursor and connection even when the insert fails
    cur.close()
    conn.close()
print("\n‚úÖ Done loading HSTORE data!")


Loaded 150346 rows from dataset
Inserting HSTORE data...
‚úÖ HSTORE insert time: 19109.28 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 278 MB
  ‚Üí Data Only Size   : 274 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading HSTORE data!


In [None]:
HSTORE INSERTION RESULTS (INSERTION TIMES ACROSS 5 RUNS AND DISK SPACE)

36486.54 ms
37331.38 ms
36852.85 ms
36790.45 ms
39392.87 ms

 Disk Usage Report:
  ‚Üí Total Table Size : 278 MB
  ‚Üí Data Only Size   : 274 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading HSTORE data!


### Vanilla PostgreSQL insertion

In [None]:
import psycopg2, csv, time

# psycopg2's SQL-composition helpers quote identifiers safely
from psycopg2 import sql as pgsql

# Database connection.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    # Path to dataset, relative to the notebook's working directory.
    # Forward slashes are accepted on Windows as well as on Linux/macOS.
    csv_path = "datasets/yelp_business.csv"

    # Read CSV: rows as dicts, plus the header order used for the table columns
    with open(csv_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        rows = list(reader)
        columns = reader.fieldnames

    print(f"Loaded {len(rows)} rows from dataset with {len(columns)} columns")

    # Drop and recreate table dynamically
    cur.execute("DROP TABLE IF EXISTS vanilla_table;")

    # Column names come straight from the CSV header, so they are quoted with
    # sql.Identifier instead of being spliced into the statement with f-strings.
    column_idents = [pgsql.Identifier(col) for col in columns]

    # Build CREATE TABLE dynamically (all columns as TEXT)
    create_sql = pgsql.SQL(
        "CREATE TABLE vanilla_table (id SERIAL PRIMARY KEY, {column_defs});"
    ).format(
        column_defs=pgsql.SQL(", ").join(
            pgsql.SQL("{} TEXT").format(ident) for ident in column_idents
        )
    )

    cur.execute(create_sql)
    conn.commit()
    print("‚úÖ Table created dynamically based on CSV columns.")

    # Build INSERT query dynamically. It is rendered to a plain string once, up
    # front, so the timed loop below does not pay for composing it on every row.
    insert_sql = pgsql.SQL(
        "INSERT INTO vanilla_table ({column_names}) VALUES ({placeholders});"
    ).format(
        column_names=pgsql.SQL(", ").join(column_idents),
        placeholders=pgsql.SQL(", ").join(pgsql.Placeholder() * len(columns)),
    ).as_string(conn)

    # Insert data: one INSERT per row and a single commit at the end
    print("Inserting rows into vanilla_table...")
    start = time.perf_counter()

    for row in rows:
        values = [row[c] for c in columns]
        cur.execute(insert_sql, values)

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ Inserted {len(rows)} rows in {insert_time:.2f} ms")

    # Measure disk usage.
    # The third column is "total size minus main heap", so besides the primary-key
    # index it also contains TOAST storage.
    cur.execute("""
        SELECT 
            pg_size_pretty(pg_total_relation_size('vanilla_table')) AS total_size,
            pg_size_pretty(pg_relation_size('vanilla_table')) AS data_size,
            pg_size_pretty(pg_total_relation_size('vanilla_table') - pg_relation_size('vanilla_table')) AS index_overhead;
    """)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Release the cursor and connection even when loading or inserting fails
    cur.close()
    conn.close()
print("\n‚úÖ Done! Vanilla table created and populated successfully.")


Loaded 150346 rows from dataset with 60 columns
‚úÖ Table created dynamically based on CSV columns.
Inserting rows into vanilla_table...
‚úÖ Inserted 150346 rows in 21767.58 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 71 MB
  ‚Üí Data Only Size   : 68 MB
  ‚Üí Index Overhead   : 3368 kB

‚úÖ Done! Vanilla table created and populated successfully.


VANILLA INSERTION RESULTS AND DISK SPACE STORAGE

Loaded 150346 rows from dataset with 60 columns
‚úÖ Table created dynamically based on CSV columns.
Inserting rows into vanilla_table...
‚úÖ Inserted 150346 rows in 66411.17 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 71 MB
  ‚Üí Data Only Size   : 68 MB
  ‚Üí Index Overhead   : 3368 kB

‚úÖ Done! Vanilla table created and populated successfully.

### Single Key Lookups

In [12]:
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# One key-existence query per storage format.
# NOTE(review): the tables were loaded from CSV rows, so every row carries every
# column; as the recorded output shows, all three queries count every row, i.e.
# the timings compare the cost of the check rather than its selectivity.
queries = {
    "JSONB": "SELECT COUNT(*) FROM jsonb_table WHERE data ? 'attributes.WiFi';",
    "HSTORE": "SELECT COUNT(*) FROM hstore_table WHERE data ? 'attributes.WiFi';",
    "VANILLA": 'SELECT COUNT(*) FROM vanilla_table WHERE "attributes.WiFi" IS NOT NULL;'
}

try:
    for label, q in queries.items():
        # Time execute + fetch of the single COUNT(*) row, reported in milliseconds
        start = time.perf_counter()
        cur.execute(q)
        result = cur.fetchone()[0]
        end = time.perf_counter()
        print(f"{label:<8} ‚Üí count: {result:>6}, time: {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when a query fails
    cur.close()
    conn.close()


JSONB    ‚Üí count: 150346, time: 408.33 ms
HSTORE   ‚Üí count: 150346, time: 35.43 ms
VANILLA  ‚Üí count: 150346, time: 11.32 ms


SINGLE KEY VALUE LOOKUP RESULTS ACROSS JSONB,HSTORE,VANILLA

JSONB    ‚Üí count: 150346, time: 282.97 ms
HSTORE   ‚Üí count: 150346, time: 36.20 ms
VANILLA  ‚Üí count: 150346, time: 9.95 ms

JSONB    ‚Üí count: 150346, time: 282.01 ms
HSTORE   ‚Üí count: 150346, time: 28.13 ms
VANILLA  ‚Üí count: 150346, time: 8.63 ms

JSONB    ‚Üí count: 150346, time: 299.52 ms
HSTORE   ‚Üí count: 150346, time: 29.94 ms
VANILLA  ‚Üí count: 150346, time: 8.70 ms

JSONB    ‚Üí count: 150346, time: 273.59 ms
HSTORE   ‚Üí count: 150346, time: 27.06 ms
VANILLA  ‚Üí count: 150346, time: 9.57 ms

JSONB    ‚Üí count: 150346, time: 289.53 ms
HSTORE   ‚Üí count: 150346, time: 28.03 ms
VANILLA  ‚Üí count: 150346, time: 8.73 ms

AVG JSONB: 285.524 ms
AVG HSTORE: 29.872 ms
AVG VANILLA: 9.116 ms

INDEX CREATION ACROSS JSONB, HSTORE

In [16]:
%%sql
-- JSONB GIN index.
-- Built with the default operator class (jsonb_ops). The JSONB queries in this
-- notebook use the key-existence operators, which the jsonb_path_ops class does
-- not support, so an index built with that class could never serve them.
-- If jsonb_idx already exists from an earlier run, drop it before running this
-- cell, because IF NOT EXISTS keeps the old definition.
CREATE INDEX IF NOT EXISTS jsonb_idx ON jsonb_table USING GIN (data);

-- HSTORE GIN index (its default operator class supports key-existence checks)
CREATE INDEX IF NOT EXISTS hstore_idx ON hstore_table USING GIN (data);



 * postgresql://postgres:***@localhost:5432/mydb
Done.
Done.


[]

### Multiple Key Existence Check

In [20]:
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# "All of these keys exist" check, expressed once per storage format
queries = {
    "JSONB": """
        SELECT COUNT(*) FROM jsonb_table
        WHERE data ?& ARRAY['attributes.WiFi', 'attributes.Alcohol'];
    """,
    "HSTORE": """
        SELECT COUNT(*) FROM hstore_table
        WHERE data ?& ARRAY['attributes.WiFi', 'attributes.Alcohol'];
    """,
    "VANILLA": """
        SELECT COUNT(*) FROM vanilla_table
        WHERE "attributes.WiFi" IS NOT NULL
          AND "attributes.Alcohol" IS NOT NULL;
    """
}

print("üîπ TC9: Check multiple key existence performance\n")

try:
    for label, query in queries.items():
        # Time execute + fetch of the single COUNT(*) row, reported in milliseconds
        start = time.perf_counter()
        cur.execute(query)
        result = cur.fetchone()[0]
        end = time.perf_counter()
        print(f"{label:<8} ‚Üí count: {result:>6}, time: {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when a query fails
    cur.close()
    conn.close()


üîπ TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 446.25 ms
HSTORE   ‚Üí count: 150346, time: 38.29 ms
VANILLA  ‚Üí count: 150346, time: 73.39 ms


multiple key existence performance results

TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 434.88 ms
HSTORE   ‚Üí count: 150346, time: 42.46 ms
VANILLA  ‚Üí count: 150346, time: 80.77 ms

 TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 483.87 ms
HSTORE   ‚Üí count: 150346, time: 35.41 ms
VANILLA  ‚Üí count: 150346, time: 74.65 ms

 TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 422.58 ms
HSTORE   ‚Üí count: 150346, time: 37.46 ms
VANILLA  ‚Üí count: 150346, time: 72.14 ms

üîπ TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 308.57 ms
HSTORE   ‚Üí count: 150346, time: 27.15 ms
VANILLA  ‚Üí count: 150346, time: 55.32 ms

üîπ TC9: Check multiple key existence performance

JSONB    ‚Üí count: 150346, time: 340.58 ms
HSTORE   ‚Üí count: 150346, time: 28.18 ms
VANILLA  ‚Üí count: 150346, time: 53.31 ms

AVG JSONB: 398.096 ms
AVG HSTORE: 34.132 ms
AVG VANILLA: 67.238 ms

### Single Value Extraction

In [32]:
import psycopg2, time

# Connection settings shared by every run below.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
DB_CONFIG = dict(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)

# Pull one field (stars) from every row, once per storage format
queries = {
    "JSONB": "SELECT data->>'stars' FROM jsonb_table;",
    "HSTORE": "SELECT data->'stars' FROM hstore_table;",
    "VANILLA": "SELECT stars FROM vanilla_table;"
}

print("üîπ TC10: Extract Single Value Performance (Cache-Isolated)\n")

for label, query in queries.items():
    # Step 1: Open a new connection for each query
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        # DISCARD ALL cannot run inside a transaction block, hence autocommit
        conn.autocommit = True
        cur = conn.cursor()

        # Step 2: Reset session state (prepared plans, temp tables, session
        # settings). This does NOT evict table pages from PostgreSQL's shared
        # buffers or the OS page cache, so the timings are warm-cache timings.
        try:
            cur.execute("DISCARD ALL;")
        except psycopg2.Error as e:
            print(f"‚ö†Ô∏è Cache reset skipped for {label} ({e})")

        conn.autocommit = False  # revert back to transactional mode

        # Step 3: Time the execution, including transfer of all rows to the client
        start = time.perf_counter()
        cur.execute(query)
        result = cur.fetchall()
        end = time.perf_counter()

        print(f"{label:<8} ‚Üí retrieved {len(result):>6} rows, time: {(end - start)*1000:.2f} ms")

        cur.close()
    finally:
        # Step 4: Always close the connection, even when the query fails
        conn.close()


üîπ TC10: Extract Single Value Performance (Cache-Isolated)

JSONB    ‚Üí retrieved 150346 rows, time: 406.35 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 423.65 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 77.42 ms


Extracting single value performance Results

üîπüîπ TC10: Extract Single Value Performance

JSONB    ‚Üí retrieved 150346 rows, time: 458.60 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 430.63 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 89.55 ms

üîπ üîπ TC10: Extract Single Value Performance

JSONB    ‚Üí retrieved 150346 rows, time: 411.67 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 451.29 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 81.58 ms

üîπüîπ TC10: Extract Single Value Performance

JSONB    ‚Üí retrieved 150346 rows, time: 404.72 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 456.82 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 80.95 ms

üîπ üîπ TC10: Extract Single Value Performance

JSONB    ‚Üí retrieved 150346 rows, time: 393.33 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 447.31 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 83.75 ms

üîπ TC10: Extract Single Value Performance

JSONB    ‚Üí retrieved 150346 rows, time: 411.32 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 464.50 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 82.02 ms

AVG JSONB: 415.928 ms
AVG HSTORE: 450.11 ms
AVG VANILLA: 83.57 ms



### Multi Value Extraction

In [28]:
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Pull three fields (city, state, stars) from every row, once per storage format
queries = {
    "JSONB": "SELECT data->>'city', data->>'state', data->>'stars' FROM jsonb_table;",
    "HSTORE": "SELECT data->'city', data->'state', data->'stars' FROM hstore_table;",
    "VANILLA": "SELECT city, state, stars FROM vanilla_table;"
}

print("üîπ TC11: Extract Multiple Values Performance\n")

try:
    for label, query in queries.items():
        # The timing includes transferring all result rows to the client
        start = time.perf_counter()
        cur.execute(query)
        rows = cur.fetchall()
        end = time.perf_counter()

        print(f"{label:<8} ‚Üí retrieved {len(rows):>6} rows, time: {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when a query fails
    cur.close()
    conn.close()


üîπ TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 1175.57 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 1081.10 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 175.47 ms


Extract Multiple Values Performance Results

üîπ  TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 932.79 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 779.27 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 125.55 ms

üîπ  TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 780.66 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 807.88 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 117.02 ms

üîπ  TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 1094.40 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 771.19 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 117.88 ms

üîπ TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 783.39 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 768.75 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 113.34 ms

üîπüîπ TC11: Extract Multiple Values Performance

JSONB    ‚Üí retrieved 150346 rows, time: 794.69 ms
HSTORE   ‚Üí retrieved 150346 rows, time: 788.99 ms
VANILLA  ‚Üí retrieved 150346 rows, time: 108.96 ms

AVG JSONB: 877.186 ms
AVG HSTORE: 783.216 ms
AVG VANILLA: 116.55 ms

### FILTER BY SPECIFIC VALUE QUERY

In [63]:
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Equality filter on stars. The value is stored as text in all three tables, so
# each query casts it to float before comparing.
queries = {
    "JSONB": "SELECT COUNT(*) FROM jsonb_table WHERE (data->>'stars')::float = 4.0;",
    "HSTORE": "SELECT COUNT(*) FROM hstore_table WHERE (data->'stars')::float = 4.0;",
    "VANILLA": "SELECT COUNT(*) FROM vanilla_table WHERE stars::float = 4.0;"
}

print("üîπ TC12: Filter by Specific Value (stars = 4.0)\n")

try:
    for label, query in queries.items():
        # Time execute + fetch of the single COUNT(*) row, reported in milliseconds
        start = time.perf_counter()
        cur.execute(query)
        count = cur.fetchone()[0]
        end = time.perf_counter()

        print(f"{label:<8} ‚Üí count: {count:>7}, time: {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when a query fails
    cur.close()
    conn.close()


üîπ TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 191.61 ms
HSTORE   ‚Üí count:   31125, time: 205.78 ms
VANILLA  ‚Üí count:   31125, time: 70.17 ms


FILTER BY SPECIFIC VALUE RESULTS

TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 194.06 ms
HSTORE   ‚Üí count:   31125, time: 189.01 ms
VANILLA  ‚Üí count:   31125, time: 69.89 ms

TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 200.82 ms
HSTORE   ‚Üí count:   31125, time: 190.04 ms
VANILLA  ‚Üí count:   31125, time: 69.70 ms

üîπ TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 212.13 ms
HSTORE   ‚Üí count:   31125, time: 181.97 ms
VANILLA  ‚Üí count:   31125, time: 66.60 ms

üîπ TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 189.50 ms
HSTORE   ‚Üí count:   31125, time: 184.53 ms
VANILLA  ‚Üí count:   31125, time: 63.29 ms

üîπ TC12: Filter by Specific Value (stars = 4.0)

JSONB    ‚Üí count:   31125, time: 191.61 ms
HSTORE   ‚Üí count:   31125, time: 205.78 ms
VANILLA  ‚Üí count:   31125, time: 70.17 ms

### RANGE FILTERING QUERY

In [42]:
# ---------- TC13: Range Filtering (Fixed Cache Reset) ----------
import psycopg2, time, statistics, os

# Connection settings used by run_query() for every fresh connection.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
DB_CONFIG = dict(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)

# The same three-condition range filter, expressed once per storage format.
# All values are stored as text, so each query casts before comparing.
QUERIES = {
    "JSONB": """
        SELECT COUNT(*) 
        FROM jsonb_table 
        WHERE (data->>'stars')::float >= 4.0 
          AND (data->>'review_count')::int > 50 
          AND (data->>'is_open')::int = 1;
    """,
    "HSTORE": """
        SELECT COUNT(*) 
        FROM hstore_table 
        WHERE (data->'stars')::float >= 4.0 
          AND (data->'review_count')::int > 50 
          AND (data->'is_open')::int = 1;
    """,
    "VANILLA": """
        SELECT COUNT(*) 
        FROM vanilla_table 
        WHERE stars::float >= 4.0 
          AND review_count::int > 50 
          AND is_open::int = 1;
    """
}

def run_query(label, query, repeat=5):
    """Time `query` on `repeat` fresh connections and return the mean time in ms.

    Each run opens its own connection, issues DISCARD ALL, executes the query,
    fetches the first column of the first row (the COUNT) and prints the timing.

    Note that DISCARD ALL only resets session state (prepared plans, temporary
    tables, session settings). It does not evict table pages from PostgreSQL's
    shared buffers or the OS page cache, so these are warm-cache timings.

    Args:
        label: Name printed in front of each timing line.
        query: SQL text returning a single row whose first column is the count.
        repeat: Number of timed runs; must be at least 1.

    Returns:
        The arithmetic mean of the per-run wall-clock times, in milliseconds.

    Raises:
        ValueError: If `repeat` is smaller than 1.
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")

    times = []
    for i in range(repeat):
        # New connection for every run
        conn = psycopg2.connect(**DB_CONFIG)
        try:
            # DISCARD ALL cannot run inside a transaction block, hence autocommit
            conn.autocommit = True
            cur = conn.cursor()

            # Best-effort session reset; a failure is reported but not fatal
            try:
                cur.execute("DISCARD ALL;")
            except psycopg2.Error as e:
                print(f"‚ö†Ô∏è Cache reset skipped ({e})")

            # Back to normal transactional mode for the measured query
            conn.autocommit = False

            # Run the test query; only execute + fetch is timed
            start = time.perf_counter()
            cur.execute(query)
            count = cur.fetchone()[0]
            end = time.perf_counter()

            cur.close()
        finally:
            # Never leak the connection, even when the query raises
            conn.close()

        t = (end - start) * 1000
        times.append(t)
        print(f"  ‚û§ {label} Run {i+1}: {count} rows in {t:.2f} ms")

        # small delay between runs
        time.sleep(0.5)

    avg_time = statistics.mean(times)
    print(f"‚úÖ {label:<8} | Avg Time: {avg_time:.2f} ms | Runs: {repeat}\n")
    return avg_time


# Announce the test case, then benchmark every storage format in dictionary order
print("üîπ TC13: Range Filtering (Cache Isolated Runs ‚Äì Fixed)\n")

results = {name: run_query(name, sql_text) for name, sql_text in QUERIES.items()}

# Final report: one line per format with its mean time
print("üìä Summary (Average Times)")
for name, mean_ms in results.items():
    print(f"{name:<8}: {mean_ms:.2f} ms")


üîπ TC13: Range Filtering (Cache Isolated Runs ‚Äì Fixed)

  ‚û§ JSONB Run 1: 13250 rows in 239.82 ms
  ‚û§ JSONB Run 2: 13250 rows in 234.57 ms
  ‚û§ JSONB Run 3: 13250 rows in 227.92 ms
  ‚û§ JSONB Run 4: 13250 rows in 250.23 ms
  ‚û§ JSONB Run 5: 13250 rows in 231.67 ms
‚úÖ JSONB    | Avg Time: 236.84 ms | Runs: 5

  ‚û§ HSTORE Run 1: 13250 rows in 229.15 ms
  ‚û§ HSTORE Run 2: 13250 rows in 225.39 ms
  ‚û§ HSTORE Run 3: 13250 rows in 222.83 ms
  ‚û§ HSTORE Run 4: 13250 rows in 220.11 ms
  ‚û§ HSTORE Run 5: 13250 rows in 226.54 ms
‚úÖ HSTORE   | Avg Time: 224.80 ms | Runs: 5

  ‚û§ VANILLA Run 1: 13250 rows in 153.60 ms
  ‚û§ VANILLA Run 2: 13250 rows in 145.20 ms
  ‚û§ VANILLA Run 3: 13250 rows in 71.61 ms
  ‚û§ VANILLA Run 4: 13250 rows in 156.93 ms
  ‚û§ VANILLA Run 5: 13250 rows in 81.32 ms
‚úÖ VANILLA  | Avg Time: 121.73 ms | Runs: 5

üìä Summary (Average Times)
JSONB   : 236.84 ms
HSTORE  : 224.80 ms
VANILLA : 121.73 ms


RANGE FILTERING PERFORMANCE RESULTS

üîπ TC13: Range Filtering (Cache Isolated Runs ‚Äì Fixed)

  ‚û§ JSONB Run 1: 13250 rows in 300.05 ms
  ‚û§ JSONB Run 2: 13250 rows in 277.26 ms
  ‚û§ JSONB Run 3: 13250 rows in 270.66 ms
  ‚û§ JSONB Run 4: 13250 rows in 265.07 ms
  ‚û§ JSONB Run 5: 13250 rows in 263.94 ms
‚úÖ JSONB    | Avg Time: 275.39 ms | Runs: 5

  ‚û§ HSTORE Run 1: 13250 rows in 273.17 ms
  ‚û§ HSTORE Run 2: 13250 rows in 281.79 ms
  ‚û§ HSTORE Run 3: 13250 rows in 278.29 ms
  ‚û§ HSTORE Run 4: 13250 rows in 266.57 ms
  ‚û§ HSTORE Run 5: 13250 rows in 262.41 ms
‚úÖ HSTORE   | Avg Time: 272.44 ms | Runs: 5

  ‚û§ VANILLA Run 1: 13250 rows in 95.71 ms
  ‚û§ VANILLA Run 2: 13250 rows in 100.44 ms
  ‚û§ VANILLA Run 3: 13250 rows in 93.80 ms
  ‚û§ VANILLA Run 4: 13250 rows in 104.85 ms
  ‚û§ VANILLA Run 5: 13250 rows in 89.62 ms
‚úÖ VANILLA  | Avg Time: 96.89 ms | Runs: 5

üìä Summary (Average Times)
JSONB   : 275.39 ms
HSTORE  : 272.44 ms
VANILLA : 96.89 ms

### AGGREGATED QUERY CONTAINING AVG, GROUP BY, FILTERING ACROSS ALL THREE: JSONB, HSTORE, VANILLA

In [49]:
# ---------- TC15: Aggregation Test (Cache Isolated & Type Safe) ----------
import psycopg2, time, statistics

# PostgreSQL Connection Config, used by run_query() for every fresh connection.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
DB_CONFIG = dict(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)

# -------------------------
#  Aggregation Queries
# -------------------------
# We compute AVG(stars) grouped by city for each format.
# Casting ensures Postgres treats stars as numeric, not text.
# In the HSTORE and JSONB queries, GROUP BY city refers to the output alias.
QUERIES = {
    "HSTORE": """
        SELECT data->'city' AS city,
               AVG((data->'stars')::float) AS avg_stars
        FROM hstore_table
        WHERE (data->'city') IS NOT NULL
        GROUP BY city;
    """,
    "JSONB": """
        SELECT data->>'city' AS city,
               AVG((data->>'stars')::float) AS avg_stars
        FROM jsonb_table
        WHERE (data->>'city') IS NOT NULL
        GROUP BY city;
    """,
    "VANILLA": """
        SELECT city,
               AVG(stars::float) AS avg_stars
        FROM vanilla_table
        WHERE city IS NOT NULL
        GROUP BY city;
    """
}

# -------------------------
#  ‚öôÔ∏è Helper Function
# -------------------------
def run_query(label, query, repeat=5):
    """Time `query` on `repeat` fresh connections and return the mean time in ms.

    Each run opens its own connection, issues DISCARD ALL, executes the query,
    fetches every result row and prints how many groups came back and how long
    it took.

    Note that DISCARD ALL only resets session state (prepared plans, temporary
    tables, session settings). It does not evict table pages from PostgreSQL's
    shared buffers or the OS page cache, so these are warm-cache timings.

    Args:
        label: Name printed in front of each timing line.
        query: SQL text of the aggregation to benchmark.
        repeat: Number of timed runs; must be at least 1.

    Returns:
        The arithmetic mean of the per-run wall-clock times, in milliseconds.

    Raises:
        ValueError: If `repeat` is smaller than 1.
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")

    times = []
    for i in range(repeat):
        # Step 1: Open a fresh connection for each run
        conn = psycopg2.connect(**DB_CONFIG)
        try:
            # DISCARD ALL cannot run inside a transaction block, hence autocommit
            conn.autocommit = True
            cur = conn.cursor()

            # Step 2: Best-effort session reset; a failure is reported, not fatal
            try:
                cur.execute("DISCARD ALL;")
            except psycopg2.Error as e:
                print(f"‚ö†Ô∏è Cache reset skipped ({e})")

            conn.autocommit = False  # back to transactional mode

            # Step 3: Time the aggregation query, including fetching all groups
            start = time.perf_counter()
            cur.execute(query)
            rows = cur.fetchall()
            end = time.perf_counter()

            cur.close()
        finally:
            # Never leak the connection, even when the query raises
            conn.close()

        # Step 4: Log results
        t = (end - start) * 1000
        times.append(t)
        print(f"  ‚û§ {label} Run {i+1}: {len(rows)} groups in {t:.2f} ms")

        time.sleep(0.5)  # small delay between runs

    avg_time = statistics.mean(times)
    print(f"‚úÖ {label:<8} | Avg Time: {avg_time:.2f} ms | Runs: {repeat}\n")
    return avg_time


# -------------------------
#  üöÄ Main Benchmark
# -------------------------
# Announce the test case, then benchmark every storage format in dictionary order
print("üîπ TC15: Aggregation Performance Test (Cache Isolated & Type Safe)\n")

results = {name: run_query(name, sql_text) for name, sql_text in QUERIES.items()}

# Final report: one line per format with its mean time
print("üìä Summary (Average Times)")
for name, mean_ms in results.items():
    print(f"{name:<8}: {mean_ms:.2f} ms")


üîπ TC15: Aggregation Performance Test (Cache Isolated & Type Safe)

  ‚û§ HSTORE Run 1: 1416 groups in 792.37 ms
  ‚û§ HSTORE Run 2: 1416 groups in 721.76 ms
  ‚û§ HSTORE Run 3: 1416 groups in 717.06 ms
  ‚û§ HSTORE Run 4: 1416 groups in 732.25 ms
  ‚û§ HSTORE Run 5: 1416 groups in 725.55 ms
‚úÖ HSTORE   | Avg Time: 737.80 ms | Runs: 5

  ‚û§ JSONB Run 1: 1416 groups in 701.62 ms
  ‚û§ JSONB Run 2: 1416 groups in 690.51 ms
  ‚û§ JSONB Run 3: 1416 groups in 714.77 ms
  ‚û§ JSONB Run 4: 1416 groups in 741.49 ms
  ‚û§ JSONB Run 5: 1416 groups in 688.90 ms
‚úÖ JSONB    | Avg Time: 707.46 ms | Runs: 5

  ‚û§ VANILLA Run 1: 1416 groups in 79.40 ms
  ‚û§ VANILLA Run 2: 1416 groups in 78.50 ms
  ‚û§ VANILLA Run 3: 1416 groups in 77.65 ms
  ‚û§ VANILLA Run 4: 1416 groups in 77.93 ms
  ‚û§ VANILLA Run 5: 1416 groups in 89.44 ms
‚úÖ VANILLA  | Avg Time: 80.59 ms | Runs: 5

üìä Summary (Average Times)
HSTORE  : 737.80 ms
JSONB   : 707.46 ms
VANILLA : 80.59 ms


TC15: Aggregation Performance Test Results

üîπ TC15: Aggregation Performance Test (Cache Isolated & Type Safe)

  ‚û§ HSTORE Run 1: 1416 groups in 792.37 ms
  ‚û§ HSTORE Run 2: 1416 groups in 721.76 ms
  ‚û§ HSTORE Run 3: 1416 groups in 717.06 ms
  ‚û§ HSTORE Run 4: 1416 groups in 732.25 ms
  ‚û§ HSTORE Run 5: 1416 groups in 725.55 ms
‚úÖ HSTORE   | Avg Time: 737.80 ms | Runs: 5

  ‚û§ JSONB Run 1: 1416 groups in 701.62 ms
  ‚û§ JSONB Run 2: 1416 groups in 690.51 ms
  ‚û§ JSONB Run 3: 1416 groups in 714.77 ms
  ‚û§ JSONB Run 4: 1416 groups in 741.49 ms
  ‚û§ JSONB Run 5: 1416 groups in 688.90 ms
‚úÖ JSONB    | Avg Time: 707.46 ms | Runs: 5

  ‚û§ VANILLA Run 1: 1416 groups in 79.40 ms
  ‚û§ VANILLA Run 2: 1416 groups in 78.50 ms
  ‚û§ VANILLA Run 3: 1416 groups in 77.65 ms
  ‚û§ VANILLA Run 4: 1416 groups in 77.93 ms
  ‚û§ VANILLA Run 5: 1416 groups in 89.44 ms
‚úÖ VANILLA  | Avg Time: 80.59 ms | Runs: 5

üìä Summary (Average Times)
HSTORE  : 737.80 ms
JSONB   : 707.46 ms
VANILLA : 80.59 ms

In [None]:
import psycopg2, json, csv, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    # Path to your dataset, relative to the notebook's working directory.
    # Forward slashes are accepted on Windows as well as on Linux/macOS.
    csv_path = "datasets/yelp_business.csv"

    # Read the whole CSV into memory; every row becomes a dict of column -> string
    with open(csv_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        rows = list(reader)

    print(f"Loaded {len(rows)} rows from dataset")

    # (Re)create the target table so the update tests below start from fresh data
    cur.execute("""
        DROP TABLE IF EXISTS jsonb_table;
        CREATE TABLE jsonb_table (
            id SERIAL PRIMARY KEY,
            data JSONB
        );
    """)
    conn.commit()

    # Insert JSONB data: one INSERT per row and a single commit at the end.
    # The json.dumps serialisation is part of the measured time.
    print("Inserting JSONB data...")
    start = time.perf_counter()

    for row in rows:
        cur.execute("INSERT INTO jsonb_table (data) VALUES (%s);", (json.dumps(row),))

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ JSONB insert time: {insert_time:.2f} ms")

    # Measure disk space usage.
    # The third column is "total size minus main heap", so besides the primary-key
    # index it also contains TOAST storage.
    cur.execute("""
        SELECT 
            pg_size_pretty(pg_total_relation_size('jsonb_table')) AS total_size,
            pg_size_pretty(pg_relation_size('jsonb_table')) AS data_size,
            pg_size_pretty(pg_total_relation_size('jsonb_table') - pg_relation_size('jsonb_table')) AS index_overhead;
    """)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Release the cursor and connection even when loading or inserting fails
    cur.close()
    conn.close()
print("\n‚úÖ Done loading JSONB data!")


Loaded 150346 rows from dataset
Inserting JSONB data...
‚úÖ JSONB insert time: 17916.93 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 265 MB
  ‚Üí Data Only Size   : 262 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading JSONB data!


### Single Update Query

In [80]:
# ---------- TC19A: JSONB Update Test ----------
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running JSONB update query (city = 'Tucson')...")
    start = time.perf_counter()

    # Overwrite the top-level "stars" key for every Tucson business.
    # NOTE(review): this writes a JSON number, whereas the loader stored every
    # value as a JSON string; the ->> casts used elsewhere work for both.
    cur.execute("""
        UPDATE jsonb_table
        SET data = jsonb_set(data, '{stars}', '5.0'::jsonb)
        WHERE (data ->> 'city') = 'Tucson';
    """)
    conn.commit()

    # The commit is inside the timed region on purpose
    end = time.perf_counter()
    print(f"‚úÖ JSONB update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when the update fails
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 1541.68 ms
‚úÖ Done!


In [126]:
# ---------- TC19C: JSONB Nested Update Test ----------
import psycopg2, time

# Connect to PostgreSQL.
# The password is intentionally not hardcoded: libpq (used by psycopg2) resolves it
# from the PGPASSWORD environment variable or from the libpq password file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running JSONB nested update query (state='CA', Restaurants only)...")
    start = time.perf_counter()

    # The documents were loaded from flat CSV rows, so attributes live under
    # dotted top-level keys such as "attributes.WiFi"; there is no nested
    # "attributes" object. A two-step path {attributes,WheelchairAccessible}
    # would make jsonb_set return the document unchanged (every path step except
    # the last must already exist), so the flat key is addressed directly.
    cur.execute("""
        UPDATE jsonb_table
        SET data = jsonb_set(
            data,
            '{attributes.WheelchairAccessible}',
            '"True"'::jsonb,
            TRUE
        )
        WHERE (data ->> 'state') = 'CA'
          AND (data ->> 'is_open') = '1'
          AND (data ->> 'categories') ILIKE '%Restaurants%';
    """)
    conn.commit()

    # The commit is inside the timed region on purpose
    end = time.perf_counter()
    print(f"‚úÖ JSONB update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even when the update fails
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 797.11 ms
‚úÖ Done!


Update Results Across JSONB FOR SINGLE VALUE UPDATE AND MULTI VALUE UPDATES

Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 834.96 ms
‚úÖ Done!

Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 1250.26 ms
‚úÖ Done!

Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 1876.27 ms
‚úÖ Done!

Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 751.47 ms
‚úÖ Done!

Running JSONB update query (city = 'Tucson')...
‚úÖ JSONB update completed in 1541.68 ms
‚úÖ Done!

Avg:1250.928 ms.

Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 797.11 ms
‚úÖ Done!

Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 865.35 ms
‚úÖ Done!

Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 843.35 ms
‚úÖ Done

Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 872.74 ms
‚úÖ Done!

Running JSONB nested update query (state='CA', Restaurants only)...
‚úÖ JSONB update completed in 840.51 ms
‚úÖ Done!

In [None]:
import psycopg2, csv, os, time

# Path to your dataset, relative to the working directory (update this path).
# os.path.join uses the platform's separator: on Windows it produces the same
# "datasets\yelp_business.csv" string as before, and on POSIX systems it produces
# "datasets/yelp_business.csv" instead of a file name containing a backslash.
csv_path = os.path.join("datasets", "yelp_business.csv")

# Read the whole CSV into memory before touching the database, so a missing or
# unreadable file fails here without leaving a connection open.
# Each row becomes a dict keyed by the CSV header names.
with open(csv_path, newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

print(f"Loaded {len(rows)} rows from dataset")

# Connect to PostgreSQL
# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Enable HSTORE extension (no-op when it is already installed)
cur.execute("CREATE EXTENSION IF NOT EXISTS hstore;")
conn.commit()

# Create the HSTORE table from scratch so every run starts with an empty table
cur.execute("""
    DROP TABLE IF EXISTS hstore_table;
    CREATE TABLE hstore_table (
        id SERIAL PRIMARY KEY,
        data HSTORE
    );
""")
conn.commit()

# --- Safe escaping function ---
def escape_hstore_value(value):
    """Return `value` as text that can be placed between double quotes in an hstore literal.

    `None` maps to the empty string. Any other value is converted with `str()`;
    every backslash is then doubled and every double quote is prefixed with a
    backslash.
    """
    if value is None:
        return ''
    text = str(value)
    # Backslashes must be doubled first; otherwise the backslashes introduced
    # for the quotes on the next line would themselves get doubled.
    without_bare_backslashes = text.replace('\\', '\\\\')
    return without_bare_backslashes.replace('"', '\\"')

# Insert HSTORE data
# Loads every CSV row into hstore_table one INSERT at a time, reports the elapsed
# time (string building + INSERTs + commit), then reports the table's disk usage.
# Relies on `rows`, `conn`, `cur` and `escape_hstore_value` defined earlier in this cell.

# Disk-usage query. The third column is total size minus the main data fork, so
# besides indexes it also counts TOAST storage and the free-space/visibility maps;
# the "Index Overhead" label below is therefore an upper bound on index size.
size_sql = """
    SELECT 
        pg_size_pretty(pg_total_relation_size('hstore_table')) AS total_size,
        pg_size_pretty(pg_relation_size('hstore_table')) AS data_size,
        pg_size_pretty(pg_total_relation_size('hstore_table') - pg_relation_size('hstore_table')) AS index_overhead;
"""

print("Inserting HSTORE data...")
try:
    start = time.perf_counter()

    for row in rows:
        # Build the hstore text literal: "key"=>"value" pairs joined by commas.
        # NOTE(review): values are escaped, keys are interpolated as-is; a CSV
        # header containing a double quote or backslash would break the literal.
        hstore_pairs = [f'"{k}"=>"{escape_hstore_value(v)}"' for k, v in row.items()]
        hstore_str = ','.join(hstore_pairs)
        cur.execute("INSERT INTO hstore_table (data) VALUES (%s::hstore);", (hstore_str,))

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ HSTORE insert time: {insert_time:.2f} ms")

    # Measure disk usage
    cur.execute(size_sql)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Always release the cursor and connection, including when an INSERT fails
    # part-way; closing the connection discards any uncommitted rows.
    cur.close()
    conn.close()
print("\n‚úÖ Done loading HSTORE data!")


Loaded 150346 rows from dataset
Inserting HSTORE data...
‚úÖ HSTORE insert time: 23184.63 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 278 MB
  ‚Üí Data Only Size   : 274 MB
  ‚Üí Index Overhead   : 3416 kB

‚úÖ Done loading HSTORE data!


### SINGLE UPDATE QUERY ACROSS HSTORE

In [90]:
# ---------- TC19B: HSTORE Update Test ----------
# Times a single UPDATE that sets the "stars" key to "5.0" on every hstore_table
# row whose "city" key equals 'Tucson'. The measured interval covers the
# statement and its commit.
import psycopg2, time

# `data || '...'` concatenates the one-pair hstore onto `data`.
update_sql = """
    UPDATE hstore_table
    SET data = data || '"stars"=>"5.0"'
    WHERE (data -> 'city') = 'Tucson';
"""

# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running HSTORE update query (city = 'Tucson')...")
    start = time.perf_counter()

    cur.execute(update_sql)
    conn.commit()

    end = time.perf_counter()
    print(f"‚úÖ HSTORE update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even if the UPDATE raises; closing the
    # connection discards any uncommitted change.
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 1981.38 ms
‚úÖ Done!


### MULTI UPDATE QUERY ACROSS HSTORE 

In [122]:
# ---------- TC19B: HSTORE Flat Update Test ----------
# Times a single UPDATE that sets the flat key "attributes.WheelchairAccessible"
# to "True" on hstore_table rows matching three conditions (state, is_open,
# categories). The measured interval covers the statement and its commit.
import psycopg2, time

# `data || '...'` concatenates the one-pair hstore onto `data`.
update_sql = """
    UPDATE hstore_table
    SET data = data || '"attributes.WheelchairAccessible"=>"True"'
    WHERE (data -> 'state') = 'CA'
      AND (data -> 'is_open') = '1'
      AND (data -> 'categories') ILIKE '%Restaurants%';
"""

# Connect to PostgreSQL
# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running HSTORE update query (state='CA', Restaurants only)...")
    start = time.perf_counter()

    # Executed without a parameter tuple, so the literal % signs in the ILIKE
    # pattern are sent to the server unchanged.
    cur.execute(update_sql)
    conn.commit()

    end = time.perf_counter()
    print(f"‚úÖ HSTORE update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even if the UPDATE raises; closing the
    # connection discards any uncommitted change.
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 880.19 ms
‚úÖ Done!


SINGLE UPDATE AND MULTI UPDATE RESULTS ACROSS HSTORE

Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 948.60 ms
‚úÖ Done!

Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 1879.98 ms
‚úÖ Done!

Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 1018.27 ms
‚úÖ Done!

Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 1059.94 ms
‚úÖ Done!

Running HSTORE update query (city = 'Tucson')...
‚úÖ HSTORE update completed in 1981.38 ms
‚úÖ Done!

Avg: 1377.634 ms (single-value update, city = 'Tucson', 5 runs)

Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 888.02 ms
‚úÖ Done!

Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 867.16 ms
‚úÖ Done!

Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 862.27 ms
‚úÖ Done!

Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 850.62 ms
‚úÖ Done!

Running HSTORE update query (state='CA', Restaurants only)...
‚úÖ HSTORE update completed in 880.19 ms
‚úÖ Done!

In [None]:
import psycopg2, csv, os, time
from psycopg2 import sql

# Loads the CSV into a plain relational table (one TEXT column per CSV column),
# reports the elapsed insert time, then reports the table's disk usage.

# Path to dataset, relative to the working directory.
# os.path.join uses the platform's separator: on Windows it produces the same
# "datasets\yelp_business.csv" string as before, and on POSIX systems it produces
# "datasets/yelp_business.csv" instead of a file name containing a backslash.
csv_path = os.path.join("datasets", "yelp_business.csv")

# Disk-usage query. The third column is total size minus the main data fork, so
# besides indexes it also counts TOAST storage and the free-space/visibility maps;
# the "Index Overhead" label below is therefore an upper bound on index size.
size_sql = """
    SELECT 
        pg_size_pretty(pg_total_relation_size('vanilla_table')) AS total_size,
        pg_size_pretty(pg_relation_size('vanilla_table')) AS data_size,
        pg_size_pretty(pg_total_relation_size('vanilla_table') - pg_relation_size('vanilla_table')) AS index_overhead;
"""

# Read CSV before opening the connection, so a missing or unreadable file fails
# here without leaving a connection open.
with open(csv_path, newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    rows = list(reader)
    columns = reader.fieldnames

print(f"Loaded {len(rows)} rows from dataset with {len(columns)} columns")

# Database connection
# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    # Drop and recreate table dynamically
    cur.execute("DROP TABLE IF EXISTS vanilla_table;")

    # Build CREATE TABLE dynamically (all columns as TEXT).
    # sql.Identifier double-quotes each header name and escapes any embedded
    # quote, so an unusual CSV header cannot break or alter the statement.
    column_defs = sql.SQL(", ").join(
        sql.SQL("{} TEXT").format(sql.Identifier(col)) for col in columns
    )
    create_sql = sql.SQL(
        "CREATE TABLE vanilla_table (id SERIAL PRIMARY KEY, {});"
    ).format(column_defs)

    cur.execute(create_sql)
    conn.commit()
    print("‚úÖ Table created dynamically based on CSV columns.")

    # Build INSERT query dynamically: quoted column list plus one %s per column.
    # Rendered to a plain string once, so the timed loop below does no per-row
    # SQL composition.
    insert_sql = sql.SQL("INSERT INTO vanilla_table ({}) VALUES ({});").format(
        sql.SQL(", ").join(sql.Identifier(c) for c in columns),
        sql.SQL(", ").join(sql.Placeholder() * len(columns)),
    ).as_string(conn)

    # Insert data
    print("Inserting rows into vanilla_table...")
    start = time.perf_counter()

    for row in rows:
        # Order the values to match the column list in insert_sql.
        values = [row[c] for c in columns]
        cur.execute(insert_sql, values)

    conn.commit()
    insert_time = (time.perf_counter() - start) * 1000
    print(f"‚úÖ Inserted {len(rows)} rows in {insert_time:.2f} ms")

    # Measure disk usage
    cur.execute(size_sql)
    size_info = cur.fetchone()

    print("\nüì¶ Disk Usage Report:")
    print(f"  ‚Üí Total Table Size : {size_info[0]}")
    print(f"  ‚Üí Data Only Size   : {size_info[1]}")
    print(f"  ‚Üí Index Overhead   : {size_info[2]}")
finally:
    # Always release the cursor and connection, including when an INSERT fails
    # part-way; closing the connection discards any uncommitted rows.
    cur.close()
    conn.close()
print("\n‚úÖ Done! Vanilla table created and populated successfully.")


Loaded 150346 rows from dataset with 60 columns
‚úÖ Table created dynamically based on CSV columns.
Inserting rows into vanilla_table...
‚úÖ Inserted 150346 rows in 22125.16 ms

üì¶ Disk Usage Report:
  ‚Üí Total Table Size : 71 MB
  ‚Üí Data Only Size   : 68 MB
  ‚Üí Index Overhead   : 3368 kB

‚úÖ Done! Vanilla table created and populated successfully.


### SINGLE UPDATE QUERY FOR VANILLA

In [100]:
# ---------- TC19C: VANILLA Update Test ----------
# Times a single UPDATE that sets the `stars` column to '5.0' on every
# vanilla_table row whose `city` is 'Tucson'. The measured interval covers the
# statement and its commit.
import psycopg2, time

update_sql = """
    UPDATE vanilla_table
    SET stars = '5.0'
    WHERE city = 'Tucson';
"""

# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running VANILLA update query (city = 'Tucson')...")
    start = time.perf_counter()

    cur.execute(update_sql)
    conn.commit()

    end = time.perf_counter()
    print(f"‚úÖ VANILLA update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even if the UPDATE raises; closing the
    # connection discards any uncommitted change.
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 120.90 ms
‚úÖ Done!


### MULTI UPDATE QUERY ACROSS VANILLA

In [137]:
# ---------- TC19D: VANILLA Column Update Test ----------
# Times a single UPDATE that sets the "attributes.WheelchairAccessible" column
# to 'True' on vanilla_table rows matching three conditions (state, is_open,
# categories). The measured interval covers the statement and its commit.
import psycopg2, time

# The column name contains a dot, so it has to be double-quoted in SQL.
update_sql = """
    UPDATE vanilla_table
    SET "attributes.WheelchairAccessible" = 'True'
    WHERE state = 'CA'
      AND is_open = '1'
      AND categories ILIKE '%Restaurants%';
"""

# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("Running VANILLA update query (state='CA', Restaurants only)...")
    start = time.perf_counter()

    # Executed without a parameter tuple, so the literal % signs in the ILIKE
    # pattern are sent to the server unchanged.
    cur.execute(update_sql)
    conn.commit()

    end = time.perf_counter()
    print(f"‚úÖ VANILLA update completed in {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even if the UPDATE raises; closing the
    # connection discards any uncommitted change.
    cur.close()
    conn.close()
print("‚úÖ Done!")


Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 395.35 ms
‚úÖ Done!


SINGLE UPDATE AND MULTI UPDATE RESULTS ACROSS VANILLA

Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 150.53 ms
‚úÖ Done!

Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 122.58 ms
‚úÖ Done!

Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 518.87 ms
‚úÖ Done!

Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 213.11 ms
‚úÖ Done!

Running VANILLA update query (city = 'Tucson')...
‚úÖ VANILLA update completed in 120.90 ms
‚úÖ Done!

Avg: 225.198 ms (single-value update, city = 'Tucson', 5 runs)

Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 415.04 ms
‚úÖ Done!

Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 397.47 ms
‚úÖ Done!

Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 403.14 ms
‚úÖ Done!

Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 405.98 ms
‚úÖ Done!

Running VANILLA update query (state='CA', Restaurants only)...
‚úÖ VANILLA update completed in 395.35 ms
‚úÖ Done!

In [14]:
# ---------- TC18: True Nested Query (JSON Path vs HSTORE) ----------
# Runs the same four-condition COUNT(*) filter against jsonb_table, hstore_table
# and vanilla_table, printing the count and wall-clock time of each.
# Each query is executed once, in dict order, with no warm-up run.
import psycopg2, time

# In all three tables the attributes are addressed by flat dotted names such as
# "attributes.WiFi" (the dot is part of the key/column name), so despite the
# title none of these queries walks a nested structure.
# Inside the SQL literals, '' is an escaped single quote.
queries = {
    # JSONB: one jsonpath filter; each quoted name is a single top-level key
    "JSONB": """
        SELECT COUNT(*)
        FROM jsonb_table
        WHERE data @? '$ ? (
            @."attributes.WiFi" == "u''free''" &&
            @."attributes.BusinessParking" like_regex "street'': True" &&
            @."attributes.Alcohol" == "u''beer_and_wine''" &&
            @."attributes.Ambience" like_regex "casual'': True"
        )';
    """,

    # HSTORE: the same conditions as key lookups with equality / LIKE
    "HSTORE": """
        SELECT COUNT(*)
        FROM hstore_table
        WHERE data->'attributes.WiFi' = 'u''free'''
          AND data->'attributes.BusinessParking' LIKE '%street'': True%'
          AND data->'attributes.Alcohol' = 'u''beer_and_wine'''
          AND data->'attributes.Ambience' LIKE '%casual'': True%';
    """,

    # VANILLA (simple column-based filters)
    "VANILLA": """
        SELECT COUNT(*)
        FROM vanilla_table
        WHERE "attributes.WiFi" = 'u''free'''
          AND "attributes.BusinessParking" LIKE '%street'': True%'
          AND "attributes.Alcohol" = 'u''beer_and_wine'''
          AND "attributes.Ambience" LIKE '%casual'': True%';
    """
}

# NOTE(review): credentials are hardcoded in the notebook; consider reading them
# from the environment before sharing this file.
conn = psycopg2.connect(
    dbname="mydb",
    user="postgres",
    password="sowmya2004",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

try:
    print("üîπ TC18: True Nested Query Performance\n")

    for label, query in queries.items():
        # The timed interval covers execution and fetching the single result row.
        # Queries run without a parameter tuple, so the % signs in the LIKE
        # patterns are sent to the server unchanged.
        start = time.perf_counter()
        cur.execute(query)
        count = cur.fetchone()[0]
        end = time.perf_counter()
        print(f"{label:<8} ‚Üí count: {count:>6}, time: {(end - start)*1000:.2f} ms")
finally:
    # Release the cursor and connection even if one of the queries raises.
    cur.close()
    conn.close()
print("\n‚úÖ Done.")


üîπ TC18: True Nested Query Performance

JSONB    ‚Üí count:    469, time: 472.68 ms
HSTORE   ‚Üí count:    469, time: 276.33 ms
VANILLA  ‚Üí count:    469, time: 70.40 ms

‚úÖ Done.
