In [None]:
pip install ipython-sql


In [None]:
pip install psycopg2-binary


In [8]:
%load_ext sql


In [9]:
%sql sqlite://

In [10]:
%sql sqlite:///my_database.db


In [None]:
!python -m pip install ipython-sql psycopg2 prettytable==3.11.0

In [None]:
%load_ext sql
%sql postgresql://postgres:priya123@localhost:5432/my_database

In [None]:
!pip install --upgrade prettytable==3.11.0


In [1]:
%config SqlMagic.style = '_DEPRECATED_DEFAULT'



In [None]:
!pip install prettytable==3.11.0


In [46]:
import psycopg2
import time
import random
import string
from psycopg2.extras import execute_values, register_hstore

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Enable hstore and register adapter
cur.execute("CREATE EXTENSION IF NOT EXISTS hstore;")
register_hstore(conn)  # <-- This is required!
cur.execute("DROP TABLE IF EXISTS latency_test;")
cur.execute("CREATE TABLE latency_test (id SERIAL PRIMARY KEY, attributes hstore);")
conn.commit()

# Function to create random key-value data
def random_hstore(batch_size):
    data = []
    for _ in range(batch_size):
        kv_pairs = {f"key{j}": ''.join(random.choices(string.ascii_lowercase, k=5))
                    for j in range(5)}
        data.append((kv_pairs,))
    return data

# Function to test latency for different row counts
def test_latency(row_counts):
    results = []
    for n in row_counts:
        batch = random_hstore(n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # in milliseconds
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results

# Run experiments
row_counts = [100, 1000, 5000, 10000, 50000]
#row_counts = [1, 2, 3, 4, 5]
results = test_latency(row_counts)

# Display results
print("\nLatency Results:")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")

cur.close()
conn.close()


Inserted 100 rows in 13.53 ms
Inserted 1000 rows in 52.99 ms
Inserted 5000 rows in 303.17 ms
Inserted 10000 rows in 836.14 ms
Inserted 50000 rows in 3527.49 ms

Latency Results:
   100 rows →    13.53 ms
  1000 rows →    52.99 ms
  5000 rows →   303.17 ms
 10000 rows →   836.14 ms
 50000 rows →  3527.49 ms


In [31]:
import psycopg2
import time
import random
import string
from psycopg2.extras import Json
from psycopg2.extras import execute_values

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Drop table if exists and create a JSONB table
cur.execute("DROP TABLE IF EXISTS latency_test_jsonb;")
cur.execute("CREATE TABLE latency_test_jsonb (id SERIAL PRIMARY KEY, attributes jsonb);")
conn.commit()

# Function to create random key-value data
def random_jsonb(batch_size):
    data = []
    for _ in range(batch_size):
        kv_pairs = {f"key{j}": ''.join(random.choices(string.ascii_lowercase, k=5))
                    for j in range(5)}
        data.append((Json(kv_pairs),))  # wrap dict in Json()
    return data

# Function to test latency for different row counts
def test_latency_jsonb(row_counts):
    results = []
    for n in row_counts:
        batch = random_jsonb(n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test_jsonb (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # in milliseconds
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results

# Run experiments
#row_counts = [1, 2, 3, 4, 5]
row_counts = [100, 1000, 5000, 10000, 50000]
results = test_latency_jsonb(row_counts)

# Display results
print("\nLatency Results (JSONB):")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")

cur.close()
conn.close()


Inserted 100 rows in 8.15 ms
Inserted 1000 rows in 15.47 ms
Inserted 5000 rows in 122.28 ms
Inserted 10000 rows in 315.27 ms
Inserted 50000 rows in 1787.58 ms

Latency Results (JSONB):
   100 rows →     8.15 ms
  1000 rows →    15.47 ms
  5000 rows →   122.28 ms
 10000 rows →   315.27 ms
 50000 rows →  1787.58 ms


In [42]:
import psycopg2
import time
import csv
from psycopg2.extras import execute_values, register_hstore

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Enable hstore and register adapter
cur.execute("CREATE EXTENSION IF NOT EXISTS hstore;")
register_hstore(conn)
cur.execute("DROP TABLE IF EXISTS latency_test;")
cur.execute("CREATE TABLE latency_test (id SERIAL PRIMARY KEY, attributes hstore);")
conn.commit()

# Function to read data from CSV file
def load_hstore_from_csv(csv_file_path, max_rows=None):
    data = []
    with open(csv_file_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            # Each row is a dict: {'key1': 'apple', 'key2': 'banana', ...}
            data.append((row,))  # Must be tuple for execute_values
            if max_rows and len(data) >= max_rows:
                break
    return data

# Function to test latency for different row counts
def test_latency(row_counts, csv_path):
    results = []
    for n in row_counts:
        batch = load_hstore_from_csv(csv_path, max_rows=n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # ms
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results


    

# Run experiments

csv_path = "D:/semesters/SEM7/COL868/benchmark/archive/Food_Supply_kcal_Data.csv"
  # <-- put your CSV path here
row_counts = [1000, 50000, 102000]   # can adjust as per file size

results = test_latency(row_counts, csv_path)

# Display results
print("\nLatency Results:")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")

'''try:
    cur.execute("SELECT AVG((attributes -> 'Animal fats')::float) FROM latency_test;")
    avg_animal_fats = cur.fetchone()[0]
    print(f"\nAverage value of 'Animal fats': {avg_animal_fats:.2f}")
except Exception as e:
    print("\nError querying 'Animal fats':", e)'''
print("\nExecuting average query for 'Animal fats'...")
'''query = """
    SELECT AVG((attributes -> 'Animal fats')::float) FROM latency_test;
"""'''
query = "SELECT attributes -> 'Country' AS country FROM latency_test;"
start_time = time.time()
cur.execute(query)
countries = cur.fetchall()
#avg_animal_fats = cur.fetchone()[0]
end_time = time.time()

query_latency = (end_time - start_time) * 1000  # ms
#print(f"Average value of 'Animal fats': {avg_animal_fats:.2f}")
print(f"Found {len(countries)} unique countries")
print(f"Query execution time: {query_latency:.2f} ms")


cur.close()
conn.close()


Inserted 1000 rows in 133.87 ms
Inserted 50000 rows in 17999.87 ms
Inserted 102000 rows in 23353.14 ms

Latency Results:
  1000 rows →   133.87 ms
 50000 rows → 17999.87 ms
102000 rows → 23353.14 ms

Executing average query for 'Animal fats'...
Found 153000 unique countries
Query execution time: 404.82 ms


In [46]:
import psycopg2
import time
import csv
from psycopg2.extras import execute_values, register_hstore

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Enable hstore and register adapter
cur.execute("CREATE EXTENSION IF NOT EXISTS hstore;")
register_hstore(conn)
cur.execute("DROP TABLE IF EXISTS latency_test;")
cur.execute("CREATE TABLE latency_test (id SERIAL PRIMARY KEY, attributes hstore);")
conn.commit()

# Function to read data from CSV file
def load_hstore_from_csv(csv_file_path, max_rows=None):
    data = []
    with open(csv_file_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            # Each row is a dict: {'key1': 'apple', 'key2': 'banana', ...}
            data.append((row,))  # Must be tuple for execute_values
            if max_rows and len(data) >= max_rows:
                break
    return data

# Function to test latency for different row counts
def test_latency(row_counts, csv_path):
    results = []
    for n in row_counts:
        batch = load_hstore_from_csv(csv_path, max_rows=n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # ms
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results


    

# Run experiments

csv_path = "D:/semesters/SEM7/COL868/benchmark/archive/Food_Supply_kcal_Data.csv"
  # <-- put your CSV path here
row_counts = [1000, 50000, 102000]   # can adjust as per file size

results = test_latency(row_counts, csv_path)

# Display results
print("\nLatency Results:")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")

'''try:
    cur.execute("SELECT AVG((attributes -> 'Animal fats')::float) FROM latency_test;")
    avg_animal_fats = cur.fetchone()[0]
    print(f"\nAverage value of 'Animal fats': {avg_animal_fats:.2f}")
except Exception as e:
    print("\nError querying 'Animal fats':", e)'''
print("\nExecuting average query for 'Animal fats'...")
'''query = """
    SELECT AVG((attributes -> 'Animal fats')::float) FROM latency_test;
"""'''
print("\nExecuting update query for 'Animal fats' where Country='India'...")

query = """
    UPDATE latency_test
    SET attributes = attributes || hstore('Animal fats', '2000')
    WHERE attributes -> 'Country' = 'India';
"""

start_time = time.time()
cur.execute(query)
conn.commit()
end_time = time.time()

update_latency = (end_time - start_time) * 1000  # milliseconds
print(f"Update executed successfully in {update_latency:.2f} ms")



cur.close()
conn.close()


Inserted 1000 rows in 304.99 ms
Inserted 50000 rows in 11591.22 ms
Inserted 102000 rows in 23966.04 ms

Latency Results:
  1000 rows →   304.99 ms
 50000 rows → 11591.22 ms
102000 rows → 23966.04 ms

Executing average query for 'Animal fats'...

Executing update query for 'Animal fats' where Country='India'...
Update executed successfully in 225.39 ms


In [41]:
import psycopg2
import time
import csv
from psycopg2.extras import Json, execute_values

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Drop table if exists and create JSONB table
cur.execute("DROP TABLE IF EXISTS latency_test_jsonb;")
cur.execute("CREATE TABLE latency_test_jsonb (id SERIAL PRIMARY KEY, attributes JSONB);")
conn.commit()

# Function to read data from CSV file
def load_jsonb_from_csv(csv_file_path, max_rows=None):
    data = []
    with open(csv_file_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            # Convert each row (dict) to JSONB
            data.append((Json(row),))  # must wrap in Json() for PostgreSQL
            if max_rows and len(data) >= max_rows:
                break
    return data

# Function to test latency for different row counts
def test_latency_jsonb(row_counts, csv_path):
    results = []
    for n in row_counts:
        batch = load_jsonb_from_csv(csv_path, max_rows=n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test_jsonb (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # milliseconds
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results

# Run experiments
csv_path = "D:/semesters/SEM7/COL868/benchmark/archive/Food_Supply_kcal_Data.csv"  # your CSV path
row_counts = [1000, 50000, 102000]  # adjust as needed

results = test_latency_jsonb(row_counts, csv_path)

# Display results
print("\nLatency Results (JSONB):")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")
'''try:
    cur.execute("""
        SELECT AVG((attributes ->> 'Animal fats')::float)
        FROM latency_test_jsonb;
    """)
    avg_animal_fats = cur.fetchone()[0]
    print(f"\nAverage value of 'Animal fats': {avg_animal_fats:.2f}")
except Exception as e:
    print("\nError querying 'Animal fats':", e)'''
print("\nExecuting average query for 'Animal fats'...")
'''query = """
    SELECT AVG((attributes ->> 'Animal fats')::float)
    FROM latency_test_jsonb;
"""'''
query = """
    SELECT  attributes ->> 'Country' AS country
    FROM latency_test_jsonb;
"""

start_time = time.time()
cur.execute(query)
countries = cur.fetchall()
#avg_animal_fats = cur.fetchone()[0]
end_time = time.time()

query_latency = (end_time - start_time) * 1000  # ms
#print(f"Average value of 'Animal fats': {avg_animal_fats:.2f}")
print(f"Found {len(countries)} unique countries")
print(f"Query execution time: {query_latency:.2f} ms")

cur.close()
conn.close()


Inserted 1000 rows in 115.08 ms
Inserted 50000 rows in 8392.55 ms
Inserted 102000 rows in 13011.63 ms

Latency Results (JSONB):
  1000 rows →   115.08 ms
 50000 rows →  8392.55 ms
102000 rows → 13011.63 ms

Executing average query for 'Animal fats'...
Found 153000 unique countries
Query execution time: 177.85 ms


In [45]:
import psycopg2
import time
import csv
from psycopg2.extras import Json, execute_values

# Connect to PostgreSQL
conn = psycopg2.connect(
    dbname="my_database",
    user="postgres",
    password="priya123",
    host="localhost",
    port="5432"
)
cur = conn.cursor()

# Drop table if exists and create JSONB table
cur.execute("DROP TABLE IF EXISTS latency_test_jsonb;")
cur.execute("CREATE TABLE latency_test_jsonb (id SERIAL PRIMARY KEY, attributes JSONB);")
conn.commit()

# Function to read data from CSV file
def load_jsonb_from_csv(csv_file_path, max_rows=None):
    data = []
    with open(csv_file_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            # Convert each row (dict) to JSONB
            data.append((Json(row),))  # must wrap in Json() for PostgreSQL
            if max_rows and len(data) >= max_rows:
                break
    return data

# Function to test latency for different row counts
def test_latency_jsonb(row_counts, csv_path):
    results = []
    for n in row_counts:
        batch = load_jsonb_from_csv(csv_path, max_rows=n)
        start = time.time()
        execute_values(cur, "INSERT INTO latency_test_jsonb (attributes) VALUES %s", batch)
        conn.commit()
        end = time.time()
        latency = (end - start) * 1000  # milliseconds
        results.append((n, latency))
        print(f"Inserted {n} rows in {latency:.2f} ms")
    return results

# Run experiments
csv_path = "D:/semesters/SEM7/COL868/benchmark/archive/Food_Supply_kcal_Data.csv"  # your CSV path
row_counts = [1000, 50000, 102000]  # adjust as needed

results = test_latency_jsonb(row_counts, csv_path)

# Display results
print("\nLatency Results (JSONB):")
for n, t in results:
    print(f"{n:6d} rows → {t:8.2f} ms")
'''try:
    cur.execute("""
        SELECT AVG((attributes ->> 'Animal fats')::float)
        FROM latency_test_jsonb;
    """)
    avg_animal_fats = cur.fetchone()[0]
    print(f"\nAverage value of 'Animal fats': {avg_animal_fats:.2f}")
except Exception as e:
    print("\nError querying 'Animal fats':", e)'''
print("\nExecuting average query for 'Animal fats'...")
'''query = """
    SELECT AVG((attributes ->> 'Animal fats')::float)
    FROM latency_test_jsonb;
"""'''
query = """
    UPDATE latency_test_jsonb
    SET attributes = jsonb_set(attributes, '{Animal fats}', '2000'::jsonb)
    WHERE attributes ->> 'Country' = 'India';
"""

start = time.time()
cur.execute(query)
conn.commit()
end = time.time()

print(f"Update time: {(end - start) * 1000:.2f} ms")


cur.close()
conn.close()


Inserted 1000 rows in 120.90 ms
Inserted 50000 rows in 6667.35 ms
Inserted 102000 rows in 11090.75 ms

Latency Results (JSONB):
  1000 rows →   120.90 ms
 50000 rows →  6667.35 ms
102000 rows → 11090.75 ms

Executing average query for 'Animal fats'...
Update time: 359.40 ms
