In [1]:
import json

# Load the table schema description. Later cells read each entry's
# 'Column name' and 'Data type' keys to build the CREATE TABLE statement.
schema_file_path = './db_schema.json'
# Decode explicitly as UTF-8 instead of relying on the platform's default encoding,
# matching how the CSV sample is opened further down.
with open(schema_file_path, 'r', encoding='utf-8') as file:
    schema_data = json.load(file)

In [2]:
import sqlite3
import csv
import pandas as pd

# SQLite database file and the table the rest of the notebook works on.
db_name, table_name = 'sample.db', 'sales_table'

# CSV file that gets loaded into the table.
# Swap in 'real_sample.csv' here to load the real sample instead of the generated one.
sample_data_path = 'generated_sample.csv'

# Connection and cursor used by the table-creation / data-loading cell below.
conn = sqlite3.connect(db_name)
cursor = conn.cursor()

In [3]:
# Statement that removes any previous copy of the table so this notebook can be re-run.
drop_table_query = f'''
DROP TABLE IF EXISTS {table_name}
'''

# An empty schema would produce "CREATE TABLE <name> ()", which is invalid SQL and
# would only surface later at execution time; fail here with a clear message instead.
if not schema_data:
    raise ValueError("Schema is empty: cannot build a CREATE TABLE statement")

# One "<column name> <data type>" line per schema entry.
# NOTE: names and types are interpolated verbatim (not quoted or validated),
# so the schema file must come from a trusted source.
column_definitions = [
    f"    {column['Column name']} {column['Data type']}"
    for column in schema_data
]

# Joining with ",\n" places the separators only between columns, so there is no
# trailing comma to trim afterwards.
create_table_query = f'''
CREATE TABLE {table_name} (
''' + ',\n'.join(column_definitions) + '\n)'

In [4]:
# Recreate the table, then bulk-load the CSV sample into it.
# SQLite errors are reported and the open transaction is rolled back; the connection
# is closed in every case, so later cells must open their own connection.
try:
    cursor.execute(drop_table_query)
    cursor.execute(create_table_query)
    conn.commit()
    print(f"Table {table_name} has been recreated successfully.")

    # newline='' lets the csv module handle line endings itself, which it needs for
    # quoted fields that contain newlines. 'utf-8-sig' reads plain UTF-8 and also drops
    # a leading byte-order mark so it cannot end up inside the first column name.
    with open(sample_data_path, 'r', encoding='utf-8-sig', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        headers = next(csv_reader, None)
        if headers is None:
            raise ValueError(f"CSV file {sample_data_path!r} is empty: no header row found")

        # Row values are bound through '?' placeholders. Column names come from the
        # CSV header and are interpolated verbatim, so the file must be trusted.
        placeholders = ','.join(['?' for _ in headers])
        insert_query = f'''
        INSERT INTO {table_name} ({','.join(headers)})
        VALUES ({placeholders})
        '''

        # csv.reader yields an empty list for blank lines; skip those so a stray
        # empty line does not abort the whole load with a binding-count error.
        data_rows = (row for row in csv_reader if row)
        # A single executemany call replaces one execute call per row.
        cursor.executemany(insert_query, data_rows)

    conn.commit()
    print("Data has been inserted successfully.")

except sqlite3.Error as e:
    print(f"An error occurred: {e}")
    conn.rollback()

finally:
    conn.close()

Table sales_table has been recreated successfully.
Data has been inserted successfully.


In [5]:
# Top 10 product groups by gross sales for one reference month, limited to rows
# whose performance_summary_flag equals the value below.
target_year_month = '2024.08'
summary_flag = 'O'

# Filter values are bound as parameters ('?') rather than embedded in the SQL text,
# so changing the month or flag does not require editing the query.
query = f"""
SELECT 
    reference_year_month,
    product_name,
    product_category_major_name,
    current_product_team,
    SUM(sales_quantity_sku) as total_quantity,
    SUM(sales_amount_gross_basis) as total_sales,
    SUM(sales_profit_amount) as total_profit
FROM 
    {table_name}
WHERE 
    reference_year_month = ?
    AND performance_summary_flag = ?
GROUP BY 
    reference_year_month,
    product_name,
    product_category_major_name,
    current_product_team
ORDER BY 
    total_sales DESC
LIMIT 10
"""

# The previous cell closed its connection, so open a fresh one here and make sure
# it is released even if the query fails.
conn = sqlite3.connect(db_name)
try:
    df = pd.read_sql_query(query, conn, params=(target_year_month, summary_flag))
finally:
    conn.close()

print(df)

   reference_year_month                       product_name  \
0               2024.08          Beef Sirloin (Product KG)   
1               2024.08        Atlantic Salmon (Fillet KG)   
2               2024.08       Potato Chips (Large 300g/EA)   
3               2024.08               Lettuce (Product KG)   
4               2024.08      Canned Tuna in Oil (150g/Can)   
5               2024.08              Fuji Apple (10Kg/BOX)   
6               2024.08            Spicy Ramen (120g/Pack)   
7               2024.08      Cheddar Cheese (Block 2Kg/EA)   
8               2024.08        Fresh Shiitake (Product KG)   
9               2024.08  Pepperoni Pizza (Large 12inch/EA)   

  product_category_major_name current_product_team  total_quantity  \
0               Meat Products            Meat Team           300.0   
1                     Seafood         Seafood Team           150.0   
2                      Snacks           Snack Team          1000.0   
3       Agricultural Products    Agri