In [None]:
import pandas as pd
import sqlite3

In [5]:

# --- Configuration ---
# Inputs for the CSV -> SQLite upload performed further down in this cell.
csv_file = "bmi.csv"                # CSV file to import
database_file = "my_database.db"    # SQLite file; created if it does not exist
table_name = "data_table"           # table that receives the CSV rows
# --- End Configuration ---

def upload_csv_to_sqlite(csv_path, db_path, table):
    """
    Load a CSV file into an SQLite table, replacing the table if it exists.

    The CSV is parsed before the database is opened, so a missing or
    unreadable CSV does not leave an empty database file behind.

    Args:
        csv_path: Path of the CSV file to read.
        db_path: Path of the SQLite database file (created if it doesn't exist).
        table: Name of the table to (re)create. The name is quoted in the
            verification query, so names containing spaces, hyphens or SQL
            keywords work as well.

    Returns:
        None. Progress and errors are reported with print(); exceptions are
        caught and reported instead of being propagated.
    """
    conn = None
    try:
        # 1. Read the CSV file into a pandas DataFrame.
        # Done before connecting: sqlite3.connect() would otherwise create
        # the database file even when the CSV turns out to be missing.
        df = pd.read_csv(csv_path)
        print(f"‚úÖ Successfully read CSV file: {csv_path}")

        # 2. Connect to the SQLite database
        conn = sqlite3.connect(db_path)
        print(f"‚úÖ Successfully connected to database: {db_path}")

        # 3. Write the DataFrame to the SQLite database
        # 'if_exists': 'replace' drops the table and creates a new one
        # 'index=False' keeps the DataFrame index out of the table
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"‚úÖ Successfully uploaded data to table: {table}")

        # 4. Verify the upload.
        # Identifiers cannot be bound as SQL parameters, so the table name is
        # quoted ("" escapes an embedded double quote) before interpolation.
        quoted_table = '"' + str(table).replace('"', '""') + '"'
        count = conn.execute(f"SELECT COUNT(*) FROM {quoted_table};").fetchone()[0]
        print(f"üìä Rows uploaded: {count}")

    except FileNotFoundError:
        print(f"‚ùå Error: The file '{csv_path}' was not found.")
    except Exception as e:
        print(f"‚ùå An unexpected error occurred: {e}")
    finally:
        # 5. Close the connection (only if one was actually opened)
        if conn is not None:
            conn.close()
            print("‚û°Ô∏è Database connection closed.")

# Import the CSV into SQLite using the values from the configuration section of this cell.
upload_csv_to_sqlite(csv_file, database_file, table_name)

‚úÖ Successfully connected to database: my_database.db
‚úÖ Successfully read CSV file: bmi.csv
‚úÖ Successfully uploaded data to table: data_table
üìä Rows uploaded: 500
‚û°Ô∏è Database connection closed.


In [6]:


# SQLite database file to open.
database_file = "my_database.db"
# Table whose rows are read back.
table_name = "data_table"

def retrieve_data_sqlite3(db_path, table):
    """
    Connects to the database, selects every row of one table, and prints them.

    Args:
        db_path: Path of the SQLite database file to open.
        table: Name of the table to read. The name is quoted before it is
            placed in the query, so names containing spaces, hyphens or SQL
            keywords work as well.

    Returns:
        None. Rows are printed one per line; "No data found in the table."
        is printed for an empty table. sqlite3 errors and other exceptions
        are caught and printed instead of being propagated.
    """
    conn = None
    try:
        # 1. Connect to the database and create a cursor for the query
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # 2. Define the SQL query
        # Identifiers cannot be bound as SQL parameters, so the table name is
        # quoted ("" escapes an embedded double quote) before interpolation.
        quoted_table = '"' + str(table).replace('"', '""') + '"'
        query = f"SELECT * FROM {quoted_table};"

        print(f"Executing query: {query}")

        # 3. Execute the query
        cursor.execute(query)

        # 4. Fetch the results
        # .fetchall() retrieves all rows of the query result set
        rows = cursor.fetchall()

        # 5. Print the results
        if rows:
            print("\nRetrieved Data:")
            for row in rows:
                print(row)
        else:
            print("No data found in the table.")

    except sqlite3.Error as e:
        print(f"‚ùå SQLite error occurred: {e}")
    except Exception as e:
        print(f"‚ùå An unexpected error occurred: {e}")
    finally:
        # 6. Close the connection
        if conn:
            conn.close()
            print("\n‚û°Ô∏è Database connection closed.")

# Print every row of the configured table (the whole table is written to the cell output).
retrieve_data_sqlite3(database_file, table_name)

Executing query: SELECT * FROM data_table;

Retrieved Data:
('Male', 174, 96, 4)
('Male', 189, 87, 2)
('Female', 185, 110, 4)
('Female', 195, 104, 3)
('Male', 149, 61, 3)
('Male', 189, 104, 3)
('Male', 147, 92, 5)
('Male', 154, 111, 5)
('Male', 174, 90, 3)
('Female', 169, 103, 4)
('Male', 195, 81, 2)
('Female', 159, 80, 4)
('Female', 192, 101, 3)
('Male', 155, 51, 2)
('Male', 191, 79, 2)
('Female', 153, 107, 5)
('Female', 157, 110, 5)
('Male', 140, 129, 5)
('Male', 144, 145, 5)
('Male', 172, 139, 5)
('Male', 157, 110, 5)
('Female', 153, 149, 5)
('Female', 169, 97, 4)
('Male', 185, 139, 5)
('Female', 172, 67, 2)
('Female', 151, 64, 3)
('Male', 190, 95, 3)
('Male', 187, 62, 1)
('Female', 163, 159, 5)
('Male', 179, 152, 5)
('Male', 153, 121, 5)
('Male', 178, 52, 1)
('Female', 195, 65, 1)
('Female', 160, 131, 5)
('Female', 157, 153, 5)
('Female', 189, 132, 4)
('Female', 197, 114, 3)
('Male', 144, 80, 4)
('Female', 171, 152, 5)
('Female', 185, 81, 2)
('Female', 175, 120, 4)
('Female', 149, 

# Insert into SQLite

In [30]:
import sqlite3
import pandas as pd
import logging
from datetime import datetime
import os
import json

## Part 1: Read file names and store them in a JSON file

In [33]:

# Configuration for folders and JSON file path
FOLDERS = ["BRONZE", "SILVER", "GOLD"]
JSON_FILE_PATH = "sqllite_config.json"

# --- 1. Fetch Filenames ---
# Maps each folder name to the list of .csv file names found in it.
csv_files = {}

for folder in FOLDERS:
    try:
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # Bronze "first file" pick and the stored lists reproducible.
        files = sorted(f for f in os.listdir(folder) if f.endswith(".csv"))
        csv_files[folder] = files
    except FileNotFoundError:
        print(f"Warning: Folder '{folder}' not found. Skipping file fetching for this folder.")
        csv_files[folder] = []


# --- 2. Prepare 'files' dictionary for JSON update ---
file_config = {}

# Bronze: single file (first name in sorted order). The key is only written
# when a file exists, so a value stored by an earlier run is left untouched
# when the folder is empty.
if csv_files["BRONZE"]:
    file_config["bronze_csv_file"] = csv_files["BRONZE"][0]

# Silver / Gold: every file found, as a list (an empty list keeps the key present).
file_config["silver_csv_files_list"] = csv_files["SILVER"]
file_config["gold_csv_files_list"] = csv_files["GOLD"]


# --- 3. Update JSON File with Filenames ---
try:
    # Read the existing data
    with open(JSON_FILE_PATH, 'r') as f:
        config_data = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    # Initialize if file doesn't exist or is invalid
    config_data = {}

# Merge the new entries into the existing 'files' section; every other
# section of the configuration is kept as it was.
existing_files = config_data.get("files", {})
existing_files.update(file_config)
config_data["files"] = existing_files


# Write the modified data back to the file
print(f"Writing updated filenames to {JSON_FILE_PATH}...")
with open(JSON_FILE_PATH, 'w') as f:
    json.dump(config_data, f, indent=4)

print(f"Part 1: Filenames updated in {JSON_FILE_PATH}.")

Writing updated filenames to sqllite_config.json...
Part 1: Filenames updated in sqllite_config.json.


## Part 2: Read file names and create table names dynamically

In [None]:
# import os
# import json

# JSON_FILE_PATH = "sqllite_config.json"
# TIER_KEYWORDS = ["bronze", "silver", "gold"]

# # Function to implement the simplified table naming logic
# def derive_custom_table_name(filename):
#     """
#     Extracts the table name starting from the tier keyword and includes
#     the next two underscore-separated words, effectively stopping after the 3rd word.
#     Example: 'ailab_bronze_eres_flight_data1.csv' -> 'bronze_eres_flight'
#     """
#     if not filename:
#         return None

#     # 1. Remove the extension and convert to lowercase for searching
#     base_name, _ = os.path.splitext(filename)
#     base_name_lower = base_name.lower()
    
#     # 2. Find the starting position of the tier keyword
#     start_index = -1
    
#     for tier in TIER_KEYWORDS:
#         if tier in base_name_lower:
#             start_index = base_name_lower.find(tier)
#             break
    
#     if start_index != -1:
#         # Start the relevant part of the name from the tier keyword
#         custom_name = base_name[start_index:]
        
#         # Split by underscore
#         parts = custom_name.split('_')
        
#         # --- MODIFIED LOGIC HERE ---
#         # We need the tier word (parts[0]) + the next two words (parts[1] and parts[2])
#         # We use a slice [:3] to ensure we only take the first three elements.
#         final_parts = parts[:3]
        
#         # Ensure we have at least one part before joining
#         if final_parts:
#             # Join the first three parts (e.g., "bronze_eres_flight")
#             return "_".join(final_parts)
        
#         # Fallback to the whole string starting from the tier
#         return custom_name 
            
#     # Fallback if no tier keyword found
#     return base_name

# # --- 1. Read JSON file to get filenames ---
# try:
#     print(f"Reading existing configuration from {JSON_FILE_PATH}...")
#     with open(JSON_FILE_PATH, 'r') as f:
#         config_data = json.load(f)
# except (FileNotFoundError, json.JSONDecodeError):
#     print(f"Error: Could not read or decode JSON file at {JSON_FILE_PATH}. Aborting Part 2.")
#     exit()

# # Safely extract the filenames dictionary
# file_config = config_data.get("files", {})

# # Get the full filenames
# bronze_file = file_config.get("bronze_csv_file")
# silver_file = file_config.get("silver_csv_file")
# gold_file = file_config.get("gold_csv_file")


# # --- 2. Prepare the NEW Nested Table Dictionaries ---

# # Initialize the dictionary to hold the new nested structures
# new_table_config = {}

# # Helper function to generate and append the table structure
# def append_table_config(file_path, tier_key):
#     if file_path:
#         table_name = derive_custom_table_name(file_path)
        
#         # Enforce desired casing for silver (e.g., 'silver_eres_flight')
#         if tier_key == "silver" and table_name.lower().startswith("silver"):
#             table_name = "silver" + table_name[6:]
            
#         db_file_name = f"{table_name}.db"
#         new_table_config[f"{tier_key}_tables"] = {
#             table_name: db_file_name
#         }
#         return table_name # Return the generated name for print feedback
#     return None

# # Generate and append configurations for all three tiers
# bronze_table = append_table_config(bronze_file, "bronze")
# silver_table = append_table_config(silver_file, "silver")
# gold_table = append_table_config(gold_file, "gold")


# # --- 3. Update JSON File with New Nested Tables ---

# # Update the root config_data with the new bronze/silver/gold_tables dictionaries
# config_data.update(new_table_config)

# # Remove the old generic 'tables' key if it exists (cleanup from prior attempts)
# if "tables" in config_data:
#     del config_data["tables"]
    
# # Write the modified data back to the file
# print(f"Writing updated nested table configuration back to {JSON_FILE_PATH}...")
# with open(JSON_FILE_PATH, 'w') as f:
#     json.dump(config_data, f, indent=4)

# print(f"\nPart 2: Configuration file updated successfully.")
# print(f" - Bronze Table Name: **{bronze_table if bronze_table else 'N/A'}**")
# print(f" - Silver Table Name: **{silver_table if silver_table else 'N/A'}**")
# print(f" - Gold Table Name: **{gold_table if gold_table else 'N/A'}**")

Reading existing configuration from sqllite_config.json...


In [34]:
JSON_FILE_PATH = "sqllite_config.json"
TIER_KEYWORDS = ["bronze", "silver", "gold"]


def derive_custom_table_name(filename):
    """
    Build a short table name from a CSV file name.

    The extension is dropped. If the name contains one of TIER_KEYWORDS
    (matched case-insensitively, tried in list order), the result is the text
    from that keyword onward, cut to its first three underscore-separated
    words; the original casing is kept. Without a tier keyword the whole
    base name is returned.

    Example: 'ailab_bronze_eres_flight_data1.csv' -> 'bronze_eres_flight'

    Returns None for an empty or missing filename.
    """
    if not filename:
        return None

    stem = os.path.splitext(filename)[0]
    stem_lower = stem.lower()

    # First keyword, in TIER_KEYWORDS order, that occurs anywhere in the name.
    matched_tier = next((kw for kw in TIER_KEYWORDS if kw in stem_lower), None)
    if matched_tier is None:
        # No tier keyword: fall back to the full base name.
        return stem

    # Keep the tier word plus at most the two words that follow it.
    from_tier = stem[stem_lower.find(matched_tier):]
    return "_".join(from_tier.split("_")[:3])

# --- 1. Read JSON file to get filenames ---
try:
    print(f"Reading existing configuration from {JSON_FILE_PATH}...")
    with open(JSON_FILE_PATH, 'r') as f:
        config_data = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    print(f"Error: Could not read or decode JSON file at {JSON_FILE_PATH}. Aborting Part 2.")
    # exit() is a helper meant for the interactive interpreter. Inside a
    # Jupyter kernel it requests a kernel shutdown rather than reliably
    # stopping the cell at this line (NOTE(review): confirm for the kernel in
    # use). Raising SystemExit stops execution here in both cases.
    raise SystemExit(1)

# Safely extract the filenames dictionary
file_config = config_data.get("files", {})

# Single file name for Bronze, lists of file names for Silver/Gold
bronze_file = file_config.get("bronze_csv_file")
silver_files_list = file_config.get("silver_csv_files_list", []) # Default to empty list
gold_files_list = file_config.get("gold_csv_files_list", [])     # Default to empty list


# --- 2. Prepare the NEW Nested Table Dictionaries ---

# One dictionary per tier; each is filled with {table name: database file name}
bronze_tables = {}
silver_tables = {}
gold_tables = {}
all_generated_table_names = []


# Helper function to process files and populate the respective table dictionary
def process_files_for_tier(file_list_or_single_file, tier_key, table_dict):
    """
    Derive a table name for each file of one tier and record it in table_dict.

    Args:
        file_list_or_single_file: A single file name (str), an iterable of
            file names, or None (treated as no files).
        tier_key: Tier label; only "silver" triggers the prefix lower-casing.
        table_dict: Dictionary updated in place with
            {table name: "<table name>.db"}.

    Returns:
        List of the table names generated, in processing order.
    """
    source = file_list_or_single_file
    if isinstance(source, str):
        # A bare string is one file name, not a sequence of characters.
        candidates = [source]
    else:
        candidates = [] if source is None else source

    generated = []
    for csv_name in candidates:
        if not csv_name:
            continue  # ignore empty entries

        name = derive_custom_table_name(csv_name)

        # Silver only: force a lower-case "silver" prefix (6 == len("silver")).
        if tier_key == "silver" and name.lower().startswith("silver"):
            name = "silver" + name[6:]

        # The database file name is simply the derived table name + '.db'.
        table_dict[name] = f"{name}.db"
        generated.append(name)

    return generated


# Derive table names for each tier; the *_tables dictionaries are filled in place.
bronze_names = process_files_for_tier(bronze_file, "bronze", bronze_tables)
silver_names = process_files_for_tier(silver_files_list, "silver", silver_tables)
gold_names = process_files_for_tier(gold_files_list, "gold", gold_tables)


# --- 3. Update JSON File with New Nested Tables ---

# Store the per-tier dictionaries at the root of the configuration.
config_data["bronze_tables"] = bronze_tables
config_data["silver_tables"] = silver_tables
config_data["gold_tables"] = gold_tables

# Drop the old generic 'tables' key if present (cleanup from prior attempts).
config_data.pop("tables", None)

# Persist the updated configuration.
print(f"Writing updated nested table configuration back to {JSON_FILE_PATH}...")
with open(JSON_FILE_PATH, 'w') as f:
    json.dump(config_data, f, indent=4)


# --- Print Summary ---
print(f"\nPart 2: Configuration file updated successfully.")
for tier_label, tier_names in (
    ("Bronze", bronze_names),
    ("Silver", silver_names),
    ("Gold", gold_names),
):
    print(f" - {tier_label} Table(s) Generated: **{', '.join(tier_names) if tier_names else 'N/A'}**")

Reading existing configuration from sqllite_config.json...
Writing updated nested table configuration back to sqllite_config.json...

Part 2: Configuration file updated successfully.
 - Bronze Table(s) Generated: **bronze_eres_flight**
 - Silver Table(s) Generated: **Ailab_curated_flight, silver_eres_airportcode, silver_eres_flight, silver_eres_seasoncode**
 - Gold Table(s) Generated: **AILab_Consumption_Customer_traveller, Gold_dimension_flightdata**


## Load into SQLite

In [35]:
import pandas as pd
import sqlite3
import json
import os
import logging

# --- Configuration File Path ---
CONFIG_FILE_PATH = "sqllite_config.json"

# Name given to the console log handler so that a repeated run in the same
# kernel can recognise it and not attach a second copy.
_CONSOLE_HANDLER_NAME = "csv_to_sqlite_console"


def _setup_logging(log_file):
    """Route INFO-and-above records to log_file and, once only, to the console."""
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        filemode='a'
    )

    root_logger = logging.getLogger('')
    # basicConfig() does nothing when the root logger already has handlers,
    # but addHandler() always adds. Without this check every call attaches
    # one more console handler and each message is printed once more.
    if any(h.get_name() == _CONSOLE_HANDLER_NAME for h in root_logger.handlers):
        return

    console = logging.StreamHandler()
    console.set_name(_CONSOLE_HANDLER_NAME)
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(message)s'))  # keep console output simple
    root_logger.addHandler(console)


def _get_target_schema_name(tier_key, filename, silver_schema, gold_schema):
    """
    Pick the target table name for a Silver/Gold file from the tier's schema dict.

    A file whose name contains "curated" (case-insensitive) maps to the first
    schema value containing "curation"; any other file maps to the first
    schema value that does not contain "curation".

    Returns:
        The matching schema value, or None (after logging a warning) when the
        schema dict has no suitable value.
    """
    # silver_schema is used for "silver"; every other tier key uses gold_schema.
    schema_dict = silver_schema if tier_key == "silver" else gold_schema
    wants_curation = "curated" in filename.lower()

    for value in schema_dict.values():
        if ("curation" in value) == wants_curation:
            return value  # e.g. 'silver_curation_flight' or 'silver_flight'

    if wants_curation:
        logging.warning(f"‚ö†Ô∏è Curation file '{filename}' found, but no matching curation schema in {tier_key}_schema.")
    else:
        logging.warning(f"‚ö†Ô∏è No default schema found for file '{filename}'.")
    return None


def _process_data_load(tier_name, folder, csv_file, db_file, table_name):
    """
    Load one CSV file into one SQLite table, replacing the table if it exists.

    Problems (incomplete arguments, missing CSV, read or write failures) are
    logged and the load is skipped; nothing is raised to the caller.
    """
    if not (csv_file and db_file and table_name):
        logging.error(
            f"ERROR: Incomplete {tier_name} configuration "
            f"(csv_file={csv_file!r}, db_file={db_file!r}, table_name={table_name!r}). Skipping load."
        )
        return

    full_csv_path = os.path.join(folder, csv_file)

    logging.info(f"\n--- Processing {tier_name.upper()} File: {csv_file} ---")
    logging.info(f"Source Path: {full_csv_path}")
    logging.info(f"Target DB: {db_file}")
    logging.info(f"Target Table: {table_name}")

    if not os.path.exists(full_csv_path):
        logging.error(f"‚ùå ERROR: CSV file not found at {full_csv_path}. Skipping load.")
        return

    conn = None
    try:
        df = pd.read_csv(full_csv_path)
        logging.info(f"   Successfully read {len(df)} rows from CSV.")

        conn = sqlite3.connect(db_file)
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        conn.commit()
        logging.info(f"   ‚úÖ Data successfully loaded into table '{table_name}' in {db_file}.")

    except Exception as e:
        logging.error(f"‚ùå ERROR during {tier_name} data loading for {csv_file}: {e}")
    finally:
        # Close the connection even when the write fails.
        if conn is not None:
            conn.close()


def load_data_from_csv_to_db():
    """
    Reads configuration, sets up logging, and loads the configured CSV files
    into their SQLite databases.

    Reads CONFIG_FILE_PATH and uses these entries:
      - logging.log_file: log file name (a default is used when absent)
      - files.bronze_csv_file, files.silver_csv_files_list,
        files.gold_csv_files_list: CSV names inside BRONZE/, SILVER/, GOLD/
      - databases.<tier>_database_file: SQLite file per tier
      - bronze_tables: its first key is the Bronze table name
      - silver_schema / gold_schema: values are the Silver/Gold table names

    Returns:
        None. Missing or incomplete entries are logged and the affected load
        is skipped; the remaining loads still run.
    """
    # 1. Load Configuration from JSON
    try:
        with open(CONFIG_FILE_PATH, 'r') as f:
            config = json.load(f)
    except Exception as e:
        print(f"‚ùå FATAL ERROR: Could not load configuration file {CONFIG_FILE_PATH}. Aborting. Error: {e}")
        return
    if not isinstance(config, dict):
        print(f"FATAL ERROR: Configuration file {CONFIG_FILE_PATH} must contain a JSON object. Aborting.")
        return

    # --- 2. Setup Logging ---
    try:
        log_file = config['logging']['log_file']
    except (KeyError, TypeError):
        log_file = "default_db_operations.log"
        print(f"‚ö†Ô∏è Warning: 'logging' key not found in config. Using default log file: {log_file}")
    _setup_logging(log_file)

    logging.info(f"--- Data Loading Process Started ---")
    logging.info(f"‚úÖ Configuration loaded successfully from {CONFIG_FILE_PATH}.")

    # --- 3. Extract Configuration Values ---
    # Missing sections become empty so that one absent entry only skips the
    # load that needs it instead of aborting the whole run with a KeyError.
    files_cfg = config.get('files') or {}
    databases_cfg = config.get('databases') or {}

    bronze_filename = files_cfg.get('bronze_csv_file')
    tier_files = {
        "silver": files_cfg.get('silver_csv_files_list') or [],
        "gold": files_cfg.get('gold_csv_files_list') or [],
    }

    db_map = {
        "bronze": databases_cfg.get('bronze_database_file'),
        "silver": databases_cfg.get('silver_database_file'),
        "gold": databases_cfg.get('gold_database_file'),
    }

    silver_schema = config.get('silver_schema') or {}
    gold_schema = config.get('gold_schema') or {}

    # The Bronze table name is the first key of 'bronze_tables' (None if empty).
    bronze_table_name = next(iter(config.get('bronze_tables') or {}), None)

    # --- 4. Execute Data Loading ---

    # A. Load Bronze Data (Single File)
    logging.info("\n--- EXECUTING BRONZE LOAD ---")
    if bronze_filename and bronze_table_name:
        _process_data_load(
            tier_name="bronze",
            folder="BRONZE",
            csv_file=bronze_filename,
            db_file=db_map["bronze"],
            table_name=bronze_table_name
        )
    else:
        logging.warning("No Bronze CSV file or Bronze table name configured. Skipping Bronze load.")

    # B./C. Load Silver and Gold Data (Multiple Files with Schema Mapping)
    # NOTE(review): every non-curated file of a tier maps to the same schema
    # value, and each load replaces the table, so only the last such file
    # survives. The intended file-to-table mapping is not visible here; the
    # overwrite is reported with a warning rather than changed.
    loaded_from = {}
    for tier_name, folder in (("silver", "SILVER"), ("gold", "GOLD")):
        logging.info(f"\n--- EXECUTING {tier_name.upper()} LOAD (Multiple Files) ---")
        for csv_file in tier_files[tier_name]:
            target_table = _get_target_schema_name(tier_name, csv_file, silver_schema, gold_schema)
            if not target_table:
                continue

            target = (db_map[tier_name], target_table)
            if target in loaded_from:
                logging.warning(
                    f"Table '{target_table}' in {db_map[tier_name]} was already loaded from "
                    f"'{loaded_from[target]}' in this run; loading '{csv_file}' replaces it."
                )
            loaded_from[target] = csv_file

            _process_data_load(
                tier_name=tier_name,
                folder=folder,
                csv_file=csv_file,
                db_file=db_map[tier_name],
                table_name=target_table  # the mapped schema name is the SQL table name
            )

    logging.info("\n--- Data Loading Process Finished ---")

# Run the load when this code is executed directly (module name is "__main__").
if __name__ == "__main__":
    load_data_from_csv_to_db()

--- Data Loading Process Started ---
--- Data Loading Process Started ---
‚úÖ Configuration loaded successfully from sqllite_config.json.
‚úÖ Configuration loaded successfully from sqllite_config.json.

--- EXECUTING BRONZE LOAD ---

--- EXECUTING BRONZE LOAD ---

--- Processing BRONZE File: ailab_bronze_eres_flight_data1.csv ---

--- Processing BRONZE File: ailab_bronze_eres_flight_data1.csv ---
Source Path: BRONZE\ailab_bronze_eres_flight_data1.csv
Source Path: BRONZE\ailab_bronze_eres_flight_data1.csv
Target DB: bronze_data.db
Target DB: bronze_data.db
Target Table: bronze_eres_flight
Target Table: bronze_eres_flight
   Successfully read 46 rows from CSV.
   Successfully read 46 rows from CSV.
   ‚úÖ Data successfully loaded into table 'bronze_eres_flight' in bronze_data.db.
   ‚úÖ Data successfully loaded into table 'bronze_eres_flight' in bronze_data.db.

--- EXECUTING SILVER LOAD (Multiple Files) ---

--- EXECUTING SILVER LOAD (Multiple Files) ---

--- Processing SILVER File: Ail

In [None]:

# # --- Configuration File Path ---
# CONFIG_FILE_PATH = "sqllite_config.json"

# def load_data_from_csv_to_db():
#     """Reads configuration and loads specified CSV files into their respective SQLite databases."""
    
#     # 1. Load Configuration from JSON
#     try:
#         with open(CONFIG_FILE_PATH, 'r') as f:
#             config = json.load(f)
#         print(f"Configuration loaded successfully from {CONFIG_FILE_PATH}.")
#     except Exception as e:
#         print(f"Error loading configuration file: {e}")
#         return

#     # --- Extract Configuration Values ---
    
#     # Files and Folders (Assuming folders match file tier: BRONZE/SILVER)
#     bronze_filename = config['files']['bronze_csv_file']
#     silver_filename = config['files']['silver_csv_file']
    
#     # Databases
#     bronze_db_file = config['databases']['bronze_database_file']
#     silver_db_file = config['databases']['silver_database_file']
    
#     # Table Names (Keys in the nested dictionaries)
#     bronze_table_name = list(config['bronze_tables'].keys())[0]
#     silver_table_name = list(config['silver_tables'].keys())[0]

#     # --- Processing Function ---

#     def process_tier_data(tier_name, folder, csv_file, db_file, table_name):
#         """Loads a single CSV file into a specified SQLite table."""
#         full_csv_path = os.path.join(folder, csv_file)
        
#         print(f"\n--- Processing {tier_name.upper()} Data ---")
#         print(f"Source: {full_csv_path}")
#         print(f"Target DB: {db_file}")
#         print(f"Target Table: {table_name}")

#         # Check if the source CSV file exists
#         if not os.path.exists(full_csv_path):
#             print(f"ERROR: CSV file not found at {full_csv_path}. Skipping {tier_name} load.")
#             return

#         try:
#             # 1. Read the CSV file into a pandas DataFrame
#             df = pd.read_csv(full_csv_path)
#             print(f"   Successfully read {len(df)} rows from CSV.")
            
#             # 2. Connect to the SQLite Database (creates it if it doesn't exist)
#             conn = sqlite3.connect(db_file)
#             cursor = conn.cursor()

#             # 3. Load the DataFrame into the SQLite table
#             # 'replace' will overwrite the table if it exists. Use 'append' to add new data.
#             # 'fail' will stop if the table exists.
#             df.to_sql(table_name, conn, if_exists='replace', index=False)

#             conn.commit()
#             conn.close()
#             print(f"Data successfully loaded into table '{table_name}' in {db_file}.")

#         except Exception as e:
#             print(f"ERROR during {tier_name} data loading: {e}")

#     # --- Execute Data Loading for Bronze and Silver Tiers ---
    
#     # Load Bronze Data
#     process_tier_data(
#         tier_name="bronze",
#         folder="BRONZE", # Assumes your folder is named BRONZE
#         csv_file=bronze_filename,
#         db_file=bronze_db_file,
#         table_name=bronze_table_name
#     )

#     # Load Silver Data
#     process_tier_data(
#         tier_name="silver",
#         folder="SILVER", # Assumes your folder is named SILVER
#         csv_file=silver_filename,
#         db_file=silver_db_file,
#         table_name=silver_table_name
#     )

# if __name__ == "__main__":
#     load_data_from_csv_to_db()

‚úÖ Configuration loaded successfully from sqllite_config.json.

--- Processing BRONZE Data ---
Source: BRONZE\ailab_bronze_eres_flight_data1.csv
Target DB: bronze_data.db
Target Table: bronze_eres_flight
   Successfully read 46 rows from CSV.
   ‚úÖ Data successfully loaded into table 'bronze_eres_flight' in bronze_data.db.

--- Processing SILVER Data ---
Source: SILVER\AiLab_Silver_eres_flight.csv
Target DB: silver_data.db
Target Table: silver_eres_flight
   Successfully read 50 rows from CSV.
   ‚úÖ Data successfully loaded into table 'silver_eres_flight' in silver_data.db.


In [None]:

# # --- Configuration File Path ---
# CONFIG_FILE_PATH = "sqllite_config.json"

# def load_data_from_csv_to_db():
#     """Reads configuration, sets up logging, and loads specified CSV files into their respective SQLite databases."""
    
#     # 1. Load Configuration from JSON (Temporary load to get log file name)
#     try:
#         with open(CONFIG_FILE_PATH, 'r') as f:
#             config = json.load(f)
#     except Exception as e:
#         # Cannot log error if logging is not yet set up
#         print(f"‚ùå FATAL ERROR: Could not load configuration file {CONFIG_FILE_PATH}. Aborting. Error: {e}")
#         return

#     # --- 2. Setup Logging ---
    
#     # Get the log file name from the configuration
#     try:
#         log_file = config['logging']['log_file']
#     except KeyError:
#         log_file = "default_db_operations.log"
#         print(f"‚ö†Ô∏è Warning: 'logging' key not found in config. Using default log file: {log_file}")
    
#     # Configure logging to write to the file and output to the console
#     logging.basicConfig(
#         filename=log_file,
#         level=logging.INFO, # Capture INFO and above
#         format='%(asctime)s - %(levelname)s - %(message)s',
#         datefmt='%Y-%m-%d %H:%M:%S',
#         filemode='a' # Append to the log file
#     )
    
#     # Also set up a stream handler to print logs to the console
#     console = logging.StreamHandler()
#     console.setLevel(logging.INFO)
#     formatter = logging.Formatter('%(message)s') # Keep console output simple
#     console.setFormatter(formatter)
#     logging.getLogger('').addHandler(console)
    
#     logging.info(f"--- Data Loading Process Started ---")
#     logging.info(f"‚úÖ Configuration loaded successfully from {CONFIG_FILE_PATH}.")


#     # --- Extract Configuration Values ---
    
#     # Files and Folders (Assuming folders match file tier: BRONZE/SILVER)
#     bronze_filename = config['files']['bronze_csv_file']
#     silver_filename = config['files']['silver_csv_file']
    
#     # Databases
#     bronze_db_file = config['databases']['bronze_database_file']
#     silver_db_file = config['databases']['silver_database_file']
    
#     # Table Names (Keys in the nested dictionaries)
#     try:
#         bronze_table_name = list(config['bronze_tables'].keys())[0]
#         silver_table_name = list(config['silver_tables'].keys())[0]
#     except (IndexError, KeyError) as e:
#         logging.error(f"‚ùå ERROR: Table configuration missing or invalid: {e}. Aborting.")
#         return


#     # --- Processing Function ---

#     def process_tier_data(tier_name, folder, csv_file, db_file, table_name):
#         """Loads a single CSV file into a specified SQLite table."""
#         full_csv_path = os.path.join(folder, csv_file)
        
#         # Replaced print() with logging.info()
#         logging.info(f"\n--- Processing {tier_name.upper()} Data ---")
#         logging.info(f"Source: {full_csv_path}")
#         logging.info(f"Target DB: {db_file}")
#         logging.info(f"Target Table: {table_name}")

#         # Check if the source CSV file exists
#         if not os.path.exists(full_csv_path):
#             # Replaced print(f"ERROR:...") with logging.error()
#             logging.error(f"‚ùå ERROR: CSV file not found at {full_csv_path}. Skipping {tier_name} load.")
#             return

#         try:
#             # 1. Read the CSV file into a pandas DataFrame
#             df = pd.read_csv(full_csv_path)
#             logging.info(f"   Successfully read {len(df)} rows from CSV.")
            
#             # 2. Connect to the SQLite Database (creates it if it doesn't exist)
#             conn = sqlite3.connect(db_file)
#             # Removed unnecessary cursor = conn.cursor()
            
#             # 3. Load the DataFrame into the SQLite table
#             df.to_sql(table_name, conn, if_exists='replace', index=False)

#             conn.commit()
#             conn.close()
#             # Replaced print(f"Data successfully...") with logging.info()
#             logging.info(f"   ‚úÖ Data successfully loaded into table '{table_name}' in {db_file}.")

#         except Exception as e:
#             # Replaced print(f"ERROR during...") with logging.error()
#             logging.error(f"‚ùå ERROR during {tier_name} data loading: {e}")

#     # --- Execute Data Loading for Bronze and Silver Tiers ---
    
#     # Load Bronze Data
#     process_tier_data(
#         tier_name="bronze",
#         folder="BRONZE", # Assumes your folder is named BRONZE
#         csv_file=bronze_filename,
#         db_file=bronze_db_file,
#         table_name=bronze_table_name
#     )

#     # Load Silver Data
#     process_tier_data(
#         tier_name="silver",
#         folder="SILVER", # Assumes your folder is named SILVER
#         csv_file=silver_filename,
#         db_file=silver_db_file,
#         table_name=silver_table_name
#     )
    
#     logging.info("\n--- Data Loading Process Finished ---")

# if __name__ == "__main__":
#     load_data_from_csv_to_db()

--- Data Loading Process Started ---
‚úÖ Configuration loaded successfully from sqllite_config.json.

--- Processing BRONZE Data ---
Source: BRONZE\ailab_bronze_eres_flight_data1.csv
Target DB: bronze_data.db
Target Table: bronze_eres_flight
   Successfully read 46 rows from CSV.
   ‚úÖ Data successfully loaded into table 'bronze_eres_flight' in bronze_data.db.

--- Processing SILVER Data ---
Source: SILVER\AiLab_Silver_eres_flight.csv
Target DB: silver_data.db
Target Table: silver_eres_flight
   Successfully read 50 rows from CSV.
   ‚úÖ Data successfully loaded into table 'silver_eres_flight' in silver_data.db.

--- Data Loading Process Finished ---


In [4]:


# --- Configuration ---
# NOTE(review): the loader output recorded earlier in this notebook reads
# 'ailab_bronze_eres_flight_data1.csv' (underscore, not a dot, after "eres").
# Confirm which spelling matches the file on disk.
bronze_csv_file = 'ailab_bronze_eres.flight_data1.csv'
silver_csv_file = 'AiLab_Silver_eres_flight.csv'
bronze_database_file = 'bronze_data.db'
# Carries the same '.db' extension as the bronze database and as the
# 'silver_data.db' name used elsewhere in this notebook; the previous value
# 'silver_data' pointed at a different file.
silver_database_file = 'silver_data.db'
table_name = 'data_table'
log_file = "db_operations.log"
# --- End Configuration ---

# Strip only the final extension; dots earlier in the name are kept.
# Indexing [0] avoids binding `_`, which IPython uses for the last cell output.
filename = os.path.splitext(bronze_csv_file)[0]

print(filename)  # Output: ailab_bronze_eres.flight_data1

ailab_bronze_eres.flight_data1


In [None]:
# --- 1. Inputs ---------------------------------------------------------------
# In a full run these three lists are produced by the earlier folder-scanning
# steps (folders = ["BRONZE", "SILVER", "GOLD"], csv_files = {...}).
# The literals below are stand-in example data so that this cell can run
# without file system access; replace them with the real lists.
bronze_files = ['user_data.csv', 'transaction_log.csv']
silver_files = ['aggregated_sales.csv']
gold_files = ['final_report.csv']

# --- 2. Build the 'files' and 'tables' sections -------------------------------

# Config key -> that tier's file list. A list that is not defined in this
# namespace is looked up as None and therefore treated like an empty list.
_tier_file_lists = {
    "bronze_csv_file": locals().get('bronze_files'),
    "silver_csv_file": locals().get('silver_files'),
    "gold_csv_file": locals().get('gold_files'),
}

# Only tiers that actually have files get an entry: the first file of the list.
file_config = {
    config_key: candidates[0]
    for config_key, candidates in _tier_file_lists.items()
    if candidates
}

# Per-tier first file name, or None when the tier contributed nothing.
bronze_csv_file = file_config.get("bronze_csv_file")
silver_csv_file = file_config.get("silver_csv_file")
gold_csv_file = file_config.get("gold_csv_file")

# The '*_table' keys hold database file names (carried over from the prior
# example). 'table_name' is the gold file name with its extension removed,
# or a fixed default when there is no gold file.
table_config = {
    "bronze_table": "bronze_data.db",
    "silver_table": "silver_data.db",
    "gold_table": "gold_data.db",
    "table_name": (
        os.path.splitext(gold_csv_file)[0] if gold_csv_file else "default_data_table"
    ),
}

# --- 3. Update the JSON file ---------------------------------------------------

JSON_FILE_PATH = "sqllite_config.json"

# 3a. Load whatever configuration already exists; a missing or undecodable
#     file falls back to an empty configuration object.
print(f"Reading existing configuration from {JSON_FILE_PATH}...")
try:
    with open(JSON_FILE_PATH, 'r') as config_in:
        config_data = json.load(config_in)
except FileNotFoundError:
    print(f"Warning: {JSON_FILE_PATH} not found. Creating a new configuration object.")
    config_data = {}
except json.JSONDecodeError:
    print(f"Error decoding JSON from {JSON_FILE_PATH}. Starting with an empty configuration object.")
    config_data = {}

# 3b. Replace just these two sections; any other keys are left as loaded.
config_data["files"] = file_config
config_data["tables"] = table_config

# 3c. Persist the merged configuration.
print(f"Writing updated configuration back to {JSON_FILE_PATH}...")
with open(JSON_FILE_PATH, 'w') as config_out:
    json.dump(config_data, config_out, indent=4)

print("\n‚úÖ Configuration updated successfully.")
print(f"The final table name assigned to 'table_name' is: **{table_config['table_name']}**")

Reading existing configuration from sqllite_config.json...
Writing updated configuration back to sqllite_config.json...

‚úÖ Configuration updated successfully.


In [None]:

# --- Logging Setup ---
# force=True (Python 3.8+) replaces any handlers already attached to the root
# logger. Without it, basicConfig() silently does nothing when logging was
# already configured in this kernel session (re-running this cell, or another
# cell's logging setup), and no records reach `log_file`.
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
    force=True,
)

# Operation counters. upload_csv_to_sqlite() declares all three `global`,
# sets insert_count, and reports all three in its run summary.
insert_count = 0
update_count = 0
delete_count = 0


def log_operation(query):
    """Record one SQL statement at INFO level through the module-level logging API."""
    logging.log(logging.INFO, query)


def upload_csv_to_sqlite(csv_path, db_path, table):
    """Load a CSV file into an SQLite table and log an INSERT line per row.

    The table is replaced on every call (``if_exists='replace'``). After the
    load, the row count is read back from the database and stored in the
    module-level ``insert_count``. One INSERT-style line per DataFrame row is
    passed to ``log_operation``, and a run summary is written to the log.

    Args:
        csv_path: Path of the CSV file read with ``pd.read_csv``.
        db_path: Path of the SQLite database file passed to ``sqlite3.connect``.
        table: Name of the table to drop and recreate.

    Returns:
        None. Any exception is caught and reported with ``print``.
    """
    global insert_count, update_count, delete_count

    # Reset so a failed run reports 0 rather than the previous run's count.
    insert_count = 0
    # Stays None when connect() itself fails, so `finally` knows not to close
    # (closing an unbound name would raise and hide the original error).
    conn = None

    try:
        conn = sqlite3.connect(db_path)
        print(f"‚úÖ Connected to database: {db_path}")

        df = pd.read_csv(csv_path)
        print(f"‚úÖ Loaded CSV file: {csv_path}")

        # Drop & recreate table
        df.to_sql(table, conn, if_exists='replace', index=False)

        # Read the row count back from the database. The identifier is
        # double-quoted (embedded quotes doubled) so table names containing
        # dots, spaces or reserved words, such as names derived from file
        # names, do not break the query.
        quoted_table = '"' + str(table).replace('"', '""') + '"'
        insert_count = conn.execute(
            f"SELECT COUNT(*) FROM {quoted_table}"
        ).fetchone()[0]

        # Log each INSERT record. itertuples() is used rather than iterrows()
        # because iterrows() turns each row into a single-dtype Series, which
        # can change the logged values (e.g. ints shown as floats).
        for row_values in df.itertuples(index=False, name=None):
            query = f"INSERT INTO {table} VALUES ({', '.join([repr(x) for x in row_values])});"
            log_operation(query)

        print(f"üìä Rows inserted: {insert_count}")

        # No UPDATE or DELETE statements are issued here, so update_count and
        # delete_count are only reported, never changed.

    except Exception as e:
        print(f"‚ùå Error: {e}")

    finally:
        if conn is not None:
            conn.close()
            print("‚û°Ô∏è Database connection closed.")

        # Log summary (written even when the load failed).
        summary = (
            f"Run Summary ‚Üí INSERTS={insert_count}, "
            f"UPDATES={update_count}, DELETES={delete_count}"
        )
        logging.info(summary)

        print("üìÅ Log updated:", log_file)




# Run the upload.
# NOTE(review): csv_file and database_file are not defined in this cell; they
# come from the configuration cell at the top of the notebook ('bmi.csv' and
# 'my_database.db'). table_name is 'data_table' in both configuration cells.
# On a fresh kernel, run those cells first.
upload_csv_to_sqlite(csv_file, database_file, table_name)
