In [None]:
%load_ext sql
%config SqlMagic.style = '_DEPRECATED_DEFAULT'
%sql mysql+pymysql://root:password@localhost:3306/

# Create Database

In [17]:
%%sql
DROP DATABASE IF EXISTS customersDB;
CREATE DATABASE customersDB;
USE customersDB;

 * mysql+pymysql://root:***@localhost:3306/


11 rows affected.
1 rows affected.
0 rows affected.


[]

In [18]:
%%sql
show tables;

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.


Tables_in_customersdb


# Create Tables

In [19]:
%%sql
# customers table dimension table
DROP TABLE IF EXISTS customers;
CREATE TABLE customers(customer_id INT PRIMARY KEY AUTO_INCREMENT,                          #auto
                        first_name VARCHAR(100) NOT NULL,                                   #manual
                        last_name VARCHAR(100) NOT NULL,                                    #manual
                        gender ENUM('male','female','other','prefer not to say') NOT NULL,  #manual (trigger for prefer not to say)
                        age INT,                                                            #manual
                        email VARCHAR(100) UNIQUE NOT NULL,                                 #manual
                        ph_num VARCHAR(20) NOT NULL,                                        #manual
                        address1 VARCHAR(100) NOT NULL,                                     #manual
                        city VARCHAR(50) NOT NULL,                                          #manual
                        state VARCHAR(50) NOT NULL,                                        #manual
                        postal_code VARCHAR(10) NOT NULL,                                   #manual
                        country VARCHAR(100) NOT NULL,                                       #manual
                        registration_date DATETIME DEFAULT CURRENT_TIMESTAMP                #auto
                        );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [20]:
%%sql
# category dim table 
DROP TABLE IF EXISTS category;
CREATE TABLE category(
    category_id INT PRIMARY KEY AUTO_INCREMENT,         #auto
    category_name VARCHAR(50) NOT NULL UNIQUE,          #manual
    description VARCHAR(100) NOT NULL,
    updated_on DATETIME DEFAULT CURRENT_TIMESTAMP       #auto/(trigger)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [21]:
%%sql
# products table fact table(contains category table)
DROP TABLE IF EXISTS products;
CREATE TABLE products(
    product_id INT PRIMARY KEY AUTO_INCREMENT,  # automatic
    product_name VARCHAR(50) NOT NULL UNIQUE,          # manual 
    description VARCHAR(100) NOT NULL UNIQUE,          # manual
    category_id INT NOT NULL,                   # manual
    FOREIGN KEY (category_id) REFERENCES category(category_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [22]:
%%sql
# inventory fact(1) dimension table
DROP TABLE IF EXISTS inventory;
CREATE TABLE inventory(
    inventory_id INT PRIMARY KEY AUTO_INCREMENT,        #auto
    product_id INT NOT NULL,                            #manual
    quantity INT NOT NULL CHECK (quantity >=0),         #manual
    last_updated DATETIME DEFAULT CURRENT_TIMESTAMP,    #auto/trigger
    FOREIGN KEY (product_id) REFERENCES products(product_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [23]:
%%sql
# price table fact(1) table
DROP TABLE IF EXISTS price;
CREATE TABLE price(
    price_id INT PRIMARY KEY AUTO_INCREMENT,                    # automatic
    product_id INT NOT NULL UNIQUE,                             # manual
    price DECIMAL(10,2) NOT NULL CHECK(price>0),                # manual
    last_updated DATETIME DEFAULT CURRENT_TIMESTAMP,            # auto/trigger
    FOREIGN KEY (product_id) REFERENCES products(product_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [24]:
%%sql
# orderitems table fact(3) table
DROP TABLE IF EXISTS orderitems;
CREATE TABLE orderitems(
    orderitem_id INT PRIMARY KEY AUTO_INCREMENT,                    #auto
    order_id INT NOT NULL,                                          #manual/create here
    product_id INT NOT NULL,                                        #manual
    customer_id INT NOT NULL,                                       #manual
    seller_id INT NOT NULL,                                         #manual
    quantity INT NOT NULL,                                          #manual
    total_amount INT NOT NULL,                                      #trigger data from price table/procedure
    FOREIGN KEY(product_id) REFERENCES products(product_id),
    FOREIGN KEY(customer_id) REFERENCES customers(customer_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.


0 rows affected.


[]

In [25]:
%%sql
# employees table dim
DROP TABLE IF EXISTS employees;
CREATE TABLE employees(
    employee_id INT PRIMARY KEY AUTO_INCREMENT,                     #auto
    first_name VARCHAR(50) NOT NULL,                                #manual
    last_name VARCHAR(50) NOT NULL,                                 #manual
    age INT NOT NULL CHECK(age<=60),                                #manual
    phone_number VARCHAR(16) NOT NULL,                              #manual
    email VARCHAR(50) NOT NULL UNIQUE,                              #manual
    hire_date DATE DEFAULT (CURRENT_DATE),                          #manual
    role VARCHAR(20) NOT NULL,                                      #manual
    termination_date DATE DEFAULT NULL,                             #manual/default null
    manager_id INT,                                                 #manual
    is_working BOOLEAN NOT NULL DEFAULT TRUE,                       #manual/fefault null
    FOREIGN KEY (manager_id) REFERENCES employees(employee_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [26]:
%%sql
# sales table fact(2) table
DROP TABLE IF EXISTS orders;
CREATE TABLE orders(
    order_id INT PRIMARY KEY,   #CREATE FROM ORDERITEMS BY procedure unique
    customer_id INT NOT NULL,   #CREATE FROM ORDERITEMS BY procedure unique
    total_quantity INT NOT NULL,     #sum trigger from orderitems
    total_amount INT NOT NULL,  #sum trigger from orderitems
    order_date DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, # auto
    seller_id INT NOT NULL                                 #manual
);

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [27]:
%%sql
# sales table fact(2) table
DROP TABLE IF EXISTS shipping;
CREATE TABLE shipping(
    shipping_id INT PRIMARY KEY AUTO_INCREMENT,   #auto
    order_id INT NOT NULL,                              #procedure
    shipping_date DATE NULL,                            #manual
    shipping_status ENUM('shipped', 'pending', 'delivered','cancelled') DEFAULT 'pending',#manual
    delivery_date DATE,                                     #MANUAL
    shipping_addresss VARCHAR(200) NOT NULL,                #manual or can be populate from customers table if same address
    city VARCHAR(50) NOT NULL,                              #manual
    state VARCHAR(50) NOT NULL,                             #manual
    postalcode VARCHAR(10) NOT NULL,                        #manual
    country VARCHAR(100) NOT NULL,
    FOREIGN KEY (order_id) REFERENCES orders(order_id)
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [28]:
%%sql
# log table (audit table)
DROP TABLE IF EXISTS log;
CREATE TABLE log(
    log_id INT PRIMARY KEY AUTO_INCREMENT,
    action VARCHAR(10) NOT NULL,
    changed_by VARCHAR(50) NOT NULL,
    on_table VARCHAR(20) NOT NULL,
    on_column VARCHAR(20) NOT NULL,
    old_value VARCHAR(20),
    new_value VARCHAR(50),
    time DATETIME NOT NULL
    );

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.


0 rows affected.


[]

In [29]:
%%sql
DROP TABLE IF EXISTS ordering;
CREATE TABLE ordering(
    ordering_id INT PRIMARY KEY AUTO_INCREMENT,
    product_id INT NOT NULL,
    customer_id INT NOT NULL,
    seller_id INT NOT NULL,
    quantity INT NOT NULL,
    total_amount INT NOT NULL,
    done BOOLEAN NOT NULL DEFAULT 0,
    FOREIGN KEY(seller_id) REFERENCES employees(employee_id),
    FOREIGN KEY(product_id) REFERENCES products(product_id),
    FOREIGN KEY(customer_id) REFERENCES customers(customer_id)
);

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [30]:
%%sql
SHOW TABLES;

 * mysql+pymysql://root:***@localhost:3306/
11 rows affected.


Tables_in_customersdb
category
customers
employees
inventory
log
ordering
orderitems
orders
price
products


# Triggers

In [31]:
%%sql
DROP TRIGGER IF EXISTS gender_insert_on_customer_table;
CREATE TRIGGER gender_insert_on_customer_table
AFTER INSERT ON customers
FOR EACH ROW
BEGIN
    IF NEW.gender IS NULL THEN
        UPDATE customers
        SET gender = 'prefer not to say'
        WHERE customer_id = NEW.customer_id;
    END IF;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [32]:
%%sql
DROP TRIGGER IF EXISTS update_date_on_category_table;
CREATE TRIGGER update_date_on_category_table
AFTER UPDATE ON category
FOR EACH ROW
BEGIN
    UPDATE category
    SET updated_on = NOW()
    WHERE category_id = OLD.category_id;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [33]:
%%sql
DROP TRIGGER IF EXISTS price_update_on_price_table;
CREATE TRIGGER price_update_on_price_table
AFTER UPDATE ON price
FOR EACH ROW
BEGIN
    UPDATE price
    SET last_updated = NOW()
    WHERE product_id = OLD.product_id;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [34]:
%%sql
DROP TRIGGER IF EXISTS update_date_on_inventory_table;
CREATE TRIGGER update_date_on_inventory_table
AFTER UPDATE ON inventory
FOR EACH ROW
BEGIN
    UPDATE inventory
    SET last_updated = NOW()
    WHERE product_id = OLD.product_id;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

# Insert procedure

In [35]:
%%sql
DROP PROCEDURE IF EXISTS insert_date_in_customers;
CREATE PROCEDURE insert_date_in_customers(
    IN id INT, 
    IN fname VARCHAR(100), 
    IN lname VARCHAR(100),
    IN gen VARCHAR(10),
    IN age INT,
    IN email VARCHAR(100),
    IN ph VARCHAR(20),
    IN add1 VARCHAR(100),
    IN city VARCHAR(50),
    IN state VARCHAR(50),
    IN post VARCHAR(10),
    IN country VARCHAR(50)
    )
BEGIN
    INSERT INTO customers (customer_id,first_name,last_name,gender,age,email,ph_num,
    address1,address2,city,state,post_code,country)
    VALUES
    (id,fname,lname,gen,age,email,ph,add1,add2,city,state,post,country);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [36]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_category;
CREATE PROCEDURE insert_values_category(IN name VARCHAR(50),
                                        IN des VARCHAR(100))
BEGIN
    INSERT INTO category(category_name, description)
    VALUES (name,des);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [37]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_products;
CREATE PROCEDURE insert_values_products(
    IN name VARCHAR(50),
    IN des VARCHAR(100),
    IN cat INT
)
BEGIN 
    INSERT INTO products(product_name,description,category)
    VALUES (name,des,cat);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [38]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_inventory;
CREATE PROCEDURE insert_values_inventory(
    IN pid INT,
    IN qty INT)
BEGIN
    INSERT INTO inventory(product_id,quantity)
    VALUES (pid,qty);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [39]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_price;
CREATE PROCEDURE insert_values_price(
    IN pid INT,
    IN price INT)
BEGIN
    INSERT INTO price(product_id,price)
    VALUES (pid,price);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [40]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_ordering;
CREATE PROCEDURE insert_values_ordering(
    IN pid INT,
    IN cid INT,
    IN sid INT,
    IN qty INT,
    IN done INT)
BEGIN
    DECLARE price_value INT;
    DECLARE msg TEXT;
    DECLARE oid INT;
    DECLARE tqty INT;
    DECLARE tamt INT;

    SELECT price INTO price_value FROM price WHERE product_id = pid;

    IF price_value > 0 AND qty > 0 THEN
        INSERT INTO ordering(product_id,customer_id,seller_id,quantity,total_amount,done)
        VALUES (pid,cid,sid,qty,qty*price_value,done);
    ELSE 
        SET msg = CONCAT('Values cannot be 0 or less. Price = ', price_value, ', Quantity = ', qty);
        SIGNAL SQLSTATE '45000'
        SET MESSAGE_TEXT = msg;
    END IF;
    
    IF EXISTS (SELECT 1 FROM ordering WHERE customer_id = cid AND done = 1) THEN
        SELECT COALESCE(MAX(order_id)+1,1) INTO oid FROM orders;

        INSERT INTO orderitems(order_id,product_id,customer_id,seller_id,quantity,total_amount)
        SELECT oid,product_id,customer_id,seller_id,quantity,total_amount FROM ordering WHERE customer_id = cid;



        DELETE FROM ordering WHERE customer_id = cid;
        

        INSERT INTO orders(order_id,customer_id,total_quantity,total_amount,seller_id)
        VALUES (oid,cid,)
    END IF;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
(pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')\n    END IF;\nEND' at line 37")
[SQL: CREATE PROCEDURE insert_values_ordering(
    IN pid INT,
    IN cid INT,
    IN sid INT,
    IN qty INT,
    IN done INT)
BEGIN
    DECLARE price_value INT;
    DECLARE msg TEXT;
    DECLARE oid INT;
    DECLARE tqty INT;
    DECLARE tamt INT;

    SELECT price INTO price_value FROM price WHERE product_id = pid;

    IF price_value > 0 AND qty > 0 THEN
        INSERT INTO ordering(product_id,customer_id,seller_id,quantity,total_amount,done)
        VALUES (pid,cid,sid,qty,qty*price_value,done);
    ELSE 
        SET msg = CONCAT('Values cannot be 0 or less. Price = ', price_value, ', Quantity = ', qty);
        SIGNAL SQLSTATE '45000'
        SET MESSAGE_TEXT = msg;
    END IF;

    IF EXISTS (SELECT 1 FROM ordering

In [41]:
%%sql
DROP PROCEDURE IF EXISTS insert_values_employees;
CREATE PROCEDURE insert_values_employees(
    IN fname VARCHAR(50),
    IN lname VARCHAR(50),
    IN age INT,
    IN ph VARCHAR(16),
    IN email VARCHAR(50),
    IN role VARCHAR(20)
)
BEGIN
    INSERT INTO employees(first_name, last_name, age, phone_number, email, role)
    VALUES (fname,lname,age,ph,email,role);
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

%%sql


# update triggers

In [42]:
%%sql
DROP PROCEDURE IF EXISTS update_manager_employees;
CREATE PROCEDURE update_manager_employees(
    IN e_id INT,
    IN m_id INT)
BEGIN
    UPDATE employees
    SET manager_id = m_id
    WHERE employee_id = e_id;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [43]:
%%sql
DROP PROCEDURE IF EXISTS update_termination_employees;
CREATE PROCEDURE update_termination_employees(
    IN e_id INT
)
BEGIN
    UPDATE employees
    SET termination_date = CURRDATE(), is_working = 0
    WHERE employee_id = e_id;
END

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


[]

In [44]:
%%sql


UsageError: %%sql is a cell magic, but the cell body is empty. Did you mean the line magic %sql (single %)?


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql
USE customersDB;
select * from customers;

 * mysql+pymysql://root:***@localhost:3306/
0 rows affected.
0 rows affected.


customer_id,first_name,last_name,gender,age,email,ph_num,address1,city,state,postal_code,country,registration_date


In [None]:
%%sql


In [None]:
import mysql.connector

# Connect to the database
conn = mysql.connector.connect(
    host='localhost',
    user='root',
    password='password',
    database='customersdb'
)

cursor = conn.cursor()

# Get list of tables
cursor.execute("SHOW TABLES")
tables = cursor.fetchall()

# Loop through and get CREATE TABLE statement for each
for (table_name,) in tables:
    cursor.execute(f"SHOW CREATE TABLE `{table_name}`")
    result = cursor.fetchone()
    print(f"\n--- {table_name} ---")
    print(result[1])  # The CREATE TABLE statement

cursor.close()
conn.close()



--- category ---
CREATE TABLE `category` (
  `category_id` int NOT NULL AUTO_INCREMENT,
  `category_name` varchar(50) NOT NULL,
  `description` varchar(100) NOT NULL,
  `updated_on` datetime DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`category_id`),
  UNIQUE KEY `category_name` (`category_name`)
) ENGINE=InnoDB AUTO_INCREMENT=21 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci

--- customers ---
CREATE TABLE `customers` (
  `customer_id` int NOT NULL AUTO_INCREMENT,
  `first_name` varchar(100) NOT NULL,
  `last_name` varchar(100) NOT NULL,
  `gender` enum('male','female','other','prefer not to say') NOT NULL,
  `age` int DEFAULT NULL,
  `email` varchar(100) NOT NULL,
  `ph_num` varchar(20) NOT NULL,
  `address1` varchar(100) NOT NULL,
  `city` varchar(50) NOT NULL,
  `state` varchar(50) NOT NULL,
  `postal_code` varchar(10) NOT NULL,
  `country` varchar(50) NOT NULL,
  `registration_date` datetime DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`customer_id`),
  UNIQUE KEY `email` (`email`

In [57]:
import mysql.connector
import random
from datetime import datetime, timedelta
from faker import Faker
import os

# Initialize Faker
fake = Faker()

class DataGenerator:
    def __init__(self):
        self.config = {
            'host': os.getenv('DB_HOST', 'localhost'),
            'port': int(os.getenv('DB_PORT', '3306')),
            'database': os.getenv('DB_NAME', 'customersdb'),
            'user': os.getenv('DB_USER', 'root'),
            'password': os.getenv('DB_PASSWORD', 'password'),
            'charset': 'utf8mb4',
            'autocommit': True
        }
        self.connection = None
    
    def connect(self):
        """Establish database connection"""
        try:
            self.connection = mysql.connector.connect(**self.config)
            return True
        except mysql.connector.Error as e:
            print(f"Database connection error: {e}")
            return False
    
    def disconnect(self):
        """Close database connection"""
        if self.connection and self.connection.is_connected():
            self.connection.close()
    
    def execute_query(self, query, data=None, many=False):
        """Execute a query"""
        try:
            cursor = self.connection.cursor()
            if many:
                cursor.executemany(query, data)
            else:
                cursor.execute(query, data)
            self.connection.commit()
            return cursor.lastrowid
        except mysql.connector.Error as e:
            print(f"Query execution error: {e}")
            return None
        finally:
            cursor.close()
    
    def clear_tables(self):
        """Clear existing data from all tables"""
        print("Clearing existing data...")
        
        # Disable foreign key checks
        self.execute_query("SET FOREIGN_KEY_CHECKS = 0")
        
        tables = ['log', 'shipping', 'orderitems', 'ordering', 'orders', 'inventory', 'price', 'products', 'employees', 'customers', 'category']
        
        for table in tables:
            self.execute_query(f"TRUNCATE TABLE {table}")
        
        # Re-enable foreign key checks
        self.execute_query("SET FOREIGN_KEY_CHECKS = 1")
        print("Data cleared successfully!")
    
    def generate_categories(self, count=20):
        """Generate category data"""
        print(f"Generating {count} categories...")
        
        categories = [
            "Electronics", "Clothing", "Home & Garden", "Sports & Outdoors", "Books",
            "Health & Beauty", "Toys & Games", "Automotive", "Food & Beverages", "Music",
            "Movies & TV", "Software", "Office Supplies", "Pet Supplies", "Jewelry",
            "Tools & Hardware", "Arts & Crafts", "Baby Products", "Travel", "Photography"
        ]
        
        category_data = []
        for i, cat_name in enumerate(categories[:count]):
            category_data.append((
                cat_name,
                fake.text(max_nb_chars=100),
                fake.date_time_between(start_date='-2y', end_date='now')
            ))
        
        query = "INSERT INTO category (category_name, description, updated_on) VALUES (%s, %s, %s)"
        self.execute_query(query, category_data, many=True)
        print(f"Generated {len(category_data)} categories")
    
    def generate_customers(self, count=1500):
        """Generate customer data"""
        print(f"Generating {count} customers...")
        
        customer_data = []
        used_emails = set()
        
        for _ in range(count):
            # Ensure unique email
            email = fake.email()
            while email in used_emails:
                email = fake.email()
            used_emails.add(email)
            
            customer_data.append((
                fake.first_name(),
                fake.last_name(),
                random.choice(['male', 'female', 'other', 'prefer not to say']),
                random.randint(18, 80),
                email,
                fake.phone_number()[:20],  # Limit to 20 chars
                fake.street_address(),
                fake.city(),
                fake.state(),
                fake.postcode(),
                fake.country(),
                fake.date_time_between(start_date='-3y', end_date='now')
            ))
        
        query = """INSERT INTO customers 
                   (first_name, last_name, gender, age, email, ph_num, address1, city, state, postal_code, country, registration_date) 
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, customer_data, many=True)
        print(f"Generated {len(customer_data)} customers")

    def generate_employees(self, count=100):
        """Generate employee data"""
        print(f"Generating {count} employees...")
        
        roles = ['Manager', 'Sales Rep', 'Developer', 'Analyst', 'Support', 'Admin', 'Marketing', 'HR']
        employee_data = []
        used_emails = set()
        
        for i in range(count):
            # Ensure unique email
            email = fake.email()
            while email in used_emails:
                email = fake.email()
            used_emails.add(email)
            
            # Some employees have managers (not the first ones)
            manager_id = random.randint(1, max(1, i//3)) if i > 5 else None
            
            employee_data.append((
                fake.first_name(),
                fake.last_name(),
                random.randint(22, 60),
                fake.phone_number()[:16],
                email,
                fake.date_between(start_date='-5y', end_date='-30d'),
                random.choice(roles),
                fake.date_between(start_date='-1y', end_date='now') if random.random() < 0.1 else None,  # 10% terminated
                manager_id,
                1 if random.random() > 0.1 else 0  # 90% still working
            ))
        
        query = """INSERT INTO employees 
                   (first_name, last_name, age, phone_number, email, hire_date, role, termination_date, manager_id, is_working) 
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, employee_data, many=True)
        print(f"Generated {len(employee_data)} employees")

    
    def generate_products(self, count=500):
        """Generate product data"""
        print(f"Generating {count} products...")
        
        product_types = [
            "Smartphone", "Laptop", "T-Shirt", "Jeans", "Sofa", "Table", "Basketball", "Book",
            "Shampoo", "Toy Car", "Wrench", "Paint", "Coffee", "Headphones", "Watch", "Backpack"
        ]
        
        product_data = []
        used_names = set()
        used_descriptions = set()
        
        for _ in range(count):
            # Ensure unique names and descriptions
            name = f"{fake.company()} {random.choice(product_types)} {fake.word()}"
            while name in used_names:
                name = f"{fake.company()} {random.choice(product_types)} {fake.word()}"
            used_names.add(name)
            
            description = fake.text(max_nb_chars=100)
            while description in used_descriptions:
                description = fake.text(max_nb_chars=100)
            used_descriptions.add(description)
            
            product_data.append((
                name[:50],  # Limit to 50 chars
                description[:100],  # Limit to 100 chars
                random.randint(1, 20)  # category_id
            ))
        
        query = "INSERT INTO products (product_name, description, category_id) VALUES (%s, %s, %s)"
        self.execute_query(query, product_data, many=True)
        print(f"Generated {len(product_data)} products")
    
    def generate_prices(self, product_count=500):
        """Generate price data for products"""
        print(f"Generating prices for {product_count} products...")
        
        price_data = []
        for product_id in range(1, product_count + 1):
            price_data.append((
                product_id,
                round(random.uniform(9.99, 999.99), 2),
                fake.date_time_between(start_date='-1y', end_date='now')
            ))
        
        query = "INSERT INTO price (product_id, price, last_updated) VALUES (%s, %s, %s)"
        self.execute_query(query, price_data, many=True)
        print(f"Generated {len(price_data)} prices")
    
    def generate_inventory(self, product_count=500):
        """Generate inventory data"""
        print(f"Generating inventory for {product_count} products...")
        
        inventory_data = []
        for product_id in range(1, product_count + 1):
            inventory_data.append((
                product_id,
                random.randint(0, 1000),
                fake.date_time_between(start_date='-30d', end_date='now')
            ))
        
        query = "INSERT INTO inventory (product_id, quantity, last_updated) VALUES (%s, %s, %s)"
        self.execute_query(query, inventory_data, many=True)
        print(f"Generated {len(inventory_data)} inventory records")
    
    def generate_orders(self, count=2000):
        """Generate order data"""
        print(f"Generating {count} orders...")
        
        order_data = []
        for order_id in range(1, count + 1):
            customer_id = random.randint(1, 1500)  # Ensure valid customer ID
            seller_id = random.randint(1, 100)     # Ensure valid seller ID
            total_quantity = random.randint(1, 10)
            total_amount = random.randint(50, 5000)
            
            order_data.append((
                order_id,
                customer_id,
                total_quantity,
                total_amount,
                fake.date_time_between(start_date='-2y', end_date='now'),
                seller_id
            ))
        
        query = """INSERT INTO orders 
                   (order_id, customer_id, total_quantity, total_amount, order_date, seller_id) 
                   VALUES (%s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, order_data, many=True)
        print(f"Generated {len(order_data)} orders")
    
    def generate_orderitems(self, count=3000):
        """Generate order items data"""
        print(f"Generating {count} order items...")
        
        orderitem_data = []
        for _ in range(count):
            order_id = random.randint(1, 2000)     # Ensure valid order ID
            product_id = random.randint(1, 500)    # Ensure valid product ID
            customer_id = random.randint(1, 1500)  # Ensure valid customer ID
            seller_id = random.randint(1, 100)     # Ensure valid seller ID
            quantity = random.randint(1, 5)
            total_amount = random.randint(20, 1000)
            
            orderitem_data.append((
                order_id,
                product_id,
                customer_id,
                seller_id,
                quantity,
                total_amount
            ))
        
        query = """INSERT INTO orderitems 
                   (order_id, product_id, customer_id, seller_id, quantity, total_amount) 
                   VALUES (%s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, orderitem_data, many=True)
        print(f"Generated {len(orderitem_data)} order items")
    
    def generate_shipping(self, count=1500):
        """Generate shipping data"""
        print(f"Generating {count} shipping records...")
        
        statuses = ['shipped', 'pending', 'delivered', 'cancelled']
        shipping_data = []
        
        for _ in range(count):
            order_id = random.randint(1, 2000)  # Ensure valid order ID
            shipping_date = fake.date_between(start_date='-2y', end_date='now')
            status = random.choice(statuses)
            
            # Delivery date only if shipped or delivered
            delivery_date = None
            if status in ['shipped', 'delivered']:
                delivery_date = fake.date_between(start_date=shipping_date, end_date='now')
            
            shipping_data.append((
                order_id,
                shipping_date,
                status,
                delivery_date,
                fake.street_address()[:200],
                fake.city(),
                fake.state(),
                fake.postcode(),
                fake.country()
            ))
        
        query = """INSERT INTO shipping 
                   (order_id, shipping_date, shipping_status, delivery_date, shipping_addresss, city, state, postalcode, country) 
                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, shipping_data, many=True)
        print(f"Generated {len(shipping_data)} shipping records")
    
    def generate_logs(self, count=500):
        """Generate log data"""
        print(f"Generating {count} log records...")
        
        actions = ['INSERT', 'UPDATE', 'DELETE']
        tables = ['customers', 'orders', 'products', 'inventory', 'employees']
        columns = ['name', 'email', 'price', 'quantity', 'status', 'date']
        
        log_data = []
        for _ in range(count):
            log_data.append((
                random.choice(actions),
                fake.user_name(),
                random.choice(tables),
                random.choice(columns),
                fake.word(),
                fake.word(),
                fake.date_time_between(start_date='-1y', end_date='now')
            ))
        
        query = """INSERT INTO log 
                   (action, changed_by, on_table, on_column, old_value, new_value, time) 
                   VALUES (%s, %s, %s, %s, %s, %s, %s)"""
        self.execute_query(query, log_data, many=True)
        print(f"Generated {len(log_data)} log records")

    def generate_all_data(self):
        """Generate all sample data"""
        if not self.connect():
            print("Failed to connect to database")
            return
        
        try:
            print("Starting data generation...")
            print("=" * 50)
            
            # Clear existing data
            self.clear_tables()
            
            # Generate data in dependency order
            self.generate_categories(50)
            self.generate_customers(1500)
            self.generate_employees(100)
            self.generate_products(1000)
            self.generate_prices(1000)
            self.generate_inventory(1000)
            self.generate_orders(2000)
            self.generate_orderitems(5000)
            self.generate_shipping(2000)
            # self.generate_logs(500)
            
            print("=" * 50)
            print("Data generation complete!")
            
        except Exception as e:
            print(f"Error during data generation: {e}")
        finally:
            self.disconnect()

if __name__ == "__main__":
    generator = DataGenerator()
    generator.generate_all_data()



Starting data generation...
Clearing existing data...
Data cleared successfully!
Generating 50 categories...
Generated 20 categories
Generating 1500 customers...
Generated 1500 customers
Generating 100 employees...
Generated 100 employees
Generating 1000 products...
Generated 1000 products
Generating prices for 1000 products...
Generated 1000 prices
Generating inventory for 1000 products...
Generated 1000 inventory records
Generating 2000 orders...
Generated 2000 orders
Generating 5000 order items...
Generated 5000 order items
Generating 2000 shipping records...
Generated 2000 shipping records
Data generation complete!


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql


In [None]:
%%sql
