In [1]:
from faker import Faker
import random
import sqlite3
import pandas as pd

# Seed both random sources up front so that Restart & Run All regenerates the
# same synthetic names, addresses, quantities and amounts. Values derived from
# the current date (e.g. a date range ending 'today') still move with the
# calendar.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)
fake = Faker()

# Create a database connection
conn = sqlite3.connect('ecommerce.db')
cursor = conn.cursor()

# CREATE statement for every table, keyed by table name, in creation order.
TABLE_DDL = {
    # Create Customers table
    'Customers': '''CREATE TABLE Customers (
                    customer_id INTEGER PRIMARY KEY,
                    name TEXT,
                    shipping_addr TEXT,
                    contact_number TEXT,
                    current_flag INTEGER DEFAULT 1
                )''',
    # Create Orders table
    'Orders': '''CREATE TABLE Orders (
                    order_id INTEGER PRIMARY KEY,
                    customer_id INTEGER,
                    order_date TEXT,
                    total_amount REAL NOT NULL,
                    FOREIGN KEY (customer_id) REFERENCES Customers(customer_id)
                )''',
    # Create Order Items table
    'OrderItems': '''CREATE TABLE OrderItems (
                    item_id INTEGER PRIMARY KEY,
                    order_id INTEGER,
                    product_id INTEGER,
                    quantity INTEGER,
                    price REAL,
                    FOREIGN KEY (order_id) REFERENCES Orders(order_id),
                    FOREIGN KEY (product_id) REFERENCES Products(product_id)
                )''',
    # Create Products table
    'Products': '''CREATE TABLE Products (
                    product_id INTEGER PRIMARY KEY,
                    name TEXT,
                    description TEXT
                )''',
    # Create Variants table
    'Variants': '''CREATE TABLE Variants (
                    variant_id INTEGER PRIMARY KEY,
                    name TEXT,
                    parent_product_id INTEGER,
                    FOREIGN KEY (parent_product_id) REFERENCES Products(product_id)
                )''',
}


def create_schema(cur):
    """Drop and recreate every table listed in ``TABLE_DDL``.

    The database file persists between kernel sessions, so a bare
    ``CREATE TABLE`` fails with "table already exists" the second time this
    cell runs. Dropping first makes the cell re-runnable and guarantees the
    data-generation steps start from empty tables.

    Args:
        cur: An open ``sqlite3`` cursor on the target database.
    """
    # Drop in reverse creation order so referencing tables go first.
    for table in reversed(list(TABLE_DDL)):
        cur.execute(f'DROP TABLE IF EXISTS {table}')
    for ddl in TABLE_DDL.values():
        cur.execute(ddl)


create_schema(cursor)

# Generate customers
# Build every row first, then insert them as one batch with a single commit
# instead of opening and committing a transaction per customer.
# Newlines in the generated address are replaced with ", " so the address is
# stored on one line.
customer_rows = [
    (fake.name(), fake.address().replace("\n", ", "), fake.phone_number())
    for _ in range(10)
]
cursor.executemany('''INSERT INTO Customers (name, shipping_addr, contact_number)
                      VALUES (?, ?, ?)''', customer_rows)
conn.commit()

# Generate products
# The catalogue is defined once and iterated directly, so the number of rows
# always matches the list. 'iPhone' and 'MacBook' appear twice on purpose of
# the original data: the variants inserted later reference product ids 1-4.
product_names = ['iPhone', 'iPhone', 'MacBook', 'MacBook', 'Apple Watch',
                 'AirPods', 'Apple TV', 'iPod', 'Beats', 'iPad']
product_rows = [
    (product_name, fake.sentence(nb_words=6))
    for product_name in product_names
]
cursor.executemany('''INSERT INTO Products (name, description)
                      VALUES (?, ?)''', product_rows)
conn.commit()

# Generate variants
# Each variant is kept next to the product_id of its parent so the two values
# cannot drift apart. The ids assume Products was populated from an empty
# table, where the first four rows are the two 'iPhone' and two 'MacBook'
# products.
variant_rows = [
    ('iPhone 13', 1),
    ('iphone 14', 2),
    ('MacBook Air', 3),
    ('MacBook pro', 4),
]
cursor.executemany('''INSERT INTO Variants (name, parent_product_id)
                      VALUES (?, ?)''', variant_rows)
conn.commit()

# Generate orders and order items
# customer_id and product_id are drawn from 1..10, matching the ten customers
# and ten products inserted above.
# NOTE: total_amount is drawn independently of the order's line items, so it
# will not equal sum(quantity * price) for that order.
for _ in range(100):
    customer_id = random.randint(1, 10)
    # Bind the date as an ISO-8601 string. Passing a datetime.date relies on
    # sqlite3's implicit date adapter, which is deprecated since Python 3.12;
    # the stored text ("YYYY-MM-DD") is the same either way.
    order_date = fake.date_between(start_date='-2y', end_date='today').isoformat()
    total_amount = round(random.uniform(10, 1000), 2)
    cursor.execute('''INSERT INTO Orders (customer_id, order_date, total_amount)
                      VALUES (?, ?, ?)''', (customer_id, order_date, total_amount))
    # Primary key of the order just inserted; every item below points at it.
    order_id = cursor.lastrowid

    # Generate order items: between one and five lines per order.
    num_items = random.randint(1, 5)
    item_rows = [
        (
            order_id,
            random.randint(1, 10),                # product_id
            random.randint(1, 10),                # quantity
            round(random.uniform(10, 100), 2),    # price
        )
        for _item in range(num_items)
    ]
    cursor.executemany('''INSERT INTO OrderItems (order_id, product_id, quantity, price)
                          VALUES (?, ?, ?, ?)''', item_rows)

# One commit for the whole batch instead of one per order.
conn.commit()

# Close the database connection
conn.close()

In [2]:
def load_table(connection, table):
    """Read every row of ``table`` into a DataFrame.

    Args:
        connection: An open ``sqlite3`` connection.
        table: Name of the table to read. It is interpolated directly into
            the SQL text, so only pass trusted, hard-coded identifiers.

    Returns:
        A ``pandas.DataFrame`` with one column per column of the result set,
        named after the columns returned by the query.
    """
    return pd.read_sql_query(f"SELECT * FROM {table}", connection)


# Create a database connection
conn = sqlite3.connect('ecommerce.db')
try:
    # Get orders in dataframe
    orders_df = load_table(conn, 'Orders')

    # Get Customers in dataframe
    customers_df = load_table(conn, 'Customers')

    # Get Products in dataframe
    products_df = load_table(conn, 'Products')

    # Get Variants in dataframe
    variants_df = load_table(conn, 'Variants')

    # Get OrderItems in dataframe
    order_items_df = load_table(conn, 'OrderItems')
finally:
    # Close the database connection, even if one of the reads fails
    conn.close()

In [3]:
# Preview the first five rows of the Orders table.
orders_df.head(n=5)

Unnamed: 0,order_id,customer_id,order_date,total_amount
0,1,1,2021-07-29,20.71
1,2,1,2022-03-01,352.09
2,3,6,2023-03-24,484.17
3,4,1,2021-12-09,788.4
4,5,9,2022-09-27,812.18


In [4]:
# Preview the first five rows of the Customers table.
customers_df.head(n=5)

Unnamed: 0,customer_id,name,shipping_addr,contact_number,current_flag
0,1,Patrick Robinson,"446 Bryan Valleys, West Davidborough, DE 14742",(809)311-0442x462,1
1,2,Austin West,"Unit 6483 Box 6700, DPO AE 75134",(311)991-9678x0228,1
2,3,Emily Hendricks,"674 Courtney Centers, Lake Wendyville, DE 77373",(929)834-6800x187,1
3,4,Barbara Pacheco,"34477 Richards Meadow, West Erichaven, WY 97830",914.730.6949x8314,1
4,5,John Stone,"4043 Martinez Manors, Amandaville, PR 72395",8377749272,1


In [5]:
# Preview the first five rows of the Products table.
products_df.head(n=5)

Unnamed: 0,product_id,name,description
0,1,iPhone,Firm remember message its law institution.
1,2,iPhone,First stock agent soon lay president general.
2,3,MacBook,Another easy almost affect.
3,4,MacBook,So hold decide safe art.
4,5,Apple Watch,Attack top pattern kind.


In [6]:
# Preview up to five rows of the Variants table.
variants_df.head(n=5)

Unnamed: 0,variant_id,name,parent_product_id
0,1,iPhone 13,1
1,2,iphone 14,2
2,3,MacBook Air,3
3,4,MacBook pro,4


In [7]:
# Preview the first five rows of the OrderItems table.
order_items_df.head(n=5)

Unnamed: 0,item_id,order_id,product_id,quantity,price
0,1,1,7,9,80.55
1,2,2,9,9,47.31
2,3,3,7,9,54.95
3,4,3,3,1,35.98
4,5,3,4,7,18.13
