# Importing Libraries

In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
from tabulate import tabulate


# Importing the CSV

In [3]:
# Load the Superstore export. UTF-8 is tried first; if the file contains bytes
# that are not valid UTF-8, fall back to Latin-1.
try:
    ecom = pd.read_csv('superstore.csv', encoding='utf-8')
except UnicodeDecodeError:
    # Latin-1 maps every byte to a character, so this read cannot fail on decoding.
    # NOTE(review): if the file is really cp1252, some punctuation may be mis-decoded — confirm.
    ecom = pd.read_csv('superstore.csv', encoding='latin-1')

In [4]:
# Preview both ends of the raw dataset.
# A notebook only renders the value of a cell's *final* expression, so the head
# has to be shown explicitly with display() (provided by the IPython kernel);
# a bare `ecom.head()` in the middle of the cell is silently discarded.
display(ecom.head())

# Last few rows of the dataset (rendered as the cell's final expression)
ecom.tail()



Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
9989,9990,CA-2014-110422,1/21/2014,1/23/2014,Second Class,TB-21400,Tom Boeckenhauer,Consumer,United States,Miami,...,33180,South,FUR-FU-10001889,Furniture,Furnishings,Ultra Door Pull Handle,25.248,3,0.2,4.1028
9990,9991,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,FUR-FU-10000747,Furniture,Furnishings,Tenex B1-RE Series Chair Mats for Low Pile Car...,91.96,2,0.0,15.6332
9991,9992,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,TEC-PH-10003645,Technology,Phones,Aastra 57i VoIP phone,258.576,2,0.2,19.3932
9992,9993,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,92627,West,OFF-PA-10004041,Office Supplies,Paper,"It's Hot Message Books with Stickers, 2 3/4"" x 5""",29.6,4,0.0,13.32
9993,9994,CA-2017-119914,5/4/2017,5/9/2017,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,...,92683,West,OFF-AP-10002684,Office Supplies,Appliances,"Acco 7-Outlet Masterpiece Power Center, Wihtou...",243.16,2,0.0,72.948


In [5]:
# Work on a copy so the transformations applied to `ecom_copy` in later cells
# leave the raw `ecom` frame untouched.
ecom_copy = ecom.copy()

# Preview the first rows of the working copy (final expression, so it is rendered)
ecom_copy.head()



Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [9]:
# Normalise column names: lowercase, spaces -> underscores.
# regex=False makes the replacement an explicit literal substitution.
# Hyphens are left as-is, so 'Sub-Category' becomes 'sub-category' (later cells
# index the column by that exact name).
ecom_copy.columns = ecom_copy.columns.str.lower().str.replace(' ', '_', regex=False)

# Show the first rows with the new column names. display() is required because
# this is not the cell's final expression and would otherwise be discarded.
display(ecom_copy.head())

# Number of rows and columns in the dataset
print(ecom_copy.shape)

# Number of unique customers in the dataset
print(ecom_copy['customer_id'].nunique())

# Data types of the columns
print(ecom_copy.dtypes)



(9994, 21)
793
row_id                    int64
order_id                 object
order_date       datetime64[ns]
ship_date        datetime64[ns]
ship_mode                object
customer_id              object
customer_name            object
segment                  object
country                  object
city                     object
state                    object
postal_code               int64
region                   object
product_id               object
category                 object
sub-category             object
product_name             object
sales                   float64
quantity                  int64
discount                float64
profit                  float64
dtype: object


# Converting Column types

In [None]:
# Convert every column to its analysis dtype. Re-running this cell is safe:
# each conversion is a no-op on a column that already has the target dtype.

# Dates arrive from read_csv as text and must be parsed.
ecom_copy['order_date'] = pd.to_datetime(ecom_copy['order_date'])
ecom_copy['ship_date'] = pd.to_datetime(ecom_copy['ship_date'])

# Numeric measures and the row key.
ecom_copy['row_id'] = ecom_copy['row_id'].astype(int)
ecom_copy['quantity'] = ecom_copy['quantity'].astype(int)
for measure_column in ('sales', 'discount', 'profit'):
    ecom_copy[measure_column] = ecom_copy[measure_column].astype(float)

# Postal codes are identifiers, not numbers. Reading them as int64 drops any
# leading zero, so restore the 5-character width before treating them as labels.
# NOTE(review): assumes 5-digit US ZIP codes (the visible rows are all
# 'United States') — confirm before loading other countries.
ecom_copy['postal_code'] = ecom_copy['postal_code'].astype(str).str.zfill(5)

# Repeated labels are stored as pandas categoricals.
categorical_columns = [
    'order_id', 'ship_mode', 'customer_id', 'customer_name', 'segment',
    'country', 'city', 'state', 'postal_code', 'region', 'product_id',
    'category', 'sub-category',
]
for label_column in categorical_columns:
    ecom_copy[label_column] = ecom_copy[label_column].astype('category')

# Product names stay plain strings unless they repeat often enough for a
# categorical to pay off: convert only when fewer than 50% of the values are unique.
ecom_copy['product_name'] = ecom_copy['product_name'].astype(str)
product_name_ratio = ecom_copy['product_name'].nunique() / len(ecom_copy)
if product_name_ratio < 0.5:
    ecom_copy['product_name'] = ecom_copy['product_name'].astype('category')
    




Unnamed: 0,row_id,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,...,postal_code,region,product_id,category,sub-category,product_name,sales,quantity,discount,profit
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [44]:
# Summarise column dtypes and non-null counts of the working copy
ecom_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 25 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   row_id          9994 non-null   int64         
 1   order_id        9994 non-null   category      
 2   order_date      9994 non-null   datetime64[ns]
 3   ship_date       9994 non-null   datetime64[ns]
 4   ship_mode       9994 non-null   category      
 5   customer_id     9994 non-null   category      
 6   customer_name   9994 non-null   category      
 7   segment         9994 non-null   category      
 8   country         9994 non-null   category      
 9   city            9994 non-null   category      
 10  state           9994 non-null   category      
 11  postal_code     9994 non-null   category      
 12  region          9994 non-null   category      
 13  product_id      9994 non-null   category      
 14  category        9994 non-null   category      
 15  sub-

# Feature Engineering

In [40]:
def enrich_ecom_data(df):
    """
    Add category, subcategory, segment, and region ID columns to a copy of ``df``.

    The IDs are derived as follows:
      * ``category_id``    -- first product-id part + first four characters of the
                              last part, e.g. 'FUR-BO-10001798' -> 'FUR-1000'
      * ``subcategory_id`` -- first two product-id parts + the same four
                              characters, e.g. 'FUR-BO-10001798' -> 'FUR-BO-1000'
      * ``segment_id`` / ``region_id`` -- fixed lookup on the segment / region
                              name; names missing from the lookup become NaN.

    Args:
        df (pandas.DataFrame): DataFrame with the normalised (lowercase) columns
                               'product_id', 'segment' and 'region'.

    Returns:
        pandas.DataFrame: A copy of ``df`` with the columns 'category_id',
        'subcategory_id', 'segment_id' and 'region_id' added. ``df`` itself is
        not modified.

    Raises:
        KeyError: If any of the required columns is missing from ``df``.
    """
    required_columns = ('product_id', 'segment', 'region')
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        # Fail before copying, and name every missing column at once.
        raise KeyError(f"enrich_ecom_data: missing required column(s): {missing_columns}")

    def _category_id(product_id):
        parts = product_id.split('-')
        return parts[0] + '-' + parts[-1][:4]

    def _subcategory_id(product_id):
        parts = product_id.split('-')
        return parts[0] + '-' + parts[1] + '-' + parts[-1][:4]

    segment_id_map = {
        'Consumer': 'CONS-1000',
        'Corporate': 'CORP-1000',
        'Home Office': 'HOME-1000'
    }
    region_id_map = {
        'Central': 'CENT-1000',
        'East': 'EAST-1000',
        'South': 'SOUT-1000',
        'West': 'WEST-1000'
    }

    # Create a copy to avoid modifying the caller's frame
    result_df = df.copy()

    # NOTE(review): the IDs embed the first four digits of the product number, so
    # one category maps to a single category_id only if all of its products share
    # that prefix — presumably true for this dataset; verify for new data.
    result_df['category_id'] = result_df['product_id'].apply(_category_id)
    result_df['subcategory_id'] = result_df['product_id'].apply(_subcategory_id)

    result_df['segment_id'] = result_df['segment'].map(segment_id_map)
    result_df['region_id'] = result_df['region'].map(region_id_map)

    return result_df

# Rebind ecom_copy to the enriched frame. The ID columns are recomputed from
# product_id / segment / region on every call, so re-running this cell is safe.
ecom_copy = enrich_ecom_data(ecom_copy)



In [41]:
# Preview the first rows, including the four ID columns added by enrich_ecom_data
ecom_copy.head()

Unnamed: 0,row_id,order_id,order_date,ship_date,ship_mode,customer_id,customer_name,segment,country,city,...,sub-category,product_name,sales,quantity,discount,profit,category_id,subcategory_id,segment_id,region_id
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136,FUR-1000,FUR-BO-1000,CONS-1000,SOUT-1000
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582,FUR-1000,FUR-CH-1000,CONS-1000,SOUT-1000
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714,OFF-1000,OFF-LA-1000,CORP-1000,WEST-1000
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031,FUR-1000,FUR-TA-1000,CONS-1000,SOUT-1000
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164,OFF-1000,OFF-ST-1000,CONS-1000,SOUT-1000


In [47]:
# DDL for the lookup tables, in creation order.
_LOOKUP_TABLE_DDL = (
    '''
    CREATE TABLE IF NOT EXISTS segments (
        segment_id TEXT PRIMARY KEY,
        segment_name TEXT UNIQUE
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS regions (
        region_id TEXT PRIMARY KEY,
        region_name TEXT UNIQUE
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS categories (
        category_id TEXT PRIMARY KEY,
        category_name TEXT UNIQUE
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS subcategories (
        subcategory_id TEXT PRIMARY KEY,
        subcategory_name TEXT,
        category_id TEXT,
        FOREIGN KEY (category_id) REFERENCES categories (category_id)
    )
    ''',
)

# DDL for the main tables, in creation order (referenced tables first).
_MAIN_TABLE_DDL = (
    '''
    CREATE TABLE IF NOT EXISTS customers (
        customer_id TEXT PRIMARY KEY,
        customer_name TEXT,
        segment_id TEXT,
        country TEXT,
        city TEXT,
        state TEXT,
        postal_code TEXT,
        region_id TEXT,
        FOREIGN KEY (segment_id) REFERENCES segments (segment_id),
        FOREIGN KEY (region_id) REFERENCES regions (region_id)
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS products (
        product_id TEXT PRIMARY KEY,
        product_name TEXT,
        category_id TEXT,
        subcategory_id TEXT,
        FOREIGN KEY (category_id) REFERENCES categories (category_id),
        FOREIGN KEY (subcategory_id) REFERENCES subcategories (subcategory_id)
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS orders (
        order_id TEXT PRIMARY KEY,
        row_id TEXT,
        order_date TEXT,
        ship_date TEXT,
        ship_mode TEXT,
        customer_id TEXT,
        FOREIGN KEY (customer_id) REFERENCES customers (customer_id)
    )
    ''',
    '''
    CREATE TABLE IF NOT EXISTS order_details (
        row_id TEXT PRIMARY KEY,
        order_id TEXT,
        product_id TEXT,
        quantity INTEGER,
        sales REAL,
        discount REAL,
        profit REAL,
        FOREIGN KEY (order_id) REFERENCES orders (order_id),
        FOREIGN KEY (product_id) REFERENCES products (product_id)
    )
    ''',
)


def _iso_date_or_none(value):
    """Format a timestamp as 'YYYY-MM-DD'; missing values (NaT/NaN/None) become None."""
    return value.strftime('%Y-%m-%d') if pd.notna(value) else None


def _insert_or_ignore(cursor, frame, source_columns, table, table_columns,
                      deduplicate=True, convert=None):
    """Bulk INSERT OR IGNORE ``frame[source_columns]`` into ``table (table_columns)``.

    ``table`` and ``table_columns`` are fixed identifiers supplied by this module,
    never user input; all row values are bound through ``?`` placeholders.
    """
    selected = frame[source_columns]
    if deduplicate:
        selected = selected.drop_duplicates()
    # Plain tuples of Python scalars, in column order, ready for parameter binding.
    rows = selected.itertuples(index=False, name=None)
    if convert is not None:
        rows = (convert(row) for row in rows)
    placeholders = ', '.join('?' * len(table_columns))
    cursor.executemany(
        f"INSERT OR IGNORE INTO {table} ({', '.join(table_columns)}) VALUES ({placeholders})",
        rows,
    )


def create_tables_from_enriched_df(ecom_copy, db_name='ecommerce.db'):
    """
    Create the e-commerce tables in a SQLite database and fill them from the
    enriched DataFrame.

    Tables are created with CREATE TABLE IF NOT EXISTS and rows are written with
    INSERT OR IGNORE, so a row whose primary key (or UNIQUE name) already exists
    is skipped: the first row seen for a key is the one that is kept. After
    committing, the per-table row counts are printed. The connection is closed
    even if an error occurs part-way through.

    Args:
        ecom_copy (pandas.DataFrame): Enriched DataFrame containing the columns
            read below (order, customer, product and measure columns plus
            'category_id', 'subcategory_id', 'segment_id', 'region_id').
            'order_date' and 'ship_date' must hold timestamps (or missing values).
        db_name (str): Name of the SQLite database file
    """
    # Connect to the database (creates it if it doesn't exist)
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()

        print(f"Creating database: {db_name}")

        # Step 1: Create all tables
        print("Creating lookup tables...")
        for statement in _LOOKUP_TABLE_DDL:
            cursor.execute(statement)

        print("Creating main tables...")
        for statement in _MAIN_TABLE_DDL:
            cursor.execute(statement)

        # Step 2: Populate all tables
        print("Populating lookup tables...")
        _insert_or_ignore(cursor, ecom_copy, ['segment_id', 'segment'],
                          'segments', ['segment_id', 'segment_name'])
        _insert_or_ignore(cursor, ecom_copy, ['region_id', 'region'],
                          'regions', ['region_id', 'region_name'])
        _insert_or_ignore(cursor, ecom_copy, ['category_id', 'category'],
                          'categories', ['category_id', 'category_name'])
        _insert_or_ignore(cursor, ecom_copy, ['subcategory_id', 'sub-category', 'category_id'],
                          'subcategories', ['subcategory_id', 'subcategory_name', 'category_id'])

        print("Populating main tables...")

        # NOTE(review): customer_id is the primary key, so when a customer appears
        # with several city/state/postal_code/region combinations only the first
        # one is stored. Reports that read a region via `customers` inherit that.
        customer_columns = ['customer_id', 'customer_name', 'segment_id', 'country',
                            'city', 'state', 'postal_code', 'region_id']
        _insert_or_ignore(cursor, ecom_copy, customer_columns, 'customers', customer_columns)

        product_columns = ['product_id', 'product_name', 'category_id', 'subcategory_id']
        _insert_or_ignore(cursor, ecom_copy, product_columns, 'products', product_columns)

        # Orders: timestamps are stored as ISO 'YYYY-MM-DD' text. row_id is part of
        # the selection, so the first line item of each order supplies the stored row.
        order_columns = ['order_id', 'row_id', 'order_date', 'ship_date', 'ship_mode', 'customer_id']
        _insert_or_ignore(
            cursor, ecom_copy, order_columns, 'orders', order_columns,
            convert=lambda row: (row[0], row[1], _iso_date_or_none(row[2]),
                                 _iso_date_or_none(row[3]), row[4], row[5]),
        )

        # Order details: one row per source row, no de-duplication beforehand.
        detail_columns = ['row_id', 'order_id', 'product_id', 'quantity',
                          'sales', 'discount', 'profit']
        _insert_or_ignore(cursor, ecom_copy, detail_columns, 'order_details',
                          detail_columns, deduplicate=False)

        # Commit changes
        conn.commit()
        print(f"Database {db_name} created and populated successfully.")

        # Step 3: Verify tables were created and populated correctly
        print("\nTable record counts:")
        tables = ['segments', 'regions', 'categories', 'subcategories',
                  'customers', 'products', 'orders', 'order_details']

        for table in tables:
            cursor.execute(f"SELECT COUNT(*) FROM {table}")
            count = cursor.fetchone()[0]
            print(f"- {table}: {count} records")
    finally:
        # Release the file handle even when table creation or an insert fails.
        conn.close()
    print("\nDatabase connection closed.")

# Usage - build the database from ecom_copy, then run an example report
if __name__ == "__main__":
    # Call the function with the ecom_copy DataFrame
    create_tables_from_enriched_df(ecom_copy)

    # Example query: total sales per region and customer segment.
    # NOTE(review): the region is reached through `customers`, which holds a single
    # region_id per customer_id. All of a customer's sales are therefore attributed
    # to that one stored region — confirm this is the intended definition.
    query = '''
    SELECT r.region_name, s.segment_name, SUM(od.sales) as total_sales
    FROM order_details od
    JOIN orders o ON od.order_id = o.order_id
    JOIN customers c ON o.customer_id = c.customer_id
    JOIN regions r ON c.region_id = r.region_id
    JOIN segments s ON c.segment_id = s.segment_id
    GROUP BY r.region_id, s.segment_id
    ORDER BY total_sales DESC
    '''

    conn = sqlite3.connect('ecommerce.db')
    try:
        results = pd.read_sql_query(query, conn)
    finally:
        # Close the connection even if the query fails.
        conn.close()

    print("\nExample query results - Sales by region and segment:")
    print(results)

Creating database: ecommerce.db
Creating lookup tables...
Creating main tables...
Populating lookup tables...
Populating main tables...
Database ecommerce.db created and populated successfully.

Table record counts:
- segments: 3 records
- regions: 4 records
- categories: 3 records
- subcategories: 17 records
- customers: 793 records
- products: 1862 records
- orders: 5009 records
- order_details: 9994 records

Database connection closed.

Example query results - Sales by region and segment:
   region_name segment_name  total_sales
0         West     Consumer  444735.5120
1         East     Consumer  270603.3537
2      Central     Consumer  260293.7913
3         East    Corporate  217976.9024
4         West    Corporate  200888.6719
5        South     Consumer  185768.6880
6      Central    Corporate  164847.5322
7         East  Home Office  123154.0434
8        South    Corporate  122433.2603
9         West  Home Office  119010.2614
10       South  Home Office   93830.0350
11     Cent

In [48]:
import sqlite3

def view_database_tables(db_name='ecommerce.db'):
    """
    Print every table in the SQLite database with its row count, column names
    and up to three sample rows.

    Any error raised while reading the database is caught and reported on
    stdout instead of propagating; the connection is closed in every case.

    Args:
        db_name (str): Name of the SQLite database file
    """
    def _quote_identifier(identifier):
        # Double-quote the name (doubling embedded quotes) so that table names
        # containing spaces, punctuation or SQL keywords are still valid SQL.
        return '"' + identifier.replace('"', '""') + '"'

    conn = None
    try:
        # Connect to the database
        conn = sqlite3.connect(db_name)
        cursor = conn.cursor()

        # Query to get all table names
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [name for (name,) in cursor.fetchall()]

        print(f"\nTables in {db_name}:")
        print("-" * 40)

        # Print table names and row counts
        for table_name in table_names:
            cursor.execute(f"SELECT COUNT(*) FROM {_quote_identifier(table_name)}")
            row_count = cursor.fetchone()[0]
            print(f"{table_name} - {row_count} rows")

        # Print sample data from each table
        print("\nSample data from each table:")
        print("-" * 40)

        for table_name in table_names:
            quoted_name = _quote_identifier(table_name)
            print(f"\n{table_name}:")

            # Column names are the second field of each PRAGMA table_info row
            cursor.execute(f"PRAGMA table_info({quoted_name})")
            columns = [column[1] for column in cursor.fetchall()]
            print(f"Columns: {', '.join(columns)}")

            # Get sample data (first 3 rows)
            cursor.execute(f"SELECT * FROM {quoted_name} LIMIT 3")
            rows = cursor.fetchall()

            if rows:
                for row in rows:
                    print(row)
            else:
                print("(No data)")

    except Exception as e:
        # Best-effort viewer: report the problem rather than raising.
        print(f"Error accessing database: {str(e)}")
    finally:
        # Close on both the success and the error path.
        if conn is not None:
            conn.close()

# Inspect the default database file ('ecommerce.db'): tables, row counts, sample rows
view_database_tables()


Tables in ecommerce.db:
----------------------------------------
segments - 3 rows
regions - 4 rows
categories - 3 rows
subcategories - 17 rows
customers - 793 rows
products - 1862 rows
orders - 5009 rows
order_details - 9994 rows

Sample data from each table:
----------------------------------------

segments:
Columns: segment_id, segment_name
('CONS-1000', 'Consumer')
('CORP-1000', 'Corporate')
('HOME-1000', 'Home Office')

regions:
Columns: region_id, region_name
('SOUT-1000', 'South')
('WEST-1000', 'West')
('CENT-1000', 'Central')

categories:
Columns: category_id, category_name
('FUR-1000', 'Furniture')
('OFF-1000', 'Office Supplies')
('TEC-1000', 'Technology')

subcategories:
Columns: subcategory_id, subcategory_name, category_id
('FUR-BO-1000', 'Bookcases', 'FUR-1000')
('FUR-CH-1000', 'Chairs', 'FUR-1000')
('OFF-LA-1000', 'Labels', 'OFF-1000')

customers:
Columns: customer_id, customer_name, segment_id, country, city, state, postal_code, region_id
('CG-12520', 'Claire Gute', 'C

In [53]:
# Ad-hoc query without a helper function: first five order_details rows
conn = sqlite3.connect('ecommerce.db')
try:
    results = pd.read_sql_query("SELECT * FROM order_details LIMIT 5", conn)
finally:
    # Close the connection even if the query raises.
    conn.close()
print(tabulate(results, headers='keys', tablefmt='psql'))

+----+----------+----------------+-----------------+------------+---------+------------+-----------+
|    |   row_id | order_id       | product_id      |   quantity |   sales |   discount |    profit |
|----+----------+----------------+-----------------+------------+---------+------------+-----------|
|  0 |        1 | CA-2016-152156 | FUR-BO-10001798 |          2 | 261.96  |       0    |   41.9136 |
|  1 |        2 | CA-2016-152156 | FUR-CH-10000454 |          3 | 731.94  |       0    |  219.582  |
|  2 |        3 | CA-2016-138688 | OFF-LA-10000240 |          2 |  14.62  |       0    |    6.8714 |
|  3 |        4 | US-2015-108966 | FUR-TA-10000577 |          5 | 957.577 |       0.45 | -383.031  |
|  4 |        5 | US-2015-108966 | OFF-ST-10000760 |          2 |  22.368 |       0.2  |    2.5164 |
+----+----------+----------------+-----------------+------------+---------+------------+-----------+
