In [1]:
# Imoports
import pandas as pd
import sqlite3
from pathlib import Path

# Load the CSVs

In [2]:
# Directory containing CSV files
directory = Path('SuperstoreTables')

# Filter CSV files
csv_files = directory.glob('*.csv')

# Read each CSV file
table_names = []
dfs = []

for file in csv_files:
    table_name = file.stem.split('/')[0]  # Extract the table name
    df = pd.read_csv(file)

    table_names.append(table_name)
    dfs.append(df)

In [3]:
# Remove spaces and dashes from column names to fit names used in db later.
for df in dfs:
    df.columns = [col.replace(' ', '').replace('-', '') for col in df.columns]

In [4]:
# Create a dictionary where table_name : df
table_dict = {table_names[i]: dfs[i] for i in range(len(table_names))}

# Create the DB

In [5]:
# Make path for the DBs
path = 'SuperstoreDB'
Path(path).mkdir(exist_ok=True)

In [6]:
# Initiate a new SQLite database connection object.
db_conn = sqlite3.connect(f"{path}/superstore.db")

In [7]:
# Establish a cursor object, which is a tool that executes SQL code against the database of interest.
c = db_conn.cursor()

In [8]:
# Products
c.execute(
    """
    CREATE TABLE Products (
        ProductID TEXT NOT NULL,
        ProductName TEXT NOT NULL,
        Category TEXT NOT NULL,
        SubCategory TEXT NOT NULL,
        PRIMARY KEY (ProductID)
        ); 
""")

# Customers
c.execute(
    """
    CREATE TABLE Customers (
        CustomerID TEXT NOT NULL,
        CustomerName TEXT NOT NULL,
        Segment TEXT NOT NULL,
        PRIMARY KEY (CustomerID)
        ); 

""")

# Addresses
c.execute(
    """
    CREATE TABLE Addresses (
        AddressID TEXT NOT NULL,
        PostalCode TEXT NOT NULL,
        City TEXT NOT NULL,
        State TEXT,
        Region TEXT,
        Country TEXT NOT NULL,
        PRIMARY KEY (AddressID)
        ); 
""")

# Orders
c.execute(
    """
    CREATE TABLE Orders (
        OrderID TEXT NOT NULL,
        OrderDate TEXT NOT NULL,
        ShipDate TEXT NOT NULL,
        ShipMode TEXT NOT NULL,
        CustomerID  TEXT NOT NULL,
        AddressID TEXT NOT NULL,
        PRIMARY KEY (OrderID)
        FOREIGN KEY(CustomerID) REFERENCES Customers(CustomerID)
        FOREIGN KEY(AddressID) REFERENCES Addresses(AddressID)
        ); 
""")

# OrdersDetails
c.execute(
    """
    CREATE TABLE OrdersDetails (
        OrderID TEXT NOT NULL,
        ProductID TEXT NOT NULL,
        Sales REAL NOT NULL,
        Quantity INTEGER NOT NULL,
        Discount REAL NOT NULL,
        Profit REAL NOT NULL,
        PRIMARY KEY (OrderID, ProductID)
        FOREIGN KEY(OrderID) REFERENCES Orders(OrderID)
        FOREIGN KEY(ProductID) REFERENCES Products(ProductID)
        ); 
""")

<sqlite3.Cursor at 0x12c09b040>

# Load the Data from CSVs to the DB

In [9]:
for table_name in  table_dict:
    print(f"Uploading data for: {table_name}")
    table_dict[table_name].to_sql(table_name, db_conn, if_exists='append', index=False)

Uploading data for: Addresses
Uploading data for: OrdersDetails
Uploading data for: Customers
Uploading data for: Products
Uploading data for: Orders


In [10]:
# Make a test query
pd.read_sql("SELECT * FROM Orders LIMIT 5", db_conn)

Unnamed: 0,OrderID,OrderDate,ShipDate,ShipMode,CustomerID,AddressID
0,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,42420-000001
1,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,90036-000001
2,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,33311-000001
3,CA-2014-115812,2014-06-09,2014-06-14,Standard Class,BH-11710,90032-000001
4,CA-2017-114412,2017-04-15,2017-04-20,Standard Class,AA-10480,28027-000001


In [11]:
# Close the db connection.
db_conn.close()