# Data Warehouse

## Imports

In [1]:
import mysql.connector
import pandas as pd
import warnings
from datetime import datetime

warnings.filterwarnings(
    "ignore",
    message="pandas only supports SQLAlchemy connectable",
    category=UserWarning,
)

## Connection

### Staging Area

In [2]:
staging_area_conn = mysql.connector.connect(host="host.docker.internal", user="deds", passwd="deds", database="deds")
staging_area_cursor = staging_area_conn.cursor()

### Data Warehouse

In [3]:
warehouse_conn = mysql.connector.connect(
    host="caboose.proxy.rlwy.net",
    user="root",
    passwd="SNjzPKnbJWApqnpzaislntSBuLBonIXI",
    database="railway",
    port=20756,
)
warehouse_cursor = warehouse_conn.cursor()

## Reset database

In [134]:
# warehouse_cursor.execute("DELETE FROM FactInkoop")
# warehouse_cursor.execute("DELETE FROM FactVerkoop")
# warehouse_cursor.execute("DELETE FROM DimProduct")
# warehouse_cursor.execute("DELETE FROM DimVendor")
# warehouse_cursor.execute("DELETE FROM DimEmployee")
# warehouse_cursor.execute("DELETE FROM DimCustomer")

warehouse_conn.commit()

## Tables

### Product

In [135]:
def create_product_row(
    id,
    name,
    color,
    size,
    size_unit_measure_code,
    weight,
    weight_unit_measure_code,
    discontinued,
    standard_cost,
    price,
    safety_stock_level,
    reorder_point,
    product_category_name,
):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    id = handle_nan(id)
    name = handle_nan(name)
    color = handle_nan(color)
    size = handle_nan(size)
    size_unit_measure_code = handle_nan(size_unit_measure_code)
    weight = handle_nan(weight)
    weight_unit_measure_code = handle_nan(weight_unit_measure_code)
    discontinued = handle_nan(discontinued)
    standard_cost = handle_nan(standard_cost)
    price = handle_nan(price)
    safety_stock_level = handle_nan(safety_stock_level)
    reorder_point = handle_nan(reorder_point)
    product_category_name = handle_nan(product_category_name)

    query = """
    INSERT INTO DimProduct (
        ProductID, Name, Color, Size, SizeUnitMeasureCode, Weight, WeightUnitMeasureCode,
        Discontinued, StandardCost, Price, SafetyStockLevel, ReorderPoint, ProductCategoryName
    ) VALUES (
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
    )
    """

    values = (
        id,
        name,
        color,
        size,
        size_unit_measure_code,
        weight,
        weight_unit_measure_code,
        discontinued,
        standard_cost,
        price,
        safety_stock_level,
        reorder_point,
        product_category_name,
    )

    try:
        warehouse_cursor.execute(query, values)
        print(f"[PRODUCT] Successfully loaded product {id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load product into warehouse: {err}")

    warehouse_conn.commit()


staging_area_product_data = pd.read_sql_query("SELECT * FROM Product", staging_area_conn)

for row in staging_area_product_data.itertuples():
    id = row.ProductID
    name = row.ProductName
    color = row.Color
    size = row.Size
    size_unit_measure_code = row.SizeUnitMeasureCode
    weight = row.Weight
    weight_unit_measure_code = row.WeightUnitMeasureCode

    discontinued = False

    staging_area_product_data["DiscontinuedDate"] = pd.to_datetime(staging_area_product_data["DiscontinuedDate"])
    current_date = datetime.now()

    if row.Discontinued == 1:
        discontinued = True
    # check if the discontinued date is already happend then set it to true
    elif row.DiscontinuedDate and row.DiscontinuedDate < current_date:
        discontinued = True

    standard_cost = row.StandardCost

    # price: we combine multiple prices into one price
    price = row.ListPrice

    if pd.isnull(price):
        price = row.UnitPrice

    safety_stock_level = row.SafetyStockLevel

    reorder_point = row.ReorderPoint

    if pd.isnull(reorder_point):
        reorder_point = row.ReorderLevel

    product_category_name = None

    if pd.notnull(row.ProductCategoryID):
        result = pd.read_sql_query(
            f"SELECT Name FROM ProductCategory WHERE ProductCategoryID = '{row.ProductCategoryID}'", staging_area_conn
        )
        if not result.empty:
            product_category_name = result.iloc[0].Name
    elif pd.notnull(row.category):
        product_category_name = row.category

    create_product_row(
        id,
        name,
        color,
        size,
        size_unit_measure_code,
        weight,
        weight_unit_measure_code,
        discontinued,
        standard_cost,
        price,
        safety_stock_level,
        reorder_point,
        product_category_name,
    )

[PRODUCT] Successfully loaded product 1 into warehouse
[PRODUCT] Successfully loaded product 2 into warehouse
[PRODUCT] Successfully loaded product 3 into warehouse
[PRODUCT] Successfully loaded product 4 into warehouse
[PRODUCT] Successfully loaded product 5 into warehouse
[PRODUCT] Successfully loaded product 6 into warehouse
[PRODUCT] Successfully loaded product 7 into warehouse
[PRODUCT] Successfully loaded product 8 into warehouse
[PRODUCT] Successfully loaded product 9 into warehouse
[PRODUCT] Successfully loaded product 10 into warehouse
[PRODUCT] Successfully loaded product 11 into warehouse
[PRODUCT] Successfully loaded product 12 into warehouse
[PRODUCT] Successfully loaded product 13 into warehouse
[PRODUCT] Successfully loaded product 14 into warehouse
[PRODUCT] Successfully loaded product 15 into warehouse
[PRODUCT] Successfully loaded product 16 into warehouse
[PRODUCT] Successfully loaded product 17 into warehouse
[PRODUCT] Successfully loaded product 18 into warehouse
[

### Vendor

In [136]:
def create_vendor_row(id, name):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    id = handle_nan(id)
    name = handle_nan(name)

    query = """
    INSERT INTO DimVendor (
        VendorID, Name
    ) VALUES (
        %s, %s
    )
    """

    values = (id, name)

    try:
        warehouse_cursor.execute(query, values)
        print(f"[PRODUCT] Successfully loaded vendor {id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load product into warehouse: {err}")

    warehouse_conn.commit()


staging_area_vendor_data = pd.read_sql_query("SELECT * FROM Vendor", staging_area_conn)

for row in staging_area_vendor_data.itertuples():
    create_vendor_row(row.VendorID, row.Name)

[PRODUCT] Successfully loaded vendor 1492 into warehouse
[PRODUCT] Successfully loaded vendor 1494 into warehouse
[PRODUCT] Successfully loaded vendor 1496 into warehouse
[PRODUCT] Successfully loaded vendor 1498 into warehouse
[PRODUCT] Successfully loaded vendor 1500 into warehouse
[PRODUCT] Successfully loaded vendor 1502 into warehouse
[PRODUCT] Successfully loaded vendor 1504 into warehouse
[PRODUCT] Successfully loaded vendor 1506 into warehouse
[PRODUCT] Successfully loaded vendor 1508 into warehouse
[PRODUCT] Successfully loaded vendor 1510 into warehouse
[PRODUCT] Successfully loaded vendor 1512 into warehouse
[PRODUCT] Successfully loaded vendor 1514 into warehouse
[PRODUCT] Successfully loaded vendor 1516 into warehouse
[PRODUCT] Successfully loaded vendor 1518 into warehouse
[PRODUCT] Successfully loaded vendor 1520 into warehouse
[PRODUCT] Successfully loaded vendor 1522 into warehouse
[PRODUCT] Successfully loaded vendor 1524 into warehouse
[PRODUCT] Successfully loaded v

### Employee

In [137]:
def create_employee_row(id, first_name, last_name, title, department_name, department_group_name):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    id = handle_nan(id)
    first_name = handle_nan(first_name)
    last_name = handle_nan(last_name)
    title = handle_nan(title)
    department_name = handle_nan(department_name)
    department_group_name = handle_nan(department_group_name)

    query = """
    INSERT INTO DimEmployee (
        EmployeeID, FirstName, LastName, Title,
        DepartmentName, DepartmentGroupName
    ) VALUES (
        %s, %s, %s, %s, %s, %s
    )
    """

    values = (id, first_name, last_name, title, department_name, department_group_name)

    try:
        warehouse_cursor.execute(query, values)
        print(f"[EMPLOYEE] Successfully loaded employee {id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load employee into warehouse: {err}")

    warehouse_conn.commit()


staging_area_employee_data = pd.read_sql_query("SELECT * FROM Employee", staging_area_conn)

for row in staging_area_employee_data.itertuples():
    id = row.EmployeeID

    first_name = row.FirstName
    if pd.isnull(first_name):
        # get from Person table
        result = pd.read_sql_query(
            f"SELECT FirstName FROM Person WHERE PersonID = '{row.EmployeeID}'", staging_area_conn
        )
        if not result.empty:
            first_name = result.iloc[0].FirstName

    last_name = row.LastName
    if pd.isnull(last_name):
        # get from Person table
        result = pd.read_sql_query(
            f"SELECT LastName FROM Person WHERE PersonID = '{row.EmployeeID}'", staging_area_conn
        )
        if not result.empty:
            last_name = result.iloc[0].LastName

    title = row.JobTitle

    department_name = None
    department_group_name = None

    # department
    if not pd.isnull(row.DepartmentID):
        result = pd.read_sql_query(
            f"SELECT * FROM Department WHERE DepartmentID = '{row.DepartmentID}'", staging_area_conn
        )
        if not result.empty:
            department_name = result.iloc[0].Name
            department_group_name = result.iloc[0].GroupName
            if department_group_name == "NULL":
                department_group_name = department_name

    create_employee_row(id, first_name, last_name, title, department_name, department_group_name)

[EMPLOYEE] Successfully loaded employee 1 into warehouse
[EMPLOYEE] Successfully loaded employee 2 into warehouse
[EMPLOYEE] Successfully loaded employee 3 into warehouse
[EMPLOYEE] Successfully loaded employee 4 into warehouse
[EMPLOYEE] Successfully loaded employee 5 into warehouse
[EMPLOYEE] Successfully loaded employee 6 into warehouse
[EMPLOYEE] Successfully loaded employee 7 into warehouse
[EMPLOYEE] Successfully loaded employee 8 into warehouse
[EMPLOYEE] Successfully loaded employee 9 into warehouse
[EMPLOYEE] Successfully loaded employee 10 into warehouse
[EMPLOYEE] Successfully loaded employee 11 into warehouse
[EMPLOYEE] Successfully loaded employee 12 into warehouse
[EMPLOYEE] Successfully loaded employee 13 into warehouse
[EMPLOYEE] Successfully loaded employee 14 into warehouse
[EMPLOYEE] Successfully loaded employee 15 into warehouse
[EMPLOYEE] Successfully loaded employee 16 into warehouse
[EMPLOYEE] Successfully loaded employee 17 into warehouse
[EMPLOYEE] Successfully

### Customer

In [138]:
def create_customer_row(id, address, city, zip, state, country, phone, first_name, last_name):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    id = handle_nan(id)
    address = handle_nan(address)
    city = handle_nan(city)
    zip = handle_nan(zip)
    state = handle_nan(state)
    country = handle_nan(country)
    phone = handle_nan(phone)
    first_name = handle_nan(first_name)
    last_name = handle_nan(last_name)

    query = """
    INSERT INTO DimCustomer (
        CustomerID, Address, City, Zip, State,
        Country, Phone, FirstName, LastName
    ) VALUES (
        %s, %s, %s, %s, %s, %s, %s, %s, %s 
    )
    """

    values = (id, address, city, zip, state, country, phone, first_name, last_name)

    try:
        warehouse_cursor.execute(query, values)
        print(f"[CUSTOMER] Successfully loaded customer {id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load customer into warehouse: {err}")

    warehouse_conn.commit()


staging_area_customer_data = pd.read_sql_query("SELECT * FROM Customer", staging_area_conn)

for row in staging_area_customer_data.itertuples():
    id = row.CustomerID

    # all different sources use other methods for adresses so this will get messy
    # we leave address, city, zip, state and country for now

    phone = row.Phone

    first_name = row.fname
    last_name = row.lname

    # when first_name is None last_name is also None
    if not pd.isnull(row.PersonID):
        # we will get the person and get the first and last name from there
        person = pd.read_sql_query(f"SELECT * FROM Person WHERE PersonID = {row.PersonID}", staging_area_conn)
        first_name = person.iloc[0]["FirstName"]
        last_name = person.iloc[0]["LastName"]

    create_customer_row(id, None, None, None, None, None, phone, first_name, last_name)

[CUSTOMER] Successfully loaded customer 1 into warehouse
[CUSTOMER] Successfully loaded customer 10 into warehouse
[CUSTOMER] Successfully loaded customer 100 into warehouse
[CUSTOMER] Successfully loaded customer 101 into warehouse
[CUSTOMER] Successfully loaded customer 102 into warehouse
[CUSTOMER] Successfully loaded customer 103 into warehouse
[CUSTOMER] Successfully loaded customer 104 into warehouse
[CUSTOMER] Successfully loaded customer 105 into warehouse
[CUSTOMER] Successfully loaded customer 106 into warehouse
[CUSTOMER] Successfully loaded customer 107 into warehouse
[CUSTOMER] Successfully loaded customer 108 into warehouse
[CUSTOMER] Successfully loaded customer 109 into warehouse
[CUSTOMER] Successfully loaded customer 11 into warehouse
[CUSTOMER] Successfully loaded customer 110 into warehouse
[CUSTOMER] Successfully loaded customer 11000 into warehouse
[CUSTOMER] Successfully loaded customer 11001 into warehouse
[CUSTOMER] Successfully loaded customer 11002 into wareh

### Inkoop

In [146]:
def create_inkoop_row(
    purchase_order_id,
    purchase_order_detail_id,
    vendor_id,
    employee_id,
    product_id,
    order_date,
    ship_date,
    order_qty,
    unit_price,
    line_total,
    received_qty,
    rejected_qty,
    stocked_qty,
    total_due,
    tax_amt,
    sub_total,
):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    purchase_order_id = handle_nan(purchase_order_id)
    purchase_order_detail_id = handle_nan(purchase_order_detail_id)
    vendor_id = handle_nan(vendor_id)
    employee_id = handle_nan(employee_id)
    product_id = handle_nan(product_id)
    order_date = handle_nan(order_date)
    ship_date = handle_nan(ship_date)
    order_qty = handle_nan(order_qty)
    unit_price = handle_nan(unit_price)
    line_total = handle_nan(line_total)
    received_qty = handle_nan(received_qty)
    rejected_qty = handle_nan(rejected_qty)
    stocked_qty = handle_nan(stocked_qty)
    total_due = handle_nan(total_due)
    tax_amt = handle_nan(tax_amt)
    sub_total = handle_nan(sub_total)

    query = """
        INSERT INTO FactInkoop (
            PurchaseOrderID, PurchaseOrderDetailID, VendorID, EmployeeID, ProductID,
            OrderDate, ShipDate, OrderQty, UnitPrice, LineTotal, ReceivedQty,
            RejectedQty, StockedQty, TotalDue, TaxAmt, SubTotal 
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        )
    """

    values = (
        purchase_order_id,
        purchase_order_detail_id,
        vendor_id,
        employee_id,
        product_id,
        order_date,
        ship_date,
        order_qty,
        unit_price,
        line_total,
        received_qty,
        rejected_qty,
        stocked_qty,
        total_due,
        tax_amt,
        sub_total,
    )

    try:
        warehouse_cursor.execute(query, values)
        print(f"[INKOOP] Successfully loaded {purchase_order_id}:{purchase_order_detail_id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load customer into warehouse: {err}")

    warehouse_conn.commit()


rows = pd.read_sql("SELECT * FROM PurchaseOrderDetail", staging_area_conn)

index = 0
for purchase_order_detail_data in rows.itertuples():
    if index <= 200:
        index = index + 1
        continue

    purchase_order_header_data = pd.read_sql(
        f"SELECT * FROM PurchaseOrderHeader WHERE PurchaseOrderID = '{purchase_order_detail_data.PurchaseOrderID}'",
        staging_area_conn,
    ).iloc[0]

    purchase_order_id = int(purchase_order_header_data.PurchaseOrderID)
    purchase_order_detail_id = int(purchase_order_detail_data.PurchaseOrderDetailID)
    vendor_id = int(purchase_order_header_data.VendorID)
    employee_id = int(purchase_order_header_data.EmployeeID)
    product_id = int(purchase_order_detail_data.ProductID)
    order_date = purchase_order_header_data.OrderDate
    ship_date = purchase_order_header_data.ShipDate
    order_qty = purchase_order_detail_data.OrderQty
    unit_price = purchase_order_detail_data.UnitPrice
    line_total = purchase_order_detail_data.LineTotal
    received_qty = purchase_order_detail_data.ReceivedQty
    rejected_qty = purchase_order_detail_data.RejectedQty
    stocked_qty = purchase_order_detail_data.StockedQty
    total_due = purchase_order_header_data.TotalDue
    tax_amt = purchase_order_header_data.TaxAmt
    sub_total = purchase_order_header_data.SubTotal

    create_inkoop_row(
        purchase_order_id,
        purchase_order_detail_id,
        vendor_id,
        employee_id,
        product_id,
        order_date,
        ship_date,
        order_qty,
        unit_price,
        line_total,
        received_qty,
        rejected_qty,
        stocked_qty,
        total_due,
        tax_amt,
        sub_total,
    )

[INKOOP] Successfully loaded 88:202 into warehouse
[INKOOP] Successfully loaded 88:203 into warehouse
[INKOOP] Successfully loaded 89:204 into warehouse
[INKOOP] Successfully loaded 89:205 into warehouse
[INKOOP] Successfully loaded 89:206 into warehouse
[INKOOP] Successfully loaded 90:207 into warehouse
[INKOOP] Successfully loaded 90:208 into warehouse
[INKOOP] Successfully loaded 90:209 into warehouse
[INKOOP] Successfully loaded 91:210 into warehouse
[INKOOP] Successfully loaded 92:211 into warehouse
[INKOOP] Successfully loaded 92:212 into warehouse
[INKOOP] Successfully loaded 93:213 into warehouse
[INKOOP] Successfully loaded 94:214 into warehouse
[INKOOP] Successfully loaded 95:215 into warehouse
[INKOOP] Successfully loaded 96:216 into warehouse
[INKOOP] Successfully loaded 96:217 into warehouse
[INKOOP] Successfully loaded 97:218 into warehouse
[INKOOP] Successfully loaded 97:219 into warehouse
[INKOOP] Successfully loaded 97:220 into warehouse
[INKOOP] Successfully loaded 97

### Verkoop

In [None]:
def create_verkoop_row(
    sales_order_id,
    sales_order_detail_id,
    order_date,
    ship_date,
    customer_id,
    employee_id,
    product_id,
    order_qty,
    unit_price,
    line_total,
    sub_total,
    tax_amt,
    total_due,
    status,
):
    def handle_nan(value):
        return None if pd.isnull(value) else value

    sales_order_id = handle_nan(sales_order_id)
    sales_order_detail_id = handle_nan(sales_order_detail_id)
    order_date = handle_nan(order_date)
    ship_date = handle_nan(ship_date)
    customer_id = handle_nan(customer_id)
    employee_id = handle_nan(employee_id)
    product_id = handle_nan(product_id)
    order_qty = handle_nan(order_qty)
    unit_price = handle_nan(unit_price)
    line_total = handle_nan(line_total)
    sub_total = handle_nan(sub_total)
    tax_amt = handle_nan(tax_amt)
    total_due = handle_nan(total_due)
    status = handle_nan(status)

    query = """
        INSERT INTO FactVerkoop (
            SalesOrderID, SalesOrderDetailID, OrderDate, ShipDate, CustomerID, EmployeeID,
            ProductID, OrderQty, UnitPrice, LineTotal, SubTotal, TaxAmt, TotalDue, Status
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """

    values = (
        sales_order_id,
        sales_order_detail_id,
        order_date,
        ship_date,
        customer_id,
        employee_id,
        product_id,
        order_qty,
        unit_price,
        line_total,
        sub_total,
        tax_amt,
        total_due,
        status,
    )

    try:
        warehouse_cursor.execute(query, values)
        print(f"[VERKOOP] Successfully loaded {sales_order_id}:{sales_order_detail_id} into warehouse")
    except mysql.connector.Error as err:
        print(values)
        print(f"Failed to load verkoop into warehouse: {err}")

    warehouse_conn.commit()


rows = pd.read_sql("SELECT * FROM SalesOrderDetail", staging_area_conn)

index = 0

for sales_order_detail_data in rows.itertuples():
    if index <= 200:
        index = index + 1
        continue

    sales_order_header_data = pd.read_sql(
        f"SELECT * FROM SalesOrderHeader WHERE SalesOrderID = {sales_order_detail_data.SalesOrderID}", staging_area_conn
    ).iloc[0]

    sales_order_detail_id = str(sales_order_detail_data.SalesOrderDetailID)
    sales_order_id = str(sales_order_detail_data.SalesOrderID)
    order_date = sales_order_header_data.OrderDate
    ship_date = sales_order_header_data.ShipDate
    customer_id = str(sales_order_header_data.CustomerID)
    employee_id = str(sales_order_header_data.EmployeeID)
    product_id = str(sales_order_detail_data.ProductID)
    order_qty = int(sales_order_detail_data.OrderQty)
    unit_price = int(sales_order_detail_data.UnitPrice)
    line_total = int(sales_order_detail_data.LineTotal)
    sub_total = int(sales_order_header_data.SubTotal)
    tax_amt = int(sales_order_header_data.TaxAmt)
    total_due = int(sales_order_header_data.TotalDue)
    status = int(sales_order_header_data.Status)

    create_verkoop_row(
        sales_order_detail_id,
        sales_order_id,
        order_date,
        ship_date,
        customer_id,
        employee_id,
        product_id,
        order_qty,
        unit_price,
        line_total,
        sub_total,
        tax_amt,
        total_due,
        status,
    )

('353', '43697', Timestamp('2021-05-31 00:00:00'), Timestamp('2021-06-07 00:00:00'), '21768', '282', '749', 1, 3578, 3578, 3578, 286, 3953, 5)
Failed to load verkoop into warehouse: 1062 (23000): Duplicate entry '353-43697' for key 'FactVerkoop.PRIMARY'
('354', '43698', Timestamp('2021-05-31 00:00:00'), Timestamp('2021-06-07 00:00:00'), '28389', '290', '773', 1, 3399, 3399, 3399, 271, 3756, 5)
Failed to load verkoop into warehouse: 1062 (23000): Duplicate entry '354-43698' for key 'FactVerkoop.PRIMARY'
('355', '43699', Timestamp('2021-05-31 00:00:00'), Timestamp('2021-06-07 00:00:00'), '25863', '280', '773', 1, 3399, 3399, 3399, 271, 3756, 5)
Failed to load verkoop into warehouse: 1062 (23000): Duplicate entry '355-43699' for key 'FactVerkoop.PRIMARY'
('356', '43700', Timestamp('2021-05-31 00:00:00'), Timestamp('2021-06-07 00:00:00'), '14501', '276', '767', 1, 699, 699, 699, 55, 772, 5)
Failed to load verkoop into warehouse: 1062 (23000): Duplicate entry '356-43700' for key 'FactVerkoo

## Add location data

In [None]:
def add_location_data_for_customer(id, city):
    query = "UPDATE DimCustomer SET City = %s WHERE CustomerID = %s"

    try:
        warehouse_cursor.execute(query, (city, id))
        print(f"[CUSTOMER] Adding city to {id} customer")
    except mysql.connector.Error as err:
        print(f"Failed to load verkoop into warehouse: {err}")

    warehouse_conn.commit()


customers = pd.read_sql("SELECT * FROM Customer", staging_area_conn)

for customer in customers.itertuples():
    id = customer.CustomerID

    city = "Onbekend"

    if pd.isnull(customer.City):
        if not pd.isnull(customer.PersonID):
            # Fetch city from Address table if City is NULL in Customer table
            address_df = pd.read_sql(
                f"SELECT City FROM Address WHERE PersonID = '{customer.PersonID}'", staging_area_conn
            )

            # Check if the query returned any rows
            if not address_df.empty and not pd.isnull(address_df.iloc[0].City):
                city = address_df.iloc[0].City
    else:
        city = customer.City

    add_location_data_for_customer(id, city)

[CUSTOMER] Adding city to 1 customer
[CUSTOMER] Adding city to 10 customer
[CUSTOMER] Adding city to 100 customer
[CUSTOMER] Adding city to 101 customer
[CUSTOMER] Adding city to 102 customer
[CUSTOMER] Adding city to 103 customer
[CUSTOMER] Adding city to 104 customer
[CUSTOMER] Adding city to 105 customer
[CUSTOMER] Adding city to 106 customer
[CUSTOMER] Adding city to 107 customer
[CUSTOMER] Adding city to 108 customer
[CUSTOMER] Adding city to 109 customer
[CUSTOMER] Adding city to 11 customer
[CUSTOMER] Adding city to 110 customer
[CUSTOMER] Adding city to 11000 customer
[CUSTOMER] Adding city to 11001 customer
[CUSTOMER] Adding city to 11002 customer
[CUSTOMER] Adding city to 11003 customer
[CUSTOMER] Adding city to 11004 customer
[CUSTOMER] Adding city to 11005 customer
[CUSTOMER] Adding city to 11006 customer
[CUSTOMER] Adding city to 11007 customer
[CUSTOMER] Adding city to 11008 customer
[CUSTOMER] Adding city to 11009 customer
[CUSTOMER] Adding city to 11010 customer
[CUSTOM