# Source Database Model Opdracht

Operationele Databases:

In [1]:
import sqlite3
import pandas as pd
import pyodbc

go_sales_conn = sqlite3.connect("../../../data/raw/go_sales_train.sqlite")
go_crm_conn = sqlite3.connect("../../../data/raw/go_crm_train.sqlite")
go_staff_conn = sqlite3.connect("../../../data/raw/go_staff_train.sqlite")

Connectie met SSMS voor SDM:

In [41]:
DB = {"servername": r"localhost,1433", "database": "sdm", "username": "sa", "password": "iDTyjZx7dRL4"}

export_conn = pyodbc.connect(
    f"DRIVER={{ODBC Driver 17 for SQL Server}};"
    f"SERVER={DB['servername']};"
    f"DATABASE={DB['database']};"
    f"UID={DB['username']};"
    f"PWD={DB['password']}"
)

DataFrames maken voor tables:

In [37]:
def create_dataframes_sql(connection, db_type):
    dictionary : dict = {}
    query : str = ""
    key : str = ""

    if (db_type == "sqlite"):
        query = "SELECT name FROM sqlite_master WHERE type='table';"
        key = "name"
    elif (db_type == "ssms"):
        query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';"
        key = "TABLE_NAME"

    table_names = pd.read_sql(query, connection)

    for table in table_names[key].tolist():
        dictionary[table] = pd.read_sql(f"SELECT * FROM {table}", connection)

    return dictionary

go_sales_tables = create_dataframes_sql(go_sales_conn, "sqlite")
go_crm_tables = create_dataframes_sql(go_crm_conn, "sqlite")
go_staff_tables = create_dataframes_sql(go_staff_conn, "sqlite")

print(list(go_sales_tables.keys()))

['country', 'order_details', 'order_header', 'order_method', 'product', 'product_line', 'product_type', 'retailer_site', 'return_reason', 'returned_item', 'sales_branch', 'sales_staff']


SQLite Dictionaries mergen naar 1 Dictionary:

In [39]:
go_sdm_tables = go_sales_tables | go_crm_tables | go_staff_tables # Alle drie mergen in 1 Dictionary met alle DataFrames

#Forecast en inventory tables toevoegen
inventory = pd.DataFrame(columns=['INVENTORY_YEAR', 'INVENTORY_MONTH', 'PRODUCT_NUMBER', 'INVENTORY_COUNT'])
forecast = pd.DataFrame(columns=['PRODUCT_NUMBER', 'YEAR', 'MONTH', 'EXPECTED_VOLUME'])

go_sdm_tables["inventory_levels"] = inventory
go_sdm_tables["forecast"] = forecast

# DataFrames met ontbrekende/verkeerd genoemde rows updaten:
go_sdm_tables["country"]["LANGUAGE"] = go_sales_tables["country"]["LANGUAGE"]

go_sdm_tables["country"]["CURRENCY_NAME"] = go_sales_tables["country"]["CURRENCY_NAME"]

go_sdm_tables["country"] = go_sdm_tables["country"].rename(columns={'COUNTRY_EN': 'COUNTRY'})

go_sdm_tables["product_line"] = go_sales_tables["product_line"]

try:
    go_sdm_tables["retailer_headquarters"].drop('POSTAL_ZONE', axis=1, inplace=True)
except KeyError:
    print("Removal of Postal Zone in tables has already been complete.")

# Dictionary in goede volgorde zetten (om inserts goed te laten werken):
dict_order = [
    'sales_territory', 
    'country', 
    'order_method', 
    'retailer_site', 
    'sales_branch', 
    'sales_staff', 
    'retailer_contact', 
    'order_header', 
    'product_line', 
    'product_type', 
    'product', 
    'order_details', 
    'return_reason', 
    'returned_item', 
    'course', 
    'satisfaction_type', 
    'satisfcation', 
    'training',
    'age_group',
    'retailer_segment',
    'retailer_headquarters',
    'retailer_type',
    'retailer'
    'sales_demographic',
    'inventory_levels',
    'forecast'
]

go_sdm_tables = {k: go_sdm_tables[k] for k in dict_order if k in go_sdm_tables}

print(list(go_sdm_tables.keys()))

go_sdm_tables['retailer_site']


Removal of Postal Zone in tables has already been complete.
['sales_territory', 'country', 'order_method', 'retailer_site', 'sales_branch', 'sales_staff', 'retailer_contact', 'order_header', 'product_line', 'product_type', 'product', 'order_details', 'return_reason', 'returned_item', 'course', 'satisfaction_type', 'training', 'age_group', 'retailer_segment', 'retailer_headquarters', 'retailer_type', 'inventory_levels', 'forecast']


Unnamed: 0,RETAILER_SITE_CODE,RETAILER_CODE,ADDRESS1,ADDRESS2,CITY,REGION,POSTAL_ZONE,COUNTRY_CODE,ACTIVE_INDICATOR
0,1,89,1117 Franklin Blvd,,Winnipeg,Manitoba,R2C 0M5,4,1
1,2,89,"45, rue Atwater",,Montréal,Québec,H2T 9K8,4,1
2,3,89,328 Hodgson Road,,Fredericton,New Brunswick,E3B 2H2,4,1
3,4,90,"2425, rue Peel",,Montréal,Québec,H4G 3T4,4,1
4,5,90,34 8th Avenue,,Sudbury,Ontario,M5V 1G5,4,1
...,...,...,...,...,...,...,...,...,...
386,439,209,Hüttenstraße 87,,Klosterneuburg,,A-3400,5,1
387,440,209,Hauptstraße 85,,Linz,,A-4015,5,1
388,441,209,Amselweg 8,,Innsbruck,,A-6020,5,1
389,442,209,Hasengasse 38,,Ischgl,,A-6561,5,1


SSMS Databases vullen:

In [42]:
export_cursor = export_conn.cursor()

for table_name, df in go_sdm_tables.items():
    try:
        for index, row in df.iterrows():
            columns = df.columns.tolist()

            values = []
            for col in columns:
                value = row[col]

                if pd.isna(value):
                    values.append("NULL")

                elif isinstance(value, str):
                    values.append(f"'{value.replace("'", "''")}'")

                else:
                    values.append(str(value))

            column_names = ", ".join(columns)
            value_string = ", ".join(values)
            query = f"INSERT INTO {table_name} ({column_names}) VALUES ({value_string})"

            export_cursor.execute(query)
    except pyodbc.Error as e:
        print(f"Error in table: {table_name}")
        print(f"Query: {query}")
        print(f"Error message: {e}")
        print("-" * 80)

export_conn.commit()
export_cursor.close()


Leegmaken van alle tables:

In [33]:
export_cursor = export_conn.cursor()

tables = [
    "forecast",
    "inventory_levels",
    "sales_demographic",
    "retailer",
    "retailer_type",
    "retailer_headquarters",
    "retailer_segment",
    "age_group",
    "training",
    "satisfaction",
    "satisfaction_type",
    "course",
    "returned_item",
    "return_reason",
    "order_details",
    "product",
    "product_type",
    "product_line",
    "order_header",
    "retailer_contact",
    "sales_staff",
    "sales_branch",
    "retailer_site",
    "order_method",
    "country",
    "sales_territory"
]

for table in tables:
    query = f"DELETE FROM {table};"
    try:
        export_cursor.execute(query)
        print(f"Cleared table: {table}")
    except pyodbc.Error as e:
        print(f"Error clearing table {table}: {e}")

export_conn.commit()
export_cursor.close()

Cleared table: forecast
Cleared table: inventory_levels
Cleared table: sales_demographic
Cleared table: retailer
Cleared table: retailer_type
Cleared table: retailer_headquarters
Cleared table: retailer_segment
Cleared table: age_group
Cleared table: training
Cleared table: satisfaction
Cleared table: satisfaction_type
Cleared table: course
Cleared table: returned_item
Cleared table: return_reason
Cleared table: order_details
Cleared table: product
Cleared table: product_type
Cleared table: product_line
Cleared table: order_header
Cleared table: retailer_contact
Cleared table: sales_staff
Cleared table: sales_branch
Cleared table: retailer_site
Cleared table: order_method
Cleared table: country
Cleared table: sales_territory
