# Import der Daten für den Cube

Dieses Notebook liest die Exceldatei der Übung *OnlineOrder* in eine SQL-Server-Datenbank ein, sodass sie mit den Analysis Services weiterverarbeitet werden kann.
Es muss ein SQL Server verwendet werden, auf dem die Analysis Services im multidimensionalen Modus laufen.
Der Docker Container von SQL Server hat diesen Dienst nicht!

Quelle: http://griesmayer.com/?menu=Business%20Intelligence&semester=Semsester_6&topic=03_Cube

In [1]:
from pathlib import Path
import sqlalchemy, re
import pandas as pd

# Ask for host, username and password only on the first run of this cell.
# On a re-run the names already exist and the earlier answers are kept.
# An empty answer (just Enter) selects the default named in the prompt.
if "host" not in locals():
    host = input("Host (IP), Enter für localhost.") or "localhost"
if "username" not in locals():
    username = input("Username, Enter für sa.") or "sa"
if "password" not in locals():
    password = input("Passwort, Enter für SqlServer2019.") or "SqlServer2019"
database = "OnlineOrder"

# URL of the target database; the ODBC driver is selected via the query part.
connection_url = sqlalchemy.engine.URL.create(
    "mssql+pyodbc",
    username=username,
    password=password,
    host=host,
    database=database,
    query={"driver": "ODBC Driver 18 for SQL Server"},
)

# The OnlineOrder database does not exist yet, so it cannot be the connection
# target for creating it; this engine connects to tempdb instead.
# AUTOCOMMIT is required for CREATE DATABASE and the other DDL statements.
tempdb_engine = sqlalchemy.create_engine(
    connection_url.set(database="tempdb"),
    isolation_level="AUTOCOMMIT",
    connect_args={"TrustServerCertificate": "yes"},
)

# Engine for the OnlineOrder database itself. The database is dropped right
# before this engine is used, hence pool_pre_ping=True.
engine = sqlalchemy.create_engine(
    connection_url,
    fast_executemany=True,
    pool_pre_ping=True,
    connect_args={"TrustServerCertificate": "yes"},
)


Zuerst löschen wir die Datenbank und erstellen sie neu.
Das ist natürlich nur zum Testen, sonst ist das Löschen der Datenbank nicht ideal...

In [2]:
# Recreate the database from scratch (connected to tempdb, in autocommit mode).
with tempdb_engine.connect() as conn:
    # Best effort: force the database into single-user mode before dropping it.
    # This statement fails when the database does not exist yet, which is fine.
    # Only database errors are ignored, so that KeyboardInterrupt or
    # programming mistakes are not silently swallowed.
    try:
        conn.execute(sqlalchemy.text(f"ALTER DATABASE {database} SET SINGLE_USER WITH ROLLBACK IMMEDIATE"))
    except sqlalchemy.exc.DBAPIError:
        pass
    conn.execute(sqlalchemy.text(f"DROP DATABASE IF EXISTS {database}"))
    conn.execute(sqlalchemy.text(f"CREATE DATABASE {database}"))

# Create the tables in the new database. The connection returned by
# execution_options() is the one used, so the AUTOCOMMIT setting applies
# regardless of whether the call returns a copy or the same connection.
# Referenced tables are created before the tables that point to them.
with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn:
    conn.execute(sqlalchemy.text("""
        CREATE TABLE City (
            CityID    INTEGER      PRIMARY KEY,
            ZIP       INTEGER      NOT NULL,
            City      VARCHAR(255) NOT NULL,
            District  CHAR(2)      NOT NULL
        )
    """))
    conn.execute(sqlalchemy.text("""
        CREATE TABLE Store (
            StoreID INTEGER      PRIMARY KEY,
            Street  VARCHAR(255) NOT NULL,
            CityID  INTEGER      NOT NULL,
            FOREIGN KEY (CityID) REFERENCES City(CityID)
        )
    """))
    conn.execute(sqlalchemy.text("""
        CREATE TABLE Product (
            ProductID       INTEGER      PRIMARY KEY,
            ProductName     VARCHAR(255) NOT NULL,
            ProductCategory VARCHAR(255) NOT NULL,
            PurchasingPrice DECIMAL(9,4) NOT NULL,
            SellingPrice    DECIMAL(9,4) NOT NULL
        )
    """))
    conn.execute(sqlalchemy.text("""
        CREATE TABLE Customer (
            CustomerID INTEGER      PRIMARY KEY,
            FirstName  VARCHAR(255) NOT NULL,
            Gender     VARCHAR(8)   NOT NULL,
            Membership VARCHAR(64)  NOT NULL,
            CityID     INTEGER      NOT NULL,
            FOREIGN KEY (CityID) REFERENCES City(CityID)
        )
    """))
    conn.execute(sqlalchemy.text("""
        CREATE TABLE State (
            StateID   INTEGER     PRIMARY KEY,
            StateType VARCHAR(64) NOT NULL,
            State     VARCHAR(64) NOT NULL
        )
    """))
    # NOTE(review): [Order].State has no FOREIGN KEY to State(StateID), unlike
    # the other reference columns - presumably an omission; confirm against
    # the data before adding the constraint.
    conn.execute(sqlalchemy.text("""
        CREATE TABLE [Order] (
            OrderID     INTEGER PRIMARY KEY IDENTITY(1,1),
            OrderDate   DATE NOT NULL,
            StoreID     INTEGER NOT NULL,
            ProductID   INTEGER NOT NULL,
            CustomerID  INTEGER NOT NULL,
            RecipientID INTEGER NOT NULL,
            State       INTEGER NOT NULL,
            Pieces      INTEGER NOT NULL,
            PurchasingPrice_Piece DECIMAL(9,4) NOT NULL,
            SellingPrice_Piece    DECIMAL(9,4) NOT NULL,
            PurchasingPrice       DECIMAL(9,4) NOT NULL,
            SellingPrice          DECIMAL(9,4) NOT NULL,
            FOREIGN KEY (StoreID) REFERENCES Store(StoreID),
            FOREIGN KEY (ProductID) REFERENCES Product(ProductID),
            FOREIGN KEY (CustomerID) REFERENCES Customer(CustomerID),
            FOREIGN KEY (RecipientID) REFERENCES Customer(CustomerID),
        )
    """))

Nun wird die Exceldatei *OnlineOrder.xls* gelesen.
Dafür muss das Python-Paket *xlrd* mit dem Befehl *pip3 install xlrd --upgrade* installiert werden.
Es liest das alte Excelformat (xls) ein.

In [3]:
# pip3 install xlrd --upgrade
# Sheet names double as table names. The order matters: tables that are
# referenced by foreign keys are loaded before the tables referencing them.
sheet_names = ["City", "Store", "Product", "Customer", "State", "Order"]
# Open and parse the workbook once instead of once per sheet. Passing a list
# as sheet_name returns a dict of DataFrames keyed by sheet name, and a
# missing sheet is reported before any rows are written.
sheets = pd.read_excel("OnlineOrder.xls", sheet_name=sheet_names)
with engine.connect() as conn:
    for sheet in sheet_names:
        data = sheets[sheet]
        # Append to the table created earlier; the DataFrame index is not a column.
        data.to_sql(sheet, conn, if_exists="append", index=False)
        # Commit after each sheet so the rows of finished tables are persisted.
        conn.commit()
