In [1]:
import pandas as pd
import numpy as np

# Set seed for reproducibility
np.random.seed(42)

# Read the decision table
decision_df = pd.read_csv('data_prep/DECISION_TABLE.csv')

# Pre-defined lists for random data
titles = ['Herr', 'Frau', 'Dr.', 'Prof.', 'Dipl.-Ing.']
forenames = ['Anna', 'Hans', 'Maria', 'Peter', 'Klaus', 'Sabine', 'Thomas', 'Julia', 'Michael', 'Petra']
surnames = ['Müller', 'Schmidt', 'Schneider', 'Fischer', 'Weber', 'Meyer', 'Wagner', 'Becker', 'Schulz', 'Hoffmann']
streets = ['Hauptstraße', 'Bahnhofstraße', 'Kirchweg', 'Gartenstraße', 'Schulstraße', 'Bergstraße', 'Waldweg', 'Dorfstraße', 'Lindenallee', 'Marktplatz']
streetnos = ['1', '2', '5', '10', '15', '20', '25', '30', '42', '50', '1a', '3b', '12c']

# Get unique EQUNRs
num_rows = len(decision_df)

# Add random columns
decision_df['TITLE'] = np.random.choice(titles, size=num_rows)
decision_df['FORENAME'] = np.random.choice(forenames, size=num_rows)
decision_df['SURNAME'] = np.random.choice(surnames, size=num_rows)
decision_df['STREET'] = np.random.choice(streets, size=num_rows)
decision_df['STREETNO'] = np.random.choice(streetnos, size=num_rows)
decision_df["EMAIL"] = (decision_df["FORENAME"].str.lower() + "." + decision_df["SURNAME"].str.lower() + "@example.com")

decision_df = decision_df[["EQUNR", "TITLE", "FORENAME", "SURNAME", "STREET", "STREETNO", "CITY1", "CITY2", "POST_CODE1", "EMAIL"]].copy()
decision_df = decision_df.drop_duplicates(subset=["EQUNR"]).reset_index(drop=True)
display(decision_df)

# Save as NET_MASTER.csv
decision_df.to_csv('data_prep/NET_MASTER.csv', index=False)

num_empty = int(len(decision_df) * 0.1)
empty_indices = np.random.choice(decision_df.index, size=num_empty, replace=False)
decision_df = decision_df.drop(index=empty_indices)
display(decision_df)

# Save as SALES_MASTER.csv
decision_df.to_csv('data_prep/SALES_MASTER.csv', index=False)

print(f"Created SALES_MASTER.csv with {len(decision_df)} rows")
print(f"Columns: {list(decision_df.columns)}")

Unnamed: 0,EQUNR,TITLE,FORENAME,SURNAME,STREET,STREETNO,CITY1,CITY2,POST_CODE1,EMAIL
0,bRR+ABSkiJkVKPVSTryyZIorgRWaZTeuHCeGvhZ,Prof.,Thomas,Schneider,Bahnhofstraße,25,Plettenberg,Holthausen,58840,thomas.schneider@example.com
1,jtehYPBOVjAWNSRQWUmHbXCcShSlPKLEEsWWQQz,Dipl.-Ing.,Maria,Weber,Dorfstraße,1,Hagen,Hohenlimburg,58119,maria.weber@example.com
2,tmTASBOFNtAivieUVWdwQOUCNSQK+vUWOPWVRwL,Dr.,Sabine,Wagner,Hauptstraße,10,Herdecke,Ende,58313,sabine.wagner@example.com
3,/APtTLQndVKMQZkAWhYayGzoEWtevhbyXdFWLMj,Dipl.-Ing.,Maria,Schulz,Schulstraße,10,Lüdenscheid,Brügge,58515,maria.schulz@example.com
4,xYotSqpoopKtRDluBupACKJHQXZtKWNOTKRmtAO,Dipl.-Ing.,Petra,Schulz,Marktplatz,15,Hagen,Hohenlimburg,58119,petra.schulz@example.com
...,...,...,...,...,...,...,...,...,...,...
21579,cdDnvfdPYRecShxWLsZeRtriXmhstfTWCmXfKdb,Prof.,Peter,Müller,Schulstraße,50,Altena,Altena,58762,peter.müller@example.com
21580,UEdsnFkFZQyGugSkjBSfhWGsjuFyQCVSLocJwkg,Herr,Michael,Meyer,Gartenstraße,10,Halver,Halver,58553,michael.meyer@example.com
21581,KUYSQhqWDCBFRr/yRkC+BTbdXVFKUKZcQQEVIUD,Herr,Sabine,Meyer,Bergstraße,42,Neuenrade,Neuenrade,58809,sabine.meyer@example.com
21582,aAZjASwUYzyEFLdLxRRQkfcRiLTUxTgTqldtX+B,Frau,Thomas,Schulz,Bergstraße,2,Neuenrade,Neuenrade,58809,thomas.schulz@example.com


Unnamed: 0,EQUNR,TITLE,FORENAME,SURNAME,STREET,STREETNO,CITY1,CITY2,POST_CODE1,EMAIL
0,bRR+ABSkiJkVKPVSTryyZIorgRWaZTeuHCeGvhZ,Prof.,Thomas,Schneider,Bahnhofstraße,25,Plettenberg,Holthausen,58840,thomas.schneider@example.com
1,jtehYPBOVjAWNSRQWUmHbXCcShSlPKLEEsWWQQz,Dipl.-Ing.,Maria,Weber,Dorfstraße,1,Hagen,Hohenlimburg,58119,maria.weber@example.com
2,tmTASBOFNtAivieUVWdwQOUCNSQK+vUWOPWVRwL,Dr.,Sabine,Wagner,Hauptstraße,10,Herdecke,Ende,58313,sabine.wagner@example.com
3,/APtTLQndVKMQZkAWhYayGzoEWtevhbyXdFWLMj,Dipl.-Ing.,Maria,Schulz,Schulstraße,10,Lüdenscheid,Brügge,58515,maria.schulz@example.com
4,xYotSqpoopKtRDluBupACKJHQXZtKWNOTKRmtAO,Dipl.-Ing.,Petra,Schulz,Marktplatz,15,Hagen,Hohenlimburg,58119,petra.schulz@example.com
...,...,...,...,...,...,...,...,...,...,...
21579,cdDnvfdPYRecShxWLsZeRtriXmhstfTWCmXfKdb,Prof.,Peter,Müller,Schulstraße,50,Altena,Altena,58762,peter.müller@example.com
21580,UEdsnFkFZQyGugSkjBSfhWGsjuFyQCVSLocJwkg,Herr,Michael,Meyer,Gartenstraße,10,Halver,Halver,58553,michael.meyer@example.com
21581,KUYSQhqWDCBFRr/yRkC+BTbdXVFKUKZcQQEVIUD,Herr,Sabine,Meyer,Bergstraße,42,Neuenrade,Neuenrade,58809,sabine.meyer@example.com
21582,aAZjASwUYzyEFLdLxRRQkfcRiLTUxTgTqldtX+B,Frau,Thomas,Schulz,Bergstraße,2,Neuenrade,Neuenrade,58809,thomas.schulz@example.com


Created SALES_MASTER.csv with 19426 rows
Columns: ['EQUNR', 'TITLE', 'FORENAME', 'SURNAME', 'STREET', 'STREETNO', 'CITY1', 'CITY2', 'POST_CODE1', 'EMAIL']


In [2]:
import pandas as pd
import sqlite3
from pathlib import Path

# Database path
DB_PATH = "database.db"

# Data folder path
data_folder = Path("data_prep")

# Dictionary to store DataFrames
dataframes = {}


# Load all CSV files from data folder
csv_files = list(data_folder.glob("*.csv"))
print(f"Found {len(csv_files)} CSV files in data folder:\n")

for csv_file in csv_files:
    table_name = csv_file.stem.lower()  # Use filename without extension as table name
    print(f"Loading {csv_file.name}...")
    df = pd.read_csv(csv_file).fillna("")  # Read all columns as strings TODO: check if it is okay without str
    if table_name.startswith("decision_table"):
        df["HOUSE_NUM1"] = df["HOUSE_NUM1"].apply(lambda x: "1111" if x == "" else x).astype(int)
        df = df.drop(columns=["ENTSCHEIDUNG", "REGEL"]).copy()
        display(df)
        table_name = "decision_talbe"
    dataframes[table_name] = df
    print(f"  - Shape: {df.shape}")
    print(f"  - Columns: {list(df.columns)[:5]}{'...' if len(df.columns) > 5 else ''}\n")

# Write all DataFrames to SQLite database
print(f"\nWriting tables to {DB_PATH}:")
conn = sqlite3.connect(DB_PATH)

for table_name, df in dataframes.items():
    print(f"  - Writing table '{table_name}'...")
    df.to_sql(table_name, conn, if_exists='replace', index=False)
    
conn.close()

print("\n✓ All tables successfully written to database!")
print(f"Tables created: {list(dataframes.keys())}")

Found 6 CSV files in data folder:

Loading NET_MASTER.csv...
  - Shape: (21584, 10)
  - Columns: ['EQUNR', 'TITLE', 'FORENAME', 'SURNAME', 'STREET']...

Loading SALES_MASTER.csv...
  - Shape: (19426, 10)
  - Columns: ['EQUNR', 'TITLE', 'FORENAME', 'SURNAME', 'STREET']...

Loading DECISION_TABLE.csv...


Unnamed: 0,HAUS,ANLAGE,ABLEINH,TOUR,ME_MA_ID,CITY1,CITY2,HOUSE_NUM1,POST_CODE1,EQUNR
0,VYzgXZCzMhdRNbVnp/zwCVWVSbiWCqNKferaQXw,GTnLsZsXvxrFnBajSVglSPmoddLsQRErrJCfCOt,J09D6241,J09D,HsWhZgTTSsWsEVUgYcUQDfJcFPXeXpdpVYTyvMu,Plettenberg,Holthausen,69,58840,bRR+ABSkiJkVKPVSTryyZIorgRWaZTeuHCeGvhZ
1,FBFcVZgYFsOSZUTOR/oFkmXrWWrMQRrjwpofDbz,MqqQPVAUQcZiRsnvIfYKooVaYSgpZWJUzwSqkcP,J09D3013,J09D_J,jHVvrpRzxFUMmoQiujBVpygCFbX/hagZKKwsHSO,Hagen,Hohenlimburg,33,58119,jtehYPBOVjAWNSRQWUmHbXCcShSlPKLEEsWWQQz
2,RATSVd+nFTBSpDJwLNcsWGbvMYNWyIWUvkKfulR,XjDLutwXLvbKYVYeAPAhHGZeAHWexGcoIOARjaa,J09D7024,J09D,aTshMMoXzZpLeUwkaDuUXZXPQtSNuwiZYSBfDYB,Herdecke,Ende,55,58313,tmTASBOFNtAivieUVWdwQOUCNSQK+vUWOPWVRwL
3,hBeavUvnpyDUoYQcQpYVEdaLqQAmXhKxCdRSJNB,nLXXxIjULqXVscYpNWHigTOnBVHYZWPYCYXSiQX,J09D4011,J09D_L,b+PYWiZeRZyVvR+QXA+ZnF/ZeTYfuT+DLVTbxtg,Lüdenscheid,Brügge,5,58515,/APtTLQndVKMQZkAWhYayGzoEWtevhbyXdFWLMj
4,yesZtLfQSxsLKzuPsYwIwYsAZQQTv+TiUWPaSsM,sXsVZQpSkyXKWLCEMxefRSRCRt/axWsYNHGwqMz,J09D3012,J09D_J,nMRPrwngUJS/qQiYdokXUe/sUIDlSorcVSXzpMs,Hagen,Hohenlimburg,11,58119,xYotSqpoopKtRDluBupACKJHQXZtKWNOTKRmtAO
...,...,...,...,...,...,...,...,...,...,...
21579,LtYZUXcXsXZPmSqS+OkvLd+KvgtUWUFeQVsiSbW,UWXKmZUzFRYzAQbrVZGeqhSiZujEWCeYTeAEsvc,J09A5012,J09A,QtXVsAsZOyLlsasTQhNWtXdqKsUMng/yBkdbQPU,Altena,Altena,14,58762,cdDnvfdPYRecShxWLsZeRtriXmhstfTWCmXfKdb
21580,dcJSPhTJCKTXAUVlEZlpCQQUkvKuRDhdNJWDgXV,blhCLCltmGFn+HVeVWRLTBITLxLDkJyFRFbDCll,J09A5405,J09A,BWXXUUJTUPwsNhdNsWziREkU+IMoXEChIYKTzMz,Halver,Halver,41,58553,UEdsnFkFZQyGugSkjBSfhWGsjuFyQCVSLocJwkg
21581,uRiFAqHcJSCXTXQqIZQ+AcQ/HuwVcDeVfFjdqKw,MILsWZEXatPcRX+kMZHcV/yZBvGiejirEVKYQnk,J09A6102,J09A,xgDqiWjfGEEgVbiVRZUDVR+nTVJbzEUhFaplWTW,Neuenrade,Neuenrade,26,58809,KUYSQhqWDCBFRr/yRkC+BTbdXVFKUKZcQQEVIUD
21582,SlnSpXjxcnpTWQVCHUDiGjZCBWbW/XxRh/AOXLw,GEQHvThCoDlLDIiUTssEsBRVKyWsFyBhQNMwqLL,J09A6105,J09A,YVdQyTzYzCGQMUToNbcLqpZWGfSrwaFxGhFUyVT,Neuenrade,Neuenrade,8,58809,aAZjASwUYzyEFLdLxRRQkfcRiLTUxTgTqldtX+B


  - Shape: (21584, 10)
  - Columns: ['HAUS', 'ANLAGE', 'ABLEINH', 'TOUR', 'ME_MA_ID']...

Loading EABL.csv...
  - Shape: (93925, 11)
  - Columns: ['Meter Reading (MR) Doc. No.', 'EQUNR', 'Geplante Ableseart', 'MR type', 'MR TYPE TEXT']...

Loading EABLG.csv...
  - Shape: (93926, 7)
  - Columns: ['Meter Reading (MR) Doc. No.', 'Installation', 'Meter Reading reason', 'MR Reason - Text', 'Scheduled MR Date']...

Loading EANL.csv...
  - Shape: (21584, 14)
  - Columns: ['Installation', 'Installation type', 'Record created on', 'Object changed on', 'SPARTE_TEXT']...


Writing tables to database.db:
  - Writing table 'net_master'...
  - Writing table 'sales_master'...
  - Writing table 'decision_talbe'...
  - Writing table 'eabl'...
  - Writing table 'eablg'...
  - Writing table 'eanl'...

✓ All tables successfully written to database!
Tables created: ['net_master', 'sales_master', 'decision_talbe', 'eabl', 'eablg', 'eanl']


In [3]:
# import sqlite3

# DB_PATH = "database.db"

# def write_process_rule(process_rule_id: str, process_rule: str) -> dict:
#     """
#     Write or update a process rule in the process_rules table.
    
#     Args:
#         process_rule_id: The unique identifier for the process
#         process_rule: The process rule text/description
    
#     Returns:
#         Dictionary with success status and message
#     """
#     try:
#         conn = sqlite3.connect(DB_PATH)
#         cursor = conn.cursor()
        
#         # Drop and recreate the process_rules table
#         cursor.execute("DROP TABLE IF EXISTS process_rules")
#         cursor.execute("""
#             CREATE TABLE process_rules (
#                 process_rule_id TEXT PRIMARY KEY,
#                 process_rule TEXT NOT NULL
#             )
#         """)
        
#         # Insert or replace the process rule
#         cursor.execute("""
#             INSERT OR REPLACE INTO process_rules (process_rule_id, process_rule)
#             VALUES (?, ?)
#         """, (process_rule_id, process_rule))
        
#         conn.commit()
#         conn.close()
        
#         return {
#             "success": True,
#             "process_rule_id": process_rule_id,
#             "message": f"Successfully written process rule for '{process_rule_id}'"
#         }
        
#     except Exception as e:
#         return {
#             "success": False,
#             "error": str(e),
#             "message": "Failed to write process rule"
#         }

# # Test the function
# process_rule_id = "J09A"
# prompt = """1. Make a copy of decision_table with TOURS starting with J09A named J09A_temp
# 2. Update the table with an additional empty column ENTSCHEIDUNG
# 3. Apply the following rules to the joined table:
#     1. Count the number of EQUNR for every HAUS and set ENTSCHEIDUNG to "KSA" if there are less than 3 EQUNR. ELSE set ENTSCHEIDUNG to "EVU"
#     2. SET all houses with HOUSE_NUM1 of 0 to "EVU"
#     3. SET all houses with HOUSE_NUM1 500 - 599 to "EVU"
#     4. SET all houses with HOUSE_NUM1 900 - 999 to "EVU"
#     5. SET all houses with address area Wiblingwerde or Breckerfeld to "EVU"
# 4. JOIN this table with the EABLG table using INSTALLATION.
# 5. JOIN this table with the NET_MASTER table and the SALES_MASTER table using EQUNR.
# 6. Compare sales data and net data for these IDs:
#     If sales data is there, take sales data and empty net data.
#     Otherwise stay with net data and empty sales data.
# 5. Export the final data from the table as CSV file named J09A including all columns. 
# 6. Send them to the EVU team to meter.readings@evu.com
# 7. Delete the temporary table after exporting the data.
# """
# result = write_process_rule(process_rule_id, prompt)
# print(result)

In [4]:
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

# Get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

print("Database Schema:\n" + "="*80 + "\n")

for table in tables:
    table_name = table[0]
    print(f"Table: {table_name}")
    print("-" * 80)
    
    # Get table info (column names and types)
    cursor.execute(f"PRAGMA table_info({table_name});")
    columns = cursor.fetchall()
    
    for col in columns:
        col_id, col_name, col_type, not_null, default_val, is_pk = col
        pk_marker = " (PRIMARY KEY)" if is_pk else ""
        not_null_marker = " NOT NULL" if not_null else ""
        print(f"  {col_name}: {col_type}{not_null_marker}{pk_marker}")
    
    print("\n")

conn.close()

Database Schema:

Table: net_master
--------------------------------------------------------------------------------
  EQUNR: TEXT
  TITLE: TEXT
  FORENAME: TEXT
  SURNAME: TEXT
  STREET: TEXT
  STREETNO: TEXT
  CITY1: TEXT
  CITY2: TEXT
  POST_CODE1: INTEGER
  EMAIL: TEXT


Table: sales_master
--------------------------------------------------------------------------------
  EQUNR: TEXT
  TITLE: TEXT
  FORENAME: TEXT
  SURNAME: TEXT
  STREET: TEXT
  STREETNO: TEXT
  CITY1: TEXT
  CITY2: TEXT
  POST_CODE1: INTEGER
  EMAIL: TEXT


Table: decision_talbe
--------------------------------------------------------------------------------
  HAUS: TEXT
  ANLAGE: TEXT
  ABLEINH: TEXT
  TOUR: TEXT
  ME_MA_ID: TEXT
  CITY1: TEXT
  CITY2: TEXT
  HOUSE_NUM1: INTEGER
  POST_CODE1: INTEGER
  EQUNR: TEXT


Table: eabl
--------------------------------------------------------------------------------
  Meter Reading (MR) Doc. No.: TEXT
  EQUNR: TEXT
  Geplante Ableseart: INTEGER
  MR type: TEXT
  MR TYPE 