In [None]:
import pandas as pd
from datetime import datetime


def load_csv(filepath, sep=";", encoding="cp850"):
    """Read a delimited text file into a DataFrame.

    Args:
        filepath: Location of the file; anything ``pandas.read_csv`` accepts.
        sep: Field delimiter. Defaults to a semicolon.
        encoding: Text encoding of the file. Defaults to ``cp850``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # low_memory=False: parse the whole file at once instead of chunk-wise,
    # which avoids mixed-type inference within a column.
    reader_options = {"sep": sep, "encoding": encoding, "low_memory": False}
    return pd.read_csv(filepath, **reader_options)

In [None]:
def preprocess_v2ad1056(df):
    """Normalise the v2ad1056 frame and keep rows dated 2023-01-01 or later.

    The input frame is left untouched; all work happens on a copy.

    Steps:
        * ``AUF_ANLAGE`` is parsed with the ``%Y/%m/%d`` format (unparseable
          values become missing) and re-formatted as ``%Y-%m-%d`` text.
        * ``RECH_NR`` is cast to text, a trailing ``.0`` is removed and the
          value is left-padded with zeros to 12 characters.
        * ``NUMMER`` is built from characters 2..11 of ``VERWEIS``, with a
          trailing ``.0`` removed, zero-filled to 10 characters.
        * Rows whose ``AUF_ANLAGE`` is missing or before 2023-01-01 are dropped.

    Args:
        df: DataFrame providing ``AUF_ANLAGE``, ``RECH_NR`` and ``VERWEIS``.

    Returns:
        A new, filtered DataFrame with the normalised columns.
    """
    out = df.copy()

    created = pd.to_datetime(out['AUF_ANLAGE'], format='%Y/%m/%d', errors='coerce')
    out['AUF_ANLAGE'] = created.dt.strftime('%Y-%m-%d')

    # Anchor the pattern and state regex=True explicitly: a bare
    # str.replace('.0', '') is a regex ("any character followed by 0") on
    # pandas < 2.0 and a replace-anywhere literal on pandas >= 2.0.
    # NOTE(review): astype(str) is applied to missing values as well, so they
    # may end up as padded text such as 'nan' - confirm this is acceptable.
    out['RECH_NR'] = (
        out['RECH_NR'].astype(str)
        .str.replace(r'\.0$', '', regex=True)
        .str.rjust(12, '0')
    )
    out['NUMMER'] = (
        out['VERWEIS'].str[2:12].astype(str)
        .str.replace(r'\.0$', '', regex=True)
        .str.zfill(10)
    )

    # Compare real timestamps instead of formatted strings; NaT compares as
    # False, so rows with an unparseable date are dropped.
    out = out[created >= pd.Timestamp('2023-01-01')]
    print(f"v2ad1056 data loaded with {len(out)} records.")
    return out

In [None]:
def _fetch_thumbnails():
    """Download the two export CSVs and return their ProdIndex/Thumbnail columns stacked."""
    export_urls = (
        'http://c1.websale.net/$WS/hg1ht/autoexport/91/export_report-01-aa.csv',
        'http://c1.websale.net/$WS/hg1ht/autoexport/143/export-01-aa-JG.csv',
    )
    exports = [
        pd.read_csv(url, usecols=['ProdIndex', 'Thumbnail'], sep='\t',
                    encoding='latin-1', on_bad_lines='skip')
        for url in export_urls
    ]
    return pd.concat(exports)


def preprocess_v2ad1156(df, thumbnails=None):
    """Normalise the v2ad1156 frame and attach a thumbnail to each article.

    The input frame is left untouched; all work happens on a copy.

    Steps:
        * ``RECHNUNG`` is cast to text, a trailing ``.0`` is removed and the
          value is left-padded with zeros to 12 characters.
        * ``ART_NR`` is stripped of surrounding whitespace and left-joined
          against the thumbnail table (first entry per article wins).
        * ``Thumbnail`` is cast to ``str``.
        * ``MENGE`` changes sign on rows whose ``PREIS`` is negative.

    Args:
        df: DataFrame providing ``RECHNUNG``, ``ART_NR``, ``PREIS`` and
            ``MENGE``.
        thumbnails: Optional DataFrame with ``ProdIndex`` and ``Thumbnail``
            columns. When omitted, the table is downloaded from the two
            export URLs, exactly as before.

    Returns:
        A new DataFrame with the same rows plus a ``Thumbnail`` column.
    """
    if thumbnails is None:
        thumbnails = _fetch_thumbnails()

    lookup = thumbnails[['ProdIndex', 'Thumbnail']].rename(columns={'ProdIndex': 'ART_NR'})
    lookup = (
        lookup.assign(ART_NR=lookup['ART_NR'].str.strip())
        .drop_duplicates(subset='ART_NR')
    )

    out = df.copy()
    # Anchor the pattern and state regex=True explicitly: a bare
    # str.replace('.0', '') is a regex ("any character followed by 0") on
    # pandas < 2.0 and a replace-anywhere literal on pandas >= 2.0.
    out['RECHNUNG'] = (
        out['RECHNUNG'].astype(str)
        .str.replace(r'\.0$', '', regex=True)
        .str.rjust(12, '0')
    )
    out['ART_NR'] = out['ART_NR'].str.strip()

    out = pd.merge(out, lookup, on='ART_NR', how='left')
    out['Thumbnail'] = out['Thumbnail'].astype(str)

    # Rows with a negative price get their quantity sign flipped.
    out.loc[out['PREIS'] < 0, 'MENGE'] *= -1
    print(f"v2ad1156 data loaded with {len(out)} records.")
    return out

In [None]:
def merge_data(v2ad1056, v2ad1156):
    """Inner-join the two preprocessed frames on their invoice number.

    Args:
        v2ad1056: Left frame; joined on its ``RECH_NR`` column.
        v2ad1156: Right frame; joined on its ``RECHNUNG`` column.

    Returns:
        DataFrame holding only the rows whose invoice number occurs in both
        inputs.
    """
    joined = v2ad1056.merge(
        v2ad1156,
        how='inner',
        left_on='RECH_NR',
        right_on='RECHNUNG',
    )
    record_count = len(joined)
    print(f"merged data loaded with {record_count} records.")
    return joined

In [None]:
def filter_and_save(df, df2, land, wg=None, output_path=None):
    """Attach product groups to the invoice rows and export them as CSV.

    Neither ``df`` nor ``df2`` is modified.

    Steps:
        * Build an article -> product-group table from ``df2`` (``NUMMER``
          becomes ``ART_NR``, ``WARENGR`` becomes ``WG``) and look up
          ``WG_NAME`` in ``wg`` via the group code. Article numbers are
          stripped *before* duplicates are dropped, so every article
          contributes exactly one row to the join.
        * Left-join that table onto ``df`` by stripped ``ART_NR``.
        * Zero-fill ``NUMMER`` to 10 and ``AUFTRAG_NR`` to 9 characters after
          removing a trailing ``.0``.
        * Keep the export columns, drop duplicate rows and write a
          semicolon-separated, cp850-encoded CSV without the index.

    Args:
        df: Invoice rows; must provide every column of the export list below.
        df2: Article table providing ``NUMMER`` and ``WARENGR``.
        land: Value inserted into the default output file name
            (``rechnung_F0{land}.csv``) and into the progress message.
        wg: Optional DataFrame with ``WG_CODE`` and ``WG_NAME``. When
            omitted, it is read from ``/Volumes/MARAL/Data/WG.xlsx``.
        output_path: Optional target path for the CSV. When omitted, the
            file is written to ``/Volumes/MARAL/rechnung/``.

    Returns:
        None. The result is written to disk.
    """
    print(f"Start loading {land} data ...")

    def _without_float_suffix(series):
        # str() of a float-typed whole number ends in ".0"; remove only that
        # trailing suffix. The pattern is anchored and regex=True is explicit
        # because a bare str.replace('.0', '') behaves differently before and
        # after pandas 2.0.
        return series.astype(str).str.replace(r'\.0$', '', regex=True)

    if wg is None:
        wg = pd.read_excel("/Volumes/MARAL/Data/WG.xlsx")
    if output_path is None:
        output_path = fr'/Volumes/MARAL/rechnung/rechnung_F0{land}.csv'

    groups = (
        wg.assign(WG_CODE=_without_float_suffix(wg['WG_CODE']))
        .rename(columns={'WG_CODE': 'WG'})
    )

    articles = df2.rename(columns={'NUMMER': 'ART_NR', 'WARENGR': 'WG'})[['ART_NR', 'WG']]
    articles = articles.assign(
        ART_NR=articles['ART_NR'].astype(str).str.strip(),
        WG=_without_float_suffix(articles['WG']),
    )
    articles = (
        articles.merge(groups, on='WG', how='left')
        .drop_duplicates(subset='ART_NR')[['ART_NR', 'WG', 'WG_NAME']]
    )

    invoices = df.assign(ART_NR=df['ART_NR'].astype(str).str.strip())
    invoices = pd.merge(invoices, articles, on='ART_NR', how='left')
    invoices['NUMMER'] = _without_float_suffix(invoices['NUMMER']).str.zfill(10)
    invoices['AUFTRAG_NR'] = _without_float_suffix(invoices['AUFTRAG_NR']).str.zfill(9)

    export_columns = [
        'VERWEIS', 'AUFTRAG_NR', 'HERKUNFT', 'TYP', 'DATUM', 'MEDIACODE', 'NUMMER', "AUF_ANLAGE",
        'RECHNUNG', 'PROJEKT', 'ART_NR', 'GROESSE', 'FARBE', 'MENGE', 'PREIS', 'MWST', 'WG', 'WG_NAME', 'EK',
        'BEZEICHNG', 'RETOUREGRD', 'RETOUREART', 'RECH_ART', 'RABATT', 'Thumbnail',
    ]
    filtered_data = invoices[export_columns].drop_duplicates()
    filtered_data.to_csv(output_path, sep=';', encoding='cp850', index=False)

In [None]:




def user_rechnung(lands=range(1, 5), csv_root='/Volumes/MARAL/CSV'):
    """Run the invoice export once per ``F0<n>`` data folder.

    For every entry ``n`` of ``lands`` the function loads
    ``F0<n>/V2AD1056.csv`` and ``F0<n>/V2AD1156.csv`` below ``csv_root``,
    preprocesses both, merges them and hands the result to
    ``filter_and_save`` together with the article table.

    Args:
        lands: Iterable of folder numbers to process. Defaults to 1..4.
        csv_root: Directory containing the ``F0<n>`` folders.

    Returns:
        None.
    """
    # The article table is read from F01 for every folder, so load it once
    # instead of on each iteration.
    # NOTE(review): confirm that F01 (rather than F0<n>) is intended here.
    v2ar1001 = load_csv(f'{csv_root}/F01/V2AR1001.csv')

    for i in lands:
        # File paths
        v2ad1056_path = f'{csv_root}/F0{i}/V2AD1056.csv'
        v2ad1156_path = f'{csv_root}/F0{i}/V2AD1156.csv'

        # Load and preprocess data
        v2ad1056 = preprocess_v2ad1056(load_csv(v2ad1056_path))
        v2ad1156 = preprocess_v2ad1156(load_csv(v2ad1156_path))

        # Merge and save data
        merged_data = merge_data(v2ad1056, v2ad1156)
        filter_and_save(merged_data, v2ar1001, i)


# `__name__` is "__main__" when this code runs as a script and also inside a
# Jupyter kernel, so executing this cell starts the full export.
if __name__ == "__main__":
    user_rechnung()
