In [9]:
import pandas as pd
import numpy as np


# Number of leading lines skipped before the column header row in each export.
# NOTE(review): presumably the bank's account-summary preamble — confirm against
# a raw export if the statement layout ever changes.
BT_HEADER_ROWS = 16


def load_statement(path, currency, skiprows=BT_HEADER_ROWS):
    """Read one exported statement CSV and tag every row with its currency.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    currency : str
        Value written to the new "Currency" column for every row.
    skiprows : int, optional
        Number of leading lines to skip before the header row.

    Returns
    -------
    pandas.DataFrame
        The parsed statement with an added "Currency" column.
    """
    statement = pd.read_csv(path, skiprows=skiprows)
    statement["Currency"] = currency
    return statement


btradron = load_statement("./.data/BTRADRON.csv", "RON")
btradeur = load_statement("./.data/BTRADEUR.csv", "EUR")

# ignore_index=True renumbers the rows 0..n-1 so index labels stay unique
# across the two statements.
df = pd.concat([btradron, btradeur], ignore_index=True)

df.head()

Unnamed: 0,Data tranzactie,Data valuta,Descriere,Referinta tranzactiei,Debit,Credit,Sold contabil,Currency
0,2024-08-02,2024-08-02,Plata la POS non-BT cu card VISA;EPOS 30/07/20...,000NVPO242154Boq,-1043.83,,164.3,RON
1,2024-08-01,2024-08-01,Pachet IZI;Nelimitat in reteaua BT;REF: E41IZ5...,E41IZ56241800002,-29.0,,1208.13,RON
2,2024-07-31,2024-07-31,Plata la POS non-BT cu card VISA;EPOS 28/07/20...,000NVPO242134gxY,-36.99,,1237.13,RON
3,2024-07-22,2024-07-22,Plata OP inter - canal electronic;/ROC/./RFB/2...,E41ZEXA2420400GK,-1301.0,,535.52,RON
4,2024-07-22,2024-07-22,Comision plata OP;/ROC/./RFB/20240722/20240722...,E41ZEXA2420400GK,-5.0,,1836.52,RON


In [10]:
# Discard the statement fields that the later cells never read.
unused_columns = ["Referinta tranzactiei", "Credit", "Sold contabil", "Data valuta"]
df = df.drop(columns=unused_columns)

df.head()

Unnamed: 0,Data tranzactie,Descriere,Debit,Currency
0,2024-08-02,Plata la POS non-BT cu card VISA;EPOS 30/07/20...,-1043.83,RON
1,2024-08-01,Pachet IZI;Nelimitat in reteaua BT;REF: E41IZ5...,-29.0,RON
2,2024-07-31,Plata la POS non-BT cu card VISA;EPOS 28/07/20...,-36.99,RON
3,2024-07-22,Plata OP inter - canal electronic;/ROC/./RFB/2...,-1301.0,RON
4,2024-07-22,Comision plata OP;/ROC/./RFB/20240722/20240722...,-5.0,RON


In [11]:
# Pull the date that follows "POS " in the description (dd/mm/yyyy).
# Column 1 of the extract result is the inner capture group, i.e. the bare date;
# rows without a match get NaN.
df["Payment Date"] = df["Descriere"].str.extract(r"(POS\s(\d{2}\/\d{2}\/\d{4}))")[1]

# Rows whose description matches any of these alternatives are excluded.
# NOTE(review): "Schimb valutar" / "Comision" look like FX and fee rows, and the
# LT38... token looks like an IBAN — confirm these are the intended exclusions.
regex_pattern = r"Schimb valutar|LT383250069969855031|Comision"

# na=False treats a missing description as "no match" (the row is kept).
# Without it the result contains NaN and the `~` inversion raises.
mask = ~df["Descriere"].str.contains(regex_pattern, na=False)

filtered_df = df.loc[mask].copy()

# The extraction pattern above can only yield dd/mm/yyyy, so parse with that one
# format. Re-parsing the column once per candidate format is order-dependent:
# a first pass with a non-matching format coerces every value to NaT.
filtered_df["Payment Date"] = pd.to_datetime(
    filtered_df["Payment Date"], format="%d/%m/%Y", errors="coerce"
)

filtered_df["Data tranzactie"] = pd.to_datetime(
    filtered_df["Data tranzactie"], errors="coerce"
)

# Rows with no date in the description fall back to the transaction date.
filtered_df["Payment Date"] = filtered_df["Payment Date"].fillna(
    filtered_df["Data tranzactie"]
)

# Store both dates as 'day-month-year' strings.
filtered_df["Payment Date"] = filtered_df["Payment Date"].dt.strftime("%d-%m-%Y")
filtered_df["Data tranzactie"] = filtered_df["Data tranzactie"].dt.strftime("%d-%m-%Y")

filtered_df.head()

Unnamed: 0,Data tranzactie,Descriere,Debit,Currency,Payment Date
0,02-08-2024,Plata la POS non-BT cu card VISA;EPOS 30/07/20...,-1043.83,RON,30-07-2024
1,01-08-2024,Pachet IZI;Nelimitat in reteaua BT;REF: E41IZ5...,-29.0,RON,01-08-2024
2,31-07-2024,Plata la POS non-BT cu card VISA;EPOS 28/07/20...,-36.99,RON,28-07-2024
3,22-07-2024,Plata OP inter - canal electronic;/ROC/./RFB/2...,-1301.0,RON,22-07-2024
5,07-07-2024,Plata la POS non-BT cu card VISA;EPOS 05/07/20...,-49.99,RON,05-07-2024


In [12]:
# Primary source: the text after "TID:". The pattern optionally skips one run of
# 2-8 non-space characters followed by whitespace, then captures the next run of
# word characters.
tid_merchant = filtered_df["Descriere"].str.extract(
    r"TID:\s*(?:[^\s]{2,8}\s+)?(\w+)", expand=False
)

# Fallback: the fourth ';'-separated field of the description (NaN when the
# description has fewer than four fields).
fourth_field = filtered_df["Descriere"].str.split(";").str[3]

filtered_df["Merchant"] = tid_merchant.fillna(fourth_field)

filtered_df.head()

Unnamed: 0,Data tranzactie,Descriere,Debit,Currency,Payment Date,Merchant
0,02-08-2024,Plata la POS non-BT cu card VISA;EPOS 30/07/20...,-1043.83,RON,30-07-2024,PPC
1,01-08-2024,Pachet IZI;Nelimitat in reteaua BT;REF: E41IZ5...,-29.0,RON,01-08-2024,
2,31-07-2024,Plata la POS non-BT cu card VISA;EPOS 28/07/20...,-36.99,RON,28-07-2024,APPLE
3,22-07-2024,Plata OP inter - canal electronic;/ROC/./RFB/2...,-1301.0,RON,22-07-2024,BUGETUL GENERAL CONSOLIDAT
5,07-07-2024,Plata la POS non-BT cu card VISA;EPOS 05/07/20...,-49.99,RON,05-07-2024,APPLE


In [13]:

# Parse the 'DD-MM-YYYY' strings into datetimes so the sort below is
# chronological rather than lexicographic. An explicit format is used because
# dayfirst=True is only a parsing preference, not a strict rule.
filtered_df["Payment Date"] = pd.to_datetime(
    filtered_df["Payment Date"], format="%d-%m-%Y", errors="coerce"
)

# Display copy of the payment date in 'DD-MM-YYYY' form.
filtered_df["Formatted Payment Date"] = filtered_df["Payment Date"].dt.strftime("%d-%m-%Y")

# Sort chronologically by payment date (unparseable dates, NaT, sort last).
filtered_df = filtered_df.sort_values(by="Payment Date", ascending=True)

# Keep only the output columns, in this order. The datetime 'Payment Date' and
# 'Data tranzactie' columns are intentionally not carried forward.
new_column_order = ["Formatted Payment Date", "Merchant", "Descriere", "Debit", "Currency"]
filtered_df = filtered_df[new_column_order]

filtered_df.head()

Unnamed: 0,Formatted Payment Date,Merchant,Descriere,Debit,Currency
5,05-07-2024,APPLE,Plata la POS non-BT cu card VISA;EPOS 05/07/20...,-49.99,RON
49,08-07-2024,LINKEDIN,Plata la POS non-BT cu card MASTERCARD;EPOS 08...,-33.05,EUR
46,10-07-2024,Amazon,Plata la POS non-BT cu card MASTERCARD;EPOS 10...,-12.27,EUR
45,12-07-2024,Amazon,Plata la POS non-BT cu card MASTERCARD;EPOS 12...,-11.07,EUR
42,15-07-2024,NESPRESSO,Plata la POS non-BT cu card MASTERCARD;EPOS 15...,-39.8,EUR


In [14]:
# Rename to the English output schema. Only columns whose names actually change
# are listed: "Merchant", "Debit" and "Currency" already have their final names,
# and there is no "Fee Currency" column in this frame to rename.
filtered_df = filtered_df.rename(
    columns={
        "Formatted Payment Date": "Payment Date",
        "Descriere": "Description",
    }
)

# Convert the 'DD-MM-YYYY' strings to datetimes; values that do not match the
# format become NaT instead of raising.
filtered_df["Payment Date"] = pd.to_datetime(
    filtered_df["Payment Date"], format="%d-%m-%Y", errors="coerce"
)

# Constant label written to every row of this export.
filtered_df["Account"] = "BT Business"

filtered_df.head()

Unnamed: 0,Payment Date,Merchant,Description,Debit,Currency,Account
5,2024-07-05,APPLE,Plata la POS non-BT cu card VISA;EPOS 05/07/20...,-49.99,RON,BT Business
49,2024-07-08,LINKEDIN,Plata la POS non-BT cu card MASTERCARD;EPOS 08...,-33.05,EUR,BT Business
46,2024-07-10,Amazon,Plata la POS non-BT cu card MASTERCARD;EPOS 10...,-12.27,EUR,BT Business
45,2024-07-12,Amazon,Plata la POS non-BT cu card MASTERCARD;EPOS 12...,-11.07,EUR,BT Business
42,2024-07-15,NESPRESSO,Plata la POS non-BT cu card MASTERCARD;EPOS 15...,-39.8,EUR,BT Business


In [15]:
# Write the cleaned transactions to disk; index=False omits the row-index column.
output_path = ".data/filtered_bt.csv"
filtered_df.to_csv(output_path, index=False)