In [1]:
import pandas as pd

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Read the CSV file into a DataFrame
df = pd.read_csv("current_accounts.csv")

# Display the first 5 rows
print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

# Print the column names and their data types
print(df.info())

| Supplier                                     | Date       | Expiration   | Voucher Type   | Voucher       | Debit   | Credit   | Balance   | Status    |
|:---------------------------------------------|:-----------|:-------------|:---------------|:--------------|:--------|:---------|:----------|:----------|
| Adt Security Service S.A. (0808 - ADT)       | 01/10/2023 | 10/10/2023   | Fact. A        | 0107-03698827 | nan     | 11742,79 | 11742,79  | PENDIENTE |
| Adt Security Service S.A. (0808 - ADT)       | 01/10/2023 | 10/10/2023   | Fact. A        | 0107-03698992 | nan     | 15640,52 | 27383,31  | PENDIENTE |
| Adt Security Service S.A. (0808 - ADT)       | 01/10/2023 | 10/10/2023   | Fact. A        | 0107-03699332 | nan     | 15449    | 42832,31  | PENDIENTE |
| Agua Y Saneamiento Mendoza Sa (0423 - AYSAM) | 15/08/2023 | 14/09/2023   | Fact. A        | 0003-00595740 | nan     | 5618,69  | 48451     | PARCIAL   |
| Agua Y Saneamiento Mendoza Sa (0423 - AYSAM) | 11/09/2023 | nan     

In [2]:
# Replace the characters in `Debit` and `Credit` columns and convert to numeric
df["Debit"] = (
    df["Debit"]
    .astype(str)
    .str.replace(".", ",", regex=False)
    .str.replace(",", "", regex=False)
)
df["Credit"] = (
    df["Credit"]
    .astype(str)
    .str.replace(".", ",", regex=False)
    .str.replace(",", "", regex=False)
)

df["Debit"] = pd.to_numeric(df["Debit"], errors="coerce")
df["Credit"] = pd.to_numeric(df["Credit"], errors="coerce")

# Replace the characters in `Balance` column and convert to numeric
df["Balance"] = (
    df["Balance"]
    .astype(str)
    .str.replace(".", ",", regex=False)
    .str.replace(",", "", regex=False)
)
df["Balance"] = pd.to_numeric(df["Balance"], errors="coerce")

# Check if `Balance` column is the cumulative sum of `Credit` minus `Debit` for each row
df["Expected_Balance"] = (df["Credit"].fillna(0) - df["Debit"].fillna(0)).cumsum()

# Compare `Balance` and `Expected_Balance`
mismatches = df[df["Balance"] != df["Expected_Balance"]]

if not mismatches.empty:
    print("Rows where Balance is not the cumulative sum of Credit minus Debit:")
    print(
        mismatches[
            [
                "Supplier",
                "Date",
                "Voucher Type",
                "Voucher",
                "Debit",
                "Credit",
                "Balance",
                "Expected_Balance",
            ]
        ].to_markdown(index=False, numalign="left", stralign="left")
    )
else:
    print("All rows have Balance as the cumulative sum of Credit minus Debit")

# Print the unique values in `Status` column
print("\nUnique values in Status column:")
print(df["Status"].unique())

# Calculate and print aggregate statistics for each unique value in `Status` column
print("\nAggregate statistics for each unique value in Status column:")

agg_df = df.groupby("Status")["Balance"].agg(["mean", "max", "min"])
print(agg_df.to_markdown(numalign="left", stralign="left"))

# Calculate and print aggregate statistics for rows where `Status` is null
print("\nAggregate statistics for rows where Status is null:")

agg_df = df[df["Status"].isnull()]["Balance"].agg(["mean", "max", "min"])
print(agg_df.to_markdown(numalign="left", stralign="left"))

Rows where Balance is not the cumulative sum of Credit minus Debit:
| Supplier                                                                             | Date       | Voucher Type      | Voucher          | Debit       | Credit      | Balance    | Expected_Balance   |
|:-------------------------------------------------------------------------------------|:-----------|:------------------|:-----------------|:------------|:------------|:-----------|:-------------------|
| Adt Security Service S.A. (0808 - ADT)                                               | 01/10/2023 | Fact. A           | 0107-03699332    | nan         | 15449       | 4283231    | 2.75378e+06        |
| Agua Y Saneamiento Mendoza Sa (0423 - AYSAM)                                         | 15/08/2023 | Fact. A           | 0003-00595740    | nan         | 561869      | 48451      | 3.31565e+06        |
| Agua Y Saneamiento Mendoza Sa (0423 - AYSAM)                                         | 11/09/2023 | Mercado Pago      