In [1]:
import pandas as pd
import numpy as np
import pdfplumber
import csv

In [2]:
def convert(pdf_path, csv_path):
    with pdfplumber.open(pdf_path) as pdf, open(csv_path, "w") as csv_file:
        writer = csv.writer(csv_file, lineterminator="\n")
        for page in pdf.pages:
            tables = page.extract_tables()
            for table in tables:
                if table:
                    for row in table:
                        writer.writerow(row)

In [None]:
convert("../statement_pdf.pdf", "../statement.csv")

In [75]:
df = pd.read_csv("statement.csv", header=None)

In [76]:
df.head()

Unnamed: 0,0,1,2,3,4,5
0,DATE,DESCRIPTION,CHEQUE NO,DEBIT,CREDIT,BALANCE
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,,30.00,,18985.78
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,,17.00,,18968.78
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,,32.00,,18936.78
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,,47.50,,18889.28


In [77]:
df.drop_duplicates(inplace=True)

In [78]:
df.drop(0, axis=0, inplace=True)

In [79]:
df.head()

Unnamed: 0,0,1,2,3,4,5
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,,30.0,,18985.78
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,,17.0,,18968.78
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,,32.0,,18936.78
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,,47.5,,18889.28
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,,,1.0,18890.28


In [80]:
def categorize(desc: str, debit):
    desc = str(desc).upper()
    if "UPI" in desc:
        return "Expenses:UPI"
    elif "INTEREST" in desc:
        return "Income:Interest"
    elif debit:
        return "Expenses:Misc"
    else:
        return "Income:Misc"

In [81]:
df.drop([2, 5], axis=1, inplace=True)

In [82]:
df.head()

Unnamed: 0,0,1,3,4
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,30.0,
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,17.0,
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,32.0,
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,47.5,
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,,1.0


In [83]:
df.columns = ["Date", "Description", "Debit", "Credit"]

In [84]:
df.head()

Unnamed: 0,Date,Description,Debit,Credit
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,30.0,
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,17.0,
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,32.0,
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,47.5,
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,,1.0


In [86]:
df['Account'] = df.apply(lambda x: categorize(x["Description"], x["Debit"]), axis=1)

In [87]:
df.head()

Unnamed: 0,Date,Description,Debit,Credit,Account
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,30.0,,Expenses:UPI
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,17.0,,Expenses:UPI
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,32.0,,Expenses:UPI
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,47.5,,Expenses:UPI
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,,1.0,Expenses:UPI


In [88]:
df["Debit"] = df['Debit'].fillna(0)
df["Credit"] = df['Credit'].fillna(0)

In [92]:
def currency_to_float(value):
    if isinstance(value, str):
        value = value.replace('₹', '').replace('$', '')  # Remove currency symbols
        value = value.replace(',', '')                  # Remove commas
    return float(value)


In [93]:
df["Debit"] = df["Debit"].apply(currency_to_float)
df["Credit"] = df["Credit"].apply(currency_to_float)

In [94]:
df["Amount"] = df["Credit"] - df["Debit"]

In [95]:
df.head()

Unnamed: 0,Date,Description,Debit,Credit,Account,Amount
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,30.0,0.0,Expenses:UPI,-30.0
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,17.0,0.0,Expenses:UPI,-17.0
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,32.0,0.0,Expenses:UPI,-32.0
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,47.5,0.0,Expenses:UPI,-47.5
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,0.0,1.0,Expenses:UPI,1.0


In [96]:
df = df[["Date", "Description", "Account", "Amount"]]

In [97]:
df.head()

Unnamed: 0,Date,Description,Account,Amount
1,03/04/2025,TO ONL UPI/DR/509335780211/THE ULTI/YESB/PAYTM...,Expenses:UPI,-30.0
2,04/04/2025,TO ONL UPI/DR/546087903003/INDIAN I/SBIN/38329...,Expenses:UPI,-17.0
3,04/04/2025,TO ONL UPI/DR/546078415439/INDIAN I/SBIN/38329...,Expenses:UPI,-32.0
4,06/04/2025,TO ONL UPI/DR/546320612982/SAPTARSI/SBIN/SAPTA...,Expenses:UPI,-47.5
5,07/04/2025,BY ONL UPI/CR/509734628153/KHURSHID/NSPB/AKURS...,Expenses:UPI,1.0


In [99]:
df.to_csv("statement.csv", index=False)

In [None]:
df[df['Date'] != "TOTAL"]