### Create custom dataset

In [1]:
import pandas as pd

# Read the raw notification export; fields are double-quoted and a backslash
# acts as the escape character.
notifications_df = pd.read_csv(
    'raw-data.txt',
    quotechar='"',
    escapechar='\\',
)

# Drop every notification whose app label is on the exclusion list and write
# the remainder to the CSV that the processing cell loads.
# (Alternative kept for reference: an allow-list such as
#  ['Gojek', 'GoPay', 'Grab', 'Jenius', 'Amazon Shopping'] selected with
#  .isin(...) instead of the negated mask, optionally followed by a set
#  difference against app_counts.index to report requested labels that are
#  missing from the data.)
unwanted_labels = ['Stay Focused', 'Android System']
is_unwanted = notifications_df['APP LABEL'].isin(unwanted_labels)
filtered_df = notifications_df.loc[~is_unwanted]
filtered_df.to_csv('custom_notifications.csv', index=False)

# Notification totals per app label (value_counts sorts most frequent first).
app_counts = filtered_df['APP LABEL'].value_counts()
print("\nCount of notifications by app label:")
print(app_counts)


Count of notifications by app label:
APP LABEL
Tokopedia                                                                                                              6777
FT                                                                                                                     3896
Messages                                                                                                               3672
MSN                                                                                                                    2667
Duolingo                                                                                                               2467
                                                                                                                       ... 
by.U                                                                                                                      1
My HKG                                                                              

### Notification parser

In [2]:
from transaction_parser import process_notification_data, test_raw_csv_input, update_dictionaries_interactively, manage_regex_patterns_interactively

### Testing on custom dataset

In [3]:
try:
    # Load the filtered notifications from disk.
    print("Loading notifications data...")
    notifications_df = pd.read_csv('custom_notifications.csv')

    print(f"Loaded {len(notifications_df)} notifications")

    # Run the parser over every notification.
    print("Processing notifications...")
    results_df = process_notification_data(notifications_df)

    # Keep only rows whose transaction type is not "unknown", and remember
    # how many rows that filter dropped so it is computed once.
    valid_results_df = results_df[results_df['transaction_type'] != 'unknown']
    removed_count = len(results_df) - len(valid_results_df)

    # Count transactions by type; .get(..., 0) covers types with no rows.
    transaction_counts = valid_results_df['transaction_type'].value_counts()

    print("\nTransaction Type Summary:")
    print(f"Income: {transaction_counts.get('income', 0)}")
    print(f"Expense: {transaction_counts.get('expense', 0)}")
    print(f"Transfer: {transaction_counts.get('transfer', 0)}")
    print(f"Total valid transactions: {len(valid_results_df)}")

    if removed_count > 0:
        print(f"Removed {removed_count} unknown transaction entries")

    # Category distribution
    category_counts = valid_results_df['category'].value_counts()
    print("\nCategory Distribution:")
    for category, count in category_counts.items():
        print(f"{category}: {count}")

    # Financial summary, only when the parser produced an 'amount' column.
    # NOTE(review): amounts are summed as-is; presumably they all share one
    # currency — confirm against the parser.
    if 'amount' in valid_results_df.columns:
        is_income = valid_results_df['transaction_type'] == 'income'
        is_expense = valid_results_df['transaction_type'] == 'expense'
        # .loc[mask, column] selects in one step instead of chained indexing.
        income_amount = valid_results_df.loc[is_income, 'amount'].sum()
        expense_amount = valid_results_df.loc[is_expense, 'amount'].sum()

        print("\nFinancial Summary:")
        print(f"Total Income: {income_amount:,.2f}")
        print(f"Total Expenses: {expense_amount:,.2f}")
        print(f"Net Balance: {income_amount - expense_amount:,.2f}")

    # Save to CSV
    valid_results_df.to_csv('processed_transactions.csv', index=False)

    print(f"\nSuccessfully processed and saved {len(valid_results_df)} transactions.")

except Exception as e:
    # Still best-effort (the cell does not raise), but show the full traceback
    # so a failure can be located instead of only printing its message.
    import traceback

    print(f"Error processing data: {e}")
    traceback.print_exc()

Loading notifications data...
Loaded 42369 notifications
Processing notifications...
Processed 42129 transactions, skipped 240 blacklisted apps

Transaction Type Summary:
Income: 4127
Expense: 5479
Transfer: 476
Total valid transactions: 10082
Removed 32047 unknown transaction entries

Category Distribution:
other: 5284
savings: 2611
shopping: 508
housing: 424
financial: 414
cashback: 196
food: 167
entertainment: 152
lifestyle: 139
transport: 137
health: 19
healthcare: 19
career: 7
fitness: 5

Financial Summary:
Total Income: 2,080,468,368.66
Total Expenses: 187,459,719.54
Net Balance: 1,893,008,649.12

Successfully processed and saved 10082 transactions.


### Manual testing

In [4]:
# A single comma-separated notification record fed to the parser's
# raw-input helper.
raw_input = """5909,com.gojek.app,Gojek,Pembayaran ke TOKOPEDIA berhasil.,,Dana sebesar Rp138.000 telah dipotong dari GoPay kamu.,2023-07-12 22:03:22"""
raw_results = test_raw_csv_input(raw_input)

# Print every field of each parsed result, numbering the results from 1.
print("\nResults from Raw CSV Input:")
for position, parsed in enumerate(raw_results, start=1):
    print(f"\nTransaction {position}:")
    for field, content in parsed.items():
        print(f"  {field}: {content}")

Processed 1 transactions, skipped 0 blacklisted apps

Results from Raw CSV Input:

Transaction 1:
  id: 5909
  timestamp: 2023-07-12 22:03:22
  transaction_type: expense
  amount: 138.0
  account_number: None
  category: savings


### Updating dictionaries and regex patterns

In [5]:
# Start the dictionary manager imported from transaction_parser.
# NOTE(review): judging by the recorded menu output this appears to wait for
# user input, so the cell presumably blocks under "Run All" — confirm.
update_dictionaries_interactively()


DICTIONARY MANAGER
Update transaction categorization dictionaries or apply personas

Current Statistics:
  categories: 7 subcategories, 94 keywords
  merchants: 9 subcategories, 133 keywords
  transaction_types: 3 subcategories, 61 keywords
  blacklist: 1 apps

DICTIONARY MANAGER MENU
1. Select & Apply Persona (Replace Categories)
2. Update Categories
3. Update Merchants
4. Update Transaction Types
5. Update Blacklisted Apps
6. Show Statistics
7. Exit



Exiting dictionary manager.


In [6]:
# Start the regex pattern manager imported from transaction_parser.
# NOTE(review): judging by the recorded menu output this appears to wait for
# user input, so the cell presumably blocks under "Run All" — confirm.
manage_regex_patterns_interactively()


REGEX PATTERN MANAGER
Manage regex patterns for transaction parsing

PATTERN MANAGER MENU
1. Manage Amount Patterns
2. Manage Account Patterns
3. View All Patterns
4. Test Patterns
5. Reload Patterns from File
6. Exit

Exiting pattern manager.
