### Create custom dataset

In [1]:
import pandas as pd

# Load the raw notification export (double-quoted fields, backslash escapes).
notifications_df = pd.read_csv(
    'raw-data.txt',
    quotechar='"',
    escapechar='\\',
)

# Apps whose notifications make up the custom dataset.
app_labels = [
    'Gojek',
    'GoPay',
    'Grab',
    'Jenius',
    'Amazon Shopping',
]

# Keep only rows from the requested apps and persist them; a later cell reads
# 'custom_notifications.csv' back in.
is_requested_app = notifications_df['APP LABEL'].isin(app_labels)
filtered_df = notifications_df.loc[is_requested_app]
filtered_df.to_csv('custom_notifications.csv', index=False)
print(filtered_df.head())

# Per-app notification counts within the filtered data.
app_counts = filtered_df['APP LABEL'].value_counts()
print("\nCount of notifications by app label:", app_counts, sep="\n")

# Report any requested label that never occurs in the data.
missing_labels = set(app_labels).difference(app_counts.index)
if len(missing_labels) > 0:
    print("\nRequested labels not found in the data:", missing_labels)

      ID            PACKAGE NAME APP LABEL  \
0    437         com.gojek.gopay     GoPay   
1    439  com.grabtaxi.passenger      Grab   
128  942         com.gojek.gopay     GoPay   
129  943           com.gojek.app     Gojek   
130  944         com.gojek.gopay     GoPay   

                                     MESSAGE DATE  \
0          Pembayaran ke TOKOPEDIA berhasil.  NaN   
1            Gercep biar kebagian diskonnya.  NaN   
128         Ka-ching! You got GoPay Coins 💰.  NaN   
129  Payment successfully made to TOKOPEDIA.  NaN   
130  Payment successfully made to TOKOPEDIA.  NaN   

                                              CONTENTS            TIMESTAMP  
0    Dana sebesar Rp1.407 telah dipotong dari GoPay...  2023-07-09 03:13:14  
1                 Diskon s.d. 30% tiap Minggu-Selasa 👉  2023-07-09 03:44:43  
128  Yay, you just earned 997 GoPay Coins! Tap here...  2023-07-09 10:21:29  
129  An amount of Rp997 has been made from your GoPay.  2023-07-09 10:37:59  
130  An amount

### Notification parser

In [2]:
from notification_reader import process_notification_data, test_raw_csv_input, update_dictionaries_interactively

### Testing on custom dataset

In [3]:
# Parse the filtered notifications into transactions, print summary statistics,
# and save the recognised transactions to 'processed_transactions.csv'.
# Any failure is reported and then re-raised so the cell visibly fails (with a
# traceback) instead of looking successful while leaving a stale output file.
try:
    # Load the dataset
    print("Loading notifications data...")
    notifications_df = pd.read_csv('custom_notifications.csv')

    print(f"Loaded {len(notifications_df)} notifications")

    # Process the data
    print("Processing notifications...")
    results_df = process_notification_data(notifications_df)

    # Remove entries with "unknown" transaction type
    valid_results_df = results_df[results_df['transaction_type'] != 'unknown']

    # Count transactions by type
    transaction_counts = valid_results_df['transaction_type'].value_counts()

    # Print transaction counts; .get(..., 0) covers types absent from the data
    print("\nTransaction Type Summary:")
    print(f"Income: {transaction_counts.get('income', 0)}")
    print(f"Expense: {transaction_counts.get('expense', 0)}")
    print(f"Transfer: {transaction_counts.get('transfer', 0)}")
    print(f"Top-up: {transaction_counts.get('top_up', 0)}")
    print(f"Total valid transactions: {len(valid_results_df)}")

    # Number of rows dropped by the "unknown" filter above
    removed_count = len(results_df) - len(valid_results_df)
    if removed_count > 0:
        print(f"Removed {removed_count} unknown transaction entries")

    # Category distribution
    category_counts = valid_results_df['category'].value_counts()
    print("\nCategory Distribution:")
    for category, count in category_counts.items():
        print(f"{category}: {count}")

    # Calculate financial summary (only when the parser produced an amount column)
    if 'amount' in valid_results_df.columns:
        # Select rows and column in a single .loc call rather than chained indexing
        income_amount = valid_results_df.loc[
            valid_results_df['transaction_type'] == 'income', 'amount'
        ].sum()
        expense_amount = valid_results_df.loc[
            valid_results_df['transaction_type'] == 'expense', 'amount'
        ].sum()

        print("\nFinancial Summary:")
        print(f"Total Income: {income_amount:,.2f}")
        print(f"Total Expenses: {expense_amount:,.2f}")
        print(f"Net Balance: {income_amount - expense_amount:,.2f}")

    # Save to CSV
    valid_results_df.to_csv('processed_transactions.csv', index=False)

    print(f"\nSuccessfully processed and saved {len(valid_results_df)} transactions.")

except Exception as e:
    # Keep the short human-readable message, then propagate the original
    # exception so the failure and its traceback are not hidden.
    print(f"Error processing data: {e}")
    raise

Loading notifications data...
Loaded 4658 notifications
Processing notifications...
Processed 4418 transactions, skipped 240 blacklisted apps

Transaction Type Summary:
Income: 995
Expense: 1035
Transfer: 54
Top-up: 0
Total valid transactions: 2084
Removed 2334 unknown transaction entries

Category Distribution:
finance: 1524
shopping: 218
cashback: 196
bills: 44
transfer: 36
food: 36
transport: 15
other: 15

Financial Summary:
Total Income: 7,831,232.16
Total Expenses: 39,264,202.87
Net Balance: -31,432,970.71

Successfully processed and saved 2084 transactions.


### Manual testing

In [4]:
# A single raw CSV record. Its seven fields appear to follow the column order
# shown in the dataset preview (ID, PACKAGE NAME, APP LABEL, MESSAGE, DATE,
# CONTENTS, TIMESTAMP), with the DATE field left empty.
raw_input = """950,com.gojek.app,Gojek,Transaction refund.,,You have received Rp101.700 GoPay refund.,2023-07-09 10:47:23"""
raw_results = test_raw_csv_input(raw_input)

# Print every parsed transaction as an indented "field: value" listing.
print("\nResults from Raw CSV Input:")
for position, parsed in enumerate(raw_results, start=1):
    print(f"\nTransaction {position}:")
    for field, field_value in parsed.items():
        print(f"  {field}: {field_value}")

Processed 1 transactions, skipped 0 blacklisted apps

Results from Raw CSV Input:

Transaction 1:
  id: 950
  timestamp: 2023-07-09 10:47:23
  transaction_type: income
  amount: 101700.0
  account: GoPay
  category: finance


### Updating dictionary

In [5]:
# Start the dictionary updater imported from notification_reader. Judging by the
# recorded output it prints current dictionary statistics and a numbered menu;
# it presumably waits for user input, so expect this cell to block during an
# unattended "Run All" — TODO confirm.
update_dictionaries_interactively()


DICTIONARY UPDATER
Update transaction categorization dictionaries or apply personas
Available features: Dictionary Updates, Persona Selection

Current Statistics:
  categories: 9 subcategories, 82 keywords
  merchants: 4 subcategories, 13 keywords
  transaction_types: 3 subcategories, 28 keywords
  blacklist: 1 apps

DICTIONARY MANAGER MENU
1. Select & Apply Persona (Replace Categories)
2. Update Categories
3. Update Merchants
4. Update Transaction Types
5. Update Blacklisted Apps
6. Show Statistics
7. Exit

DICTIONARY STATISTICS

CATEGORIES:
  Subcategories: 9
  Total keywords: 82
  Breakdown:
    food: 12 keywords
    transport: 13 keywords
    shopping: 11 keywords
    entertainment: 8 keywords
    bills: 7 keywords
    ... and 4 more subcategories

MERCHANTS:
  Subcategories: 4
  Total keywords: 13
  Breakdown:
    transport: 5 keywords
    food: 2 keywords
    shopping: 4 keywords
    finance: 2 keywords

TRANSACTION_TYPES:
  Subcategories: 3
  Total keywords: 28
  Breakdown:
   