### Create custom dataset

In [1]:
import pandas as pd

# --- Load ---------------------------------------------------------------
# Parse the raw export using double-quote quoting and backslash as the
# escape character.
notifications_df = pd.read_csv('raw-data.txt', quotechar='"', escapechar='\\')

# --- Filter (denylist) and persist --------------------------------------
# Keep every row whose 'APP LABEL' is NOT in the unwanted list, then write
# the result (without the index column) for the next stage to read.
unwanted_labels = ['Stay Focused', 'Android System']
filtered_df = notifications_df.loc[
    lambda frame: ~frame['APP LABEL'].isin(unwanted_labels)
]
filtered_df.to_csv('custom_notifications.csv', index=False)

# --- Summary ------------------------------------------------------------
# Number of remaining notifications per app label, most frequent first.
app_counts = filtered_df['APP LABEL'].value_counts()
print("\nCount of notifications by app label:", app_counts, sep="\n")

# --- Disabled alternative (allowlist) -----------------------------------
# Kept for reference: keep ONLY the listed apps instead of dropping a few,
# and report requested labels that never occur. The missing-label check
# relies on `app_counts` having been computed from the allowlist result.
# app_labels = ['Gojek', 'GoPay', 'Grab', 'Jenius', 'Amazon Shopping']
# filtered_df = notifications_df[notifications_df['APP LABEL'].isin(app_labels)]
# filtered_df.to_csv('custom_notifications.csv', index=False)
# print(filtered_df.head())
# missing_labels = set(app_labels) - set(app_counts.index)
# if missing_labels:
#     print("\nRequested labels not found in the data:", missing_labels)


Count of notifications by app label:
APP LABEL
Tokopedia                                                                                                              6777
FT                                                                                                                     3896
Messages                                                                                                               3672
MSN                                                                                                                    2667
Duolingo                                                                                                               2467
                                                                                                                       ... 
by.U                                                                                                                      1
My HKG                                                                              

### Notification parser

In [2]:
import pandas as pd
from notification_reader import process_notification_data

def _print_summary(valid_results_df):
    """Print per-type transaction counts and the ten most frequent categories."""
    print("\nTransaction Summary:")
    print(f"Transaction Types:")
    print(valid_results_df['transaction_type'].value_counts())
    print(f"\nTop Categories:")
    print(valid_results_df['category'].value_counts().head(10))


def main():
    """Run the notification-to-transaction pipeline end to end.

    Reads ``custom_notifications.csv``, hands the frame plus the dictionary
    and regex-pattern file paths to ``process_notification_data``, drops rows
    whose ``transaction_type`` equals ``'unknown'``, writes the remaining rows
    to ``processed_transactions.csv`` and prints a short summary when at
    least one row remains.

    Any ``Exception`` raised along the way is caught and reported via
    ``print``; it is not re-raised. Returns ``None``.
    """
    # Fixed input/output locations, resolved relative to the working directory.
    notifications_file = 'custom_notifications.csv'
    dictionary_file = 'dictionary.json'
    patterns_file = 'regex_patterns.json'
    output_file = 'processed_transactions.csv'

    try:
        # Stage 1: load.
        print("Loading notifications data...")
        notifications_df = pd.read_csv(notifications_file)
        print(f"Loaded {len(notifications_df)} notifications")

        # Stage 2: parse/classify using the dictionary and pattern files.
        print("Processing notifications...")
        results_df = process_notification_data(notifications_df, dictionary_file, patterns_file)

        # Stage 3: keep only rows whose transaction_type is not 'unknown'.
        is_known = results_df['transaction_type'] != 'unknown'
        valid_results_df = results_df[is_known]
        print(f"Found {len(valid_results_df)} valid transactions out of {len(results_df)} processed")

        # Stage 4: persist (written even when no valid rows remain).
        valid_results_df.to_csv(output_file, index=False)
        print(f"\nSuccessfully processed and saved {len(valid_results_df)} transactions to {output_file}")

        # Stage 5: summary statistics, skipped for an empty result.
        if len(valid_results_df) > 0:
            _print_summary(valid_results_df)

    except FileNotFoundError as e:
        # The message lists every expected input, not just the one that failed.
        print(f"File not found: {e}")
        print("Please ensure the following files exist:")
        print(f"- {notifications_file}")
        print(f"- {dictionary_file}")
        print(f"- {patterns_file}")
    except KeyError as e:
        print(f"Missing required data in dictionary file: {e}")
    except ValueError as e:
        print(f"Invalid file format: {e}")
    except Exception as e:
        # Catch-all so a failure is reported instead of raising a traceback.
        print(f"Error processing data: {e}")

# Entry guard: call main() only when this code runs as the top-level module
# (``__name__`` is "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Loading notifications data...
Loaded 42369 notifications
Processing notifications...
Processed 42369 transactions, skipped 0 blacklisted apps
Found 10161 valid transactions out of 42369 processed

Successfully processed and saved 10161 transactions to processed_transactions.csv

Transaction Summary:
Transaction Types:
transaction_type
expense     5546
income      4134
transfer     481
Name: count, dtype: int64

Top Categories:
category
other            3178
savings          2960
shopping         1466
income            956
housing           463
transport         356
entertainment     255
cashback          196
education         176
food              107
Name: count, dtype: int64
