<a href="https://colab.research.google.com/github/sathvik-ujwal/-AI-Powered-Personal-Expense-Categorization-/blob/main/expense_categorization1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
import pandas as pd
import numpy as np
import re
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import os
import warnings

from google.colab import drive

# Mount Google Drive so that saved artifacts outlive the Colab session.
drive.mount('/content/drive')

# Directory on Drive that receives the trained models and the label mapping.
save_dir = '/content/drive/MyDrive/expense_categorization/'
os.makedirs(save_dir, exist_ok=True)

# Silence every Python warning for the remainder of the notebook.
warnings.filterwarnings('ignore')

Mounted at /content/drive


In [None]:
# Read the transactions CSV from the working directory and preview the first ten rows.
df = pd.read_csv(filepath_or_buffer='complex_transactions_faker1.csv')
print(df.head(n=10))

   transaction_id  user_id            date_time                city    amount  \
0               1     8936  2024-07-25 04:35:15               Delhi  14861.19   
1               2     3612  2025-03-06 01:47:11              Jaipur  13172.75   
2               3     5515  2024-11-12 03:03:06               Delhi   7134.89   
3               4     2046  2025-04-10 10:58:50  Thiruvananthapuram   1347.28   
4               5     1140  2025-05-01 15:41:25              Mumbai   7551.88   
5               6     5978  2025-05-03 12:12:47           Bengaluru   2469.53   
6               7     8752  2025-02-07 16:07:39              Bhopal   1892.35   
7               8      917  2025-02-23 06:08:03             Chennai  11360.52   
8               9     5156  2025-05-21 15:02:43             Chennai  18886.74   
9              10     9578  2024-09-04 18:44:05           Ahmedabad  15027.29   

  payment_method                  merchant  \
0  Mobile Wallet          Maharaj and Sons   
1  Mobile Wallet

In [None]:
# Distinct target categories, in order of first appearance.
pd.unique(df['category'])

array(['Shopping', 'Food & Dining', 'Transportation', 'Travel',
       'Bills & Utilities', 'Entertainment', 'Healthcare', 'Housing',
       'Personal Care', 'Insurance', 'Education', 'Financial Obligations',
       'Miscellaneous', 'Taxes', 'Charity/Donations', 'Pets', 'Childcare'],
      dtype=object)

In [None]:
# Rows per category, most frequent first (shows the class imbalance).
category_counts = df.loc[:, 'category'].value_counts()
print("Category counts:\n", category_counts)

Category counts:
 category
Shopping                 20000
Food & Dining            18000
Transportation           12000
Bills & Utilities         9000
Travel                    8000
Entertainment             8000
Healthcare                5000
Housing                   5000
Personal Care             4000
Insurance                 3000
Education                 3000
Financial Obligations     3000
Miscellaneous             2000
Taxes                     2000
Charity/Donations         1000
Pets                      1000
Childcare                 1000
Name: count, dtype: int64


In [None]:
def clean_text(text):
    """Normalize a transaction description before vectorizing/tokenizing it.

    The text is lower-cased, every character that is not a lowercase ASCII
    letter, a digit or whitespace is replaced by a space, runs of whitespace
    are collapsed to a single space, and leading/trailing whitespace is
    stripped.

    Args:
        text: The raw description. Non-string values are converted with
            ``str``. Missing values (``None`` or a float NaN) are treated as
            an empty description.

    Returns:
        The cleaned string; may be empty.
    """
    # A missing description would otherwise be stringified into the spurious
    # tokens "none" / "nan" and leak into the features. NaN is the only float
    # that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)  # Ensure text is string
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

In [None]:
# Normalized description text that every model in this notebook consumes.
df['clean_desc'] = df['description'].map(clean_text)

# Encode the category names as integer codes and keep the code -> name
# lookup so predictions can be decoded later.
labels = df['category'].astype('category')
df['label'] = labels.cat.codes
label_mapping = {code: name for code, name in enumerate(labels.cat.categories)}

In [None]:
print(label_mapping)

{0: 'Bills & Utilities', 1: 'Charity/Donations', 2: 'Childcare', 3: 'Education', 4: 'Entertainment', 5: 'Financial Obligations', 6: 'Food & Dining', 7: 'Healthcare', 8: 'Housing', 9: 'Insurance', 10: 'Miscellaneous', 11: 'Personal Care', 12: 'Pets', 13: 'Shopping', 14: 'Taxes', 15: 'Transportation', 16: 'Travel'}


In [None]:
# 80/20 split, stratified on the label so each split keeps the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_desc'],
    df['label'],
    stratify=df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# TF-IDF over unigrams and bigrams. The vocabulary is fitted on the training
# split only and then applied unchanged to the test split.
tfidf = TfidfVectorizer(ngram_range=(1,2), max_features=10000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Three tree-ensemble baselines, all seeded identically so a re-run reproduces
# the same models. `use_label_encoder` is no longer passed to XGBoost: it is
# deprecated and ignored by recent releases, and the labels are already
# integer codes.
xgb = XGBClassifier(n_estimators=100, eval_metric='mlogloss', random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
catboost = CatBoostClassifier(iterations=100, random_state=42, verbose=0)

xgb.fit(X_train_tfidf, y_train)
rf.fit(X_train_tfidf, y_train)
catboost.fit(X_train_tfidf, y_train)

# Test-set predictions, consumed by the reporting cell below.
y_pred_xgb = xgb.predict(X_test_tfidf)
y_pred_rf = rf.predict(X_test_tfidf)
y_pred_catboost = catboost.predict(X_test_tfidf)

KeyboardInterrupt: 

In [None]:


# Per-class precision/recall/F1 for each baseline, printed in training order.
for _header, _preds in (
    ("=== XGBoost Classification Report ===", y_pred_xgb),
    ("=== Random Forest Classification Report ===", y_pred_rf),
    ("=== CatBoost Classification Report ===", y_pred_catboost),
):
    print(_header)
    print(classification_report(y_test, _preds, target_names=labels.cat.categories))

# Overall test accuracy of each baseline, kept for later comparison.
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
accuracy_catboost = accuracy_score(y_test, y_pred_catboost)

=== XGBoost Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       0.97      0.79      0.87      1800
    Charity/Donations       0.98      0.79      0.87       200
            Childcare       0.97      0.71      0.82       200
            Education       0.97      0.71      0.82       600
        Entertainment       0.99      0.81      0.89      1600
Financial Obligations       1.00      0.79      0.88       600
        Food & Dining       0.63      0.89      0.74      3600
           Healthcare       0.95      0.72      0.82      1000
              Housing       0.98      0.81      0.89      1000
            Insurance       0.99      0.77      0.87       600
        Miscellaneous       0.99      0.70      0.82       400
        Personal Care       0.96      0.74      0.84       800
                 Pets       0.99      0.76      0.86       200
             Shopping       0.70      0.89      0.78      4000
                

In [None]:
from sklearn.model_selection import GridSearchCV

# 3 x 3 x 3 = 27 candidate configurations for the XGBoost baseline.
param_grid_xgb = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2]
}

# The estimator is seeded so the search is reproducible. `use_label_encoder`
# is no longer passed: it is deprecated and ignored by recent XGBoost releases.
grid_xgb = GridSearchCV(
    XGBClassifier(eval_metric='mlogloss', random_state=42),
    param_grid_xgb,
    cv=3,
    scoring='accuracy',
    verbose=3
)

In [None]:
# Run the exhaustive search on the TF-IDF training features, then report the
# winning configuration and its mean cross-validated accuracy.
grid_xgb.fit(X_train_tfidf, y=y_train)
print("Best parameters for XGBoost:", grid_xgb.best_params_)
print("Best cross-validation accuracy for XGBoost:", grid_xgb.best_score_)

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV 1/3] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=0.784 total time=  20.0s
[CV 2/3] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=0.783 total time=  21.4s
[CV 3/3] END learning_rate=0.01, max_depth=3, n_estimators=50;, score=0.782 total time=  19.9s
[CV 1/3] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.791 total time=  41.7s
[CV 2/3] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.793 total time=  42.3s
[CV 3/3] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.786 total time=  41.8s
[CV 1/3] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.804 total time= 1.5min
[CV 2/3] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.802 total time= 1.4min
[CV 3/3] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.796 total time= 1.4min
[CV 1/3] END learning_rate=0.01, max_depth=5, n_estimators=50;, score=0.808 to

In [None]:
import joblib

# Persist the fitted TF-IDF vectorizer next to the model: predict_category()
# loads both, and the model can only score features produced by the exact
# vocabulary it was trained with.
joblib.dump(tfidf, os.path.join(save_dir, 'tfidf_vectorizer.pkl'))

# Persist the best estimator found by the grid search (kept as the last
# expression so the cell still displays the saved model path).
joblib.dump(grid_xgb.best_estimator_, os.path.join(save_dir, 'xgb_best_model.pkl'))

['/content/drive/MyDrive/expense_categorization/xgb_best_model.pkl']

In [None]:
import pickle

# Store the integer-code -> category-name lookup alongside the saved models.
with open(os.path.join(save_dir, 'label_mapping.pkl'), 'wb') as f:
    f.write(pickle.dumps(label_mapping))

In [None]:
def predict_category(description, model_name, model_path, vectorizer_path, label_mapping_path):
    """Predict the expense category of a single transaction description.

    Args:
        description: Raw description text; it is normalized with
            ``clean_text`` before being vectorized.
        model_name: Not used by this function; kept in the signature so
            existing callers keep working.
        model_path: Path to a joblib-serialized classifier exposing ``predict``.
        vectorizer_path: Path to the joblib-serialized fitted vectorizer that
            the classifier was trained with.
        label_mapping_path: Path to the pickled ``{code: category}`` dict.

    Returns:
        The category name that the label mapping holds for the predicted code.

    Raises:
        KeyError: If the predicted code is not a key of the label mapping.
    """
    # NOTE: joblib.load / pickle.load can execute arbitrary code while
    # deserializing; only point these paths at artifacts you produced yourself.
    model = joblib.load(model_path)
    vectorizer = joblib.load(vectorizer_path)
    with open(label_mapping_path, 'rb') as f:
        label_mapping = pickle.load(f)

    # Clean and transform the description (a one-element batch).
    features = vectorizer.transform([clean_text(description)])

    # Some estimators return predictions with shape (n_samples, 1) instead of
    # (n_samples,) (e.g. CatBoost for multiclass); flatten first, then cast to
    # a plain int so the value is always a valid, hashable dictionary key.
    predicted_label = int(np.ravel(model.predict(features))[0])
    return label_mapping[predicted_label]


In [None]:
# Turn off Weights & Biases logging before any Trainer is created.
os.environ.update({"WANDB_MODE": "disabled"})

In [None]:
def fine_tune_transformer(model_name, train_texts, train_labels, test_texts, test_labels):
    """Fine-tune a Hugging Face sequence classifier on expense descriptions.

    Reads the notebook-level globals ``label_mapping`` (integer code ->
    category name) and ``save_dir`` (directory the result is written to).

    Args:
        model_name: Hub id or local path of the pretrained checkpoint.
        train_texts: pandas Series of training strings.
        train_labels: pandas Series of integer class codes for ``train_texts``.
        test_texts: pandas Series of held-out strings, used for the per-epoch
            evaluation and the final report.
        test_labels: pandas Series of integer class codes for ``test_texts``.

    Returns:
        Tuple ``(model, tokenizer, y_pred, accuracy)`` where ``y_pred`` holds
        the predicted class codes for ``test_texts`` and ``accuracy`` is the
        accuracy of those predictions against ``test_labels``.
    """
    # Category names ordered by integer code. label_mapping is built with
    # enumerate(), so its keys are exactly 0..K-1. Using it here (instead of
    # the separate global `labels`) keeps the report consistent with num_labels.
    class_names = [str(label_mapping[code]) for code in range(len(label_mapping))]

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(class_names),
        # Store the category names in the model config so the saved checkpoint
        # reports real names instead of generic LABEL_<i> identifiers.
        id2label=dict(enumerate(class_names)),
        label2id={name: code for code, name in enumerate(class_names)},
    )

    # Tokenize
    train_encodings = tokenizer(
        train_texts.tolist(), truncation=True, padding=True, max_length=128
    )
    test_encodings = tokenizer(
        test_texts.tolist(), truncation=True, padding=True, max_length=128
    )

    class ExpenseDataset(torch.utils.data.Dataset):
        """Pairs tokenizer encodings with integer labels, one example per index."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
            # Force int64: the label Series comes from `cat.codes` and may
            # carry a narrower integer dtype than the loss expects.
            item['labels'] = torch.tensor(int(self.labels.iloc[idx]), dtype=torch.long)
            return item

    # Positional indexing in the dataset requires a clean 0..n-1 index.
    train_dataset = ExpenseDataset(train_encodings, train_labels.reset_index(drop=True))
    eval_dataset = ExpenseDataset(test_encodings, test_labels.reset_index(drop=True))

    short_name = model_name.split("/")[-1]

    # TrainingArguments
    args = TrainingArguments(
        output_dir=f'./results-{short_name}',
        num_train_epochs=2,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        eval_strategy='epoch',  # Updated from evaluation_strategy
        save_strategy='epoch',
        learning_rate=2e-5,
        logging_dir='./logs',
        logging_steps=50,
        load_best_model_at_end=True,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
    )
    trainer.train()
    metrics = trainer.evaluate()
    print(f"=== {model_name} Fine-Tuning Results ===")
    print(metrics)

    # Predictions
    preds = trainer.predict(eval_dataset)
    y_pred = np.argmax(preds.predictions, axis=1)
    print(f"=== {model_name} Classification Report ===")
    print(classification_report(test_labels, y_pred, target_names=class_names))
    accuracy = accuracy_score(test_labels, y_pred)
    print(f"{model_name} Test Accuracy: {accuracy:.4f}")

    # Save model and tokenizer
    finetuned_dir = os.path.join(save_dir, f'{short_name}_finetuned')
    model.save_pretrained(finetuned_dir)
    tokenizer.save_pretrained(finetuned_dir)
    return model, tokenizer, y_pred, accuracy


In [None]:
# Fine-tune RoBERTa-base on the cleaned descriptions and keep its test-set
# predictions and accuracy for comparison with the other models.
roberta_model, roberta_tok, y_pred_roberta, accuracy_roberta = fine_tune_transformer(
    model_name='roberta-base',
    train_texts=X_train,
    train_labels=y_train,
    test_texts=X_test,
    test_labels=y_test,
)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5791,0.539313
2,0.5557,0.533138


=== roberta-base Fine-Tuning Results ===
{'eval_loss': 0.5331382751464844, 'eval_runtime': 32.5395, 'eval_samples_per_second': 645.369, 'eval_steps_per_second': 20.191, 'epoch': 2.0}
=== roberta-base Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.80      0.89       200
            Childcare       1.00      0.73      0.85       200
            Education       1.00      0.71      0.83       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.88       600
        Food & Dining       0.87      0.81      0.84      3600
           Healthcare       0.99      0.72      0.83      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      0.79      0.88       400
        Personal 

In [None]:
def fine_tune_finbert(model_name, train_texts, train_labels, test_texts, test_labels):
    """Fine-tune a checkpoint whose classification head has a different size.

    Same procedure as ``fine_tune_transformer`` but the pretrained head is
    discarded when its shape does not match (``ignore_mismatched_sizes``), and
    training runs for four epochs with the best epoch chosen by ``eval_loss``.

    Reads the notebook-level globals ``label_mapping`` (integer code ->
    category name) and ``save_dir`` (directory the result is written to).

    Args:
        model_name: Hub id or local path of the pretrained checkpoint.
        train_texts: pandas Series of training strings.
        train_labels: pandas Series of integer class codes for ``train_texts``.
        test_texts: pandas Series of held-out strings, used for the per-epoch
            evaluation and the final report.
        test_labels: pandas Series of integer class codes for ``test_texts``.

    Returns:
        Tuple ``(model, tokenizer, y_pred, accuracy)`` where ``y_pred`` holds
        the predicted class codes for ``test_texts`` and ``accuracy`` is the
        accuracy of those predictions against ``test_labels``.
    """
    # Category names ordered by integer code. label_mapping is built with
    # enumerate(), so its keys are exactly 0..K-1.
    class_names = [str(label_mapping[code]) for code in range(len(label_mapping))]

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(class_names),
        # Replace the checkpoint's own label names with the expense categories
        # so the saved model reports them at inference time.
        id2label=dict(enumerate(class_names)),
        label2id={name: code for code, name in enumerate(class_names)},
        ignore_mismatched_sizes=True  # Fix for FinBERT size mismatch
    )

    # Tokenize
    train_encodings = tokenizer(
        train_texts.tolist(), truncation=True, padding=True, max_length=128
    )
    test_encodings = tokenizer(
        test_texts.tolist(), truncation=True, padding=True, max_length=128
    )

    # Dataset class
    class ExpenseDataset(torch.utils.data.Dataset):
        """Pairs tokenizer encodings with integer labels, one example per index."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
            # Force int64: the label Series comes from `cat.codes` and may
            # carry a narrower integer dtype than the loss expects.
            item['labels'] = torch.tensor(int(self.labels.iloc[idx]), dtype=torch.long)
            return item

    # Positional indexing in the dataset requires a clean 0..n-1 index.
    train_dataset = ExpenseDataset(train_encodings, train_labels.reset_index(drop=True))
    eval_dataset = ExpenseDataset(test_encodings, test_labels.reset_index(drop=True))

    short_name = model_name.split("/")[-1]

    # TrainingArguments
    args = TrainingArguments(
        output_dir=f'./results-{short_name}',
        num_train_epochs=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        eval_strategy='epoch',
        save_strategy='epoch',
        learning_rate=2e-5,
        logging_dir='./logs',
        logging_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model='eval_loss',
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
    )
    trainer.train()
    metrics = trainer.evaluate()
    print(f"=== {model_name} Fine-Tuning Results ===")
    print(metrics)

    # Predictions
    preds = trainer.predict(eval_dataset)
    y_pred = np.argmax(preds.predictions, axis=1)
    print(f"=== {model_name} Classification Report ===")
    # A concrete list rather than a dict view is passed as target_names.
    print(classification_report(test_labels, y_pred, target_names=class_names))
    accuracy = accuracy_score(test_labels, y_pred)
    print(f"{model_name} Test Accuracy: {accuracy:.4f}")

    # Save model and tokenizer
    finetuned_dir = os.path.join(save_dir, f'{short_name}_finetuned')
    model.save_pretrained(finetuned_dir)
    tokenizer.save_pretrained(finetuned_dir)
    return model, tokenizer, y_pred, accuracy

In [None]:
# Fine-tune FinBERT (tone checkpoint) on the cleaned descriptions and keep its
# test-set predictions and accuracy for comparison with the other models.
finbert_model, finbert_tok, y_pred_finbert, accuracy_finbert = fine_tune_finbert(
    model_name='yiyanghkust/finbert-tone',
    train_texts=X_train,
    train_labels=y_train,
    test_texts=X_test,
    test_labels=y_test,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at yiyanghkust/finbert-tone and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([17, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([17]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5728,0.542568
2,0.5459,0.538309
3,0.5181,0.536951


Epoch,Training Loss,Validation Loss
1,0.5728,0.542568
2,0.5459,0.538309
3,0.5181,0.536951
4,0.5244,0.546834


=== yiyanghkust/finbert-tone Fine-Tuning Results ===
{'eval_loss': 0.5369514226913452, 'eval_runtime': 29.0203, 'eval_samples_per_second': 723.631, 'eval_steps_per_second': 22.639, 'epoch': 4.0}
=== yiyanghkust/finbert-tone Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.80      0.88       200
            Childcare       0.99      0.72      0.84       200
            Education       1.00      0.71      0.83       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.89       600
        Food & Dining       0.86      0.81      0.83      3600
           Healthcare       0.99      0.72      0.84      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      0.78      0.88    

In [None]:
def fine_tune_transformer(model_name, train_texts, train_labels, test_texts, test_labels, label_mapping=None):
    """Fine-tune a Hugging Face sequence classifier on expense descriptions.

    This definition replaces the earlier five-argument ``fine_tune_transformer``
    in this notebook. ``label_mapping`` is optional so that the earlier
    five-argument call form keeps working after this cell has run.

    Reads the notebook-level global ``save_dir`` (directory the result is
    written to).

    Args:
        model_name: Hub id or local path of the pretrained checkpoint.
        train_texts: pandas Series of training strings.
        train_labels: pandas Series of integer class codes for ``train_texts``.
        test_texts: pandas Series of held-out strings, used for the per-epoch
            evaluation and the final report.
        test_labels: pandas Series of integer class codes for ``test_texts``.
        label_mapping: Dict mapping the integer codes 0..K-1 to category
            names. When omitted, the notebook-level ``label_mapping`` is used.

    Returns:
        Tuple ``(model, tokenizer, y_pred, accuracy)`` where ``y_pred`` holds
        the predicted class codes for ``test_texts`` and ``accuracy`` is the
        accuracy of those predictions against ``test_labels``.
    """
    if label_mapping is None:
        # The parameter shadows the global of the same name, so the global has
        # to be fetched explicitly.
        label_mapping = globals()['label_mapping']

    # Category names ordered by integer code (keys are expected to be 0..K-1).
    class_names = [str(label_mapping[code]) for code in range(len(label_mapping))]

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(class_names),
        # Store the category names in the model config so the saved checkpoint
        # reports real names instead of generic LABEL_<i> identifiers.
        id2label=dict(enumerate(class_names)),
        label2id={name: code for code, name in enumerate(class_names)},
        ignore_mismatched_sizes=True  # Handle size mismatches for classification head
    )

    # Tokenize
    train_encodings = tokenizer(
        train_texts.tolist(), truncation=True, padding=True, max_length=128
    )
    test_encodings = tokenizer(
        test_texts.tolist(), truncation=True, padding=True, max_length=128
    )

    # Dataset class
    class ExpenseDataset(torch.utils.data.Dataset):
        """Pairs tokenizer encodings with integer labels, one example per index."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
            # Force int64: the label Series comes from `cat.codes` and may
            # carry a narrower integer dtype than the loss expects.
            item['labels'] = torch.tensor(int(self.labels.iloc[idx]), dtype=torch.long)
            return item

    # Positional indexing in the dataset requires a clean 0..n-1 index.
    train_dataset = ExpenseDataset(train_encodings, train_labels.reset_index(drop=True))
    eval_dataset = ExpenseDataset(test_encodings, test_labels.reset_index(drop=True))

    short_name = model_name.split("/")[-1]

    # TrainingArguments
    args = TrainingArguments(
        output_dir=f'./results-{short_name}',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        eval_strategy='epoch',
        save_strategy='epoch',
        learning_rate=2e-5,
        logging_dir='./logs',
        logging_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model='eval_loss',
        report_to="none"
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
    )
    trainer.train()
    metrics = trainer.evaluate()
    print(f"=== {model_name} Fine-Tuning Results ===")
    print(metrics)

    # Predictions
    preds = trainer.predict(eval_dataset)
    y_pred = np.argmax(preds.predictions, axis=1)
    print(f"=== {model_name} Classification Report ===")
    # A concrete list rather than a dict view is passed as target_names.
    print(classification_report(test_labels, y_pred, target_names=class_names))
    accuracy = accuracy_score(test_labels, y_pred)
    print(f"{model_name} Test Accuracy: {accuracy:.4f}")

    # Save model and tokenizer
    finetuned_dir = os.path.join(save_dir, f'{short_name}_finetuned')
    model.save_pretrained(finetuned_dir)
    tokenizer.save_pretrained(finetuned_dir)
    return model, tokenizer, y_pred, accuracy

# Checkpoints to compare, fine-tuned one after another with identical settings.
models_to_test = [
    'microsoft/deberta-base',
    'google/electra-base-discriminator',
    'albert-base-v2',
    'bert-base-uncased',
]

# Fine-tune all models; everything each run returns is kept, keyed by checkpoint name.
results = {}
for model_name in models_to_test:
    print(f"\nFine-tuning {model_name}...")
    model, tokenizer, y_pred, accuracy = fine_tune_transformer(
        model_name, X_train, y_train, X_test, y_test, label_mapping
    )
    results[model_name] = dict(
        model=model,
        tokenizer=tokenizer,
        y_pred=y_pred,
        accuracy=accuracy,
    )

# Print summary of accuracies
print("\n=== Model Accuracy Comparison ===")
for model_name in results:
    result = results[model_name]
    print(f"{model_name}: Test Accuracy = {result['accuracy']:.4f}")


Fine-tuning microsoft/deberta-base...


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/559M [00:00<?, ?B/s]

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss
1,0.5669,0.541965
2,0.547,0.534086
3,0.5157,0.532876


=== microsoft/deberta-base Fine-Tuning Results ===
{'eval_loss': 0.5328761339187622, 'eval_runtime': 40.3883, 'eval_samples_per_second': 519.952, 'eval_steps_per_second': 16.267, 'epoch': 3.0}
=== microsoft/deberta-base Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.80      0.89       200
            Childcare       1.00      0.73      0.84       200
            Education       1.00      0.71      0.83       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.89       600
        Food & Dining       0.88      0.81      0.84      3600
           Healthcare       0.94      0.73      0.82      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      0.79      0.88       4

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5668,0.544279
2,0.5441,0.53684
3,0.5174,0.536025


=== google/electra-base-discriminator Fine-Tuning Results ===
{'eval_loss': 0.5360249876976013, 'eval_runtime': 35.3972, 'eval_samples_per_second': 593.267, 'eval_steps_per_second': 18.561, 'epoch': 3.0}
=== google/electra-base-discriminator Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.79      0.88       200
            Childcare       1.00      0.72      0.84       200
            Education       1.00      0.70      0.83       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.89       600
        Food & Dining       0.88      0.81      0.84      3600
           Healthcare       0.94      0.74      0.82      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5974,0.547637
2,0.5598,0.538572
3,0.5227,0.536255


=== albert-base-v2 Fine-Tuning Results ===
{'eval_loss': 0.5362552404403687, 'eval_runtime': 37.569, 'eval_samples_per_second': 558.971, 'eval_steps_per_second': 17.488, 'epoch': 3.0}
=== albert-base-v2 Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.80      0.89       200
            Childcare       1.00      0.72      0.84       200
            Education       0.99      0.70      0.82       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.89       600
        Food & Dining       0.88      0.81      0.84      3600
           Healthcare       0.95      0.73      0.83      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      0.78      0.88       400
        Person

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5813,0.539919
2,0.5541,0.53666
3,0.5109,0.535694


=== bert-base-uncased Fine-Tuning Results ===
{'eval_loss': 0.5356939435005188, 'eval_runtime': 34.2178, 'eval_samples_per_second': 613.716, 'eval_steps_per_second': 19.201, 'epoch': 3.0}
=== bert-base-uncased Classification Report ===
                       precision    recall  f1-score   support

    Bills & Utilities       1.00      0.79      0.88      1800
    Charity/Donations       0.99      0.80      0.88       200
            Childcare       1.00      0.72      0.84       200
            Education       1.00      0.71      0.83       600
        Entertainment       1.00      0.81      0.89      1600
Financial Obligations       1.00      0.80      0.88       600
        Food & Dining       0.88      0.80      0.84      3600
           Healthcare       0.93      0.74      0.83      1000
              Housing       1.00      0.81      0.90      1000
            Insurance       1.00      0.78      0.88       600
        Miscellaneous       1.00      0.78      0.87       400
       