#### Importing the necessary libraries

In [1]:
import torch
import pickle
import pandas as pd
from tqdm import tqdm
from sklearn.pipeline import Pipeline
from transformers import pipeline
from sklearn.metrics import accuracy_score, precision_score

#### Loading the 5 product-specific models used to predict the sub-product

In [2]:
def _load_pickled_model(model_path):
    """Unpickle and return the object stored in the file at ``model_path``."""
    # NOTE: unpickling can execute code embedded in the file, so these
    # paths must only ever point at trusted model artifacts.
    with open(model_path, 'rb') as model_file:
        return pickle.load(model_file)


# One model per product category; select_subproduct_model() chooses among
# these module-level names based on the predicted product label.
trained_model_cr = _load_pickled_model('models/Credit_Reporting_model.pkl')
trained_model_cp = _load_pickled_model('models/Credit_Prepaid_Card_model.pkl')
trained_model_cs = _load_pickled_model('models/Checking_saving_model.pkl')
trained_model_l = _load_pickled_model('models/loan_model.pkl')
trained_model_d = _load_pickled_model('models/Debt_model.pkl')

#### Loading the 17 issue-specific models used to predict the sub-issue

In [3]:
# Registry of issue-specific models: the key is the name used to look the
# model up in `issue_models`, the value is the pickle file it is read from.
issue_model_files = {
    'trained_model_account_operations': 'issue_models/account_operations_and_unauthorized_transaction_issues.pkl',
    'trained_model_collect_debt': 'issue_models/attempts_to_collect_debt_not_owed.pkl',
    'trained_model_closing_account': 'issue_models/closing_an_account.pkl',
    'trained_model_closing_your_account': 'issue_models/closing_your_account.pkl',
    'trained_model_credit_report': 'issue_models/credit_report_and_monitoring_issues.pkl',
    'trained_model_lender': 'issue_models/dealing_with_your_lender_or_servicer.pkl',
    'trained_model_disputes': 'issue_models/disputes_and_misrepresentations.pkl',
    'trained_model_improper_use_report': 'issue_models/improper_use_of_your_report.pkl',
    'trained_model_incorrect_info': 'issue_models/incorrect_information_on_your_report.pkl',
    'trained_model_legal_and_threat': 'issue_models/legal_and_threat_actions.pkl',
    'trained_model_managing_account': 'issue_models/managing_an_account.pkl',
    'trained_model_payment_funds': 'issue_models/payment_and_funds_management.pkl',
    'trained_model_investigation_wrt_issue': 'issue_models/problem_with_a_company\'s_investigation_into_an_existing_issue.pkl',
    'trained_model_investigation_wrt_problem': 'issue_models/problem_with_a_company\'s_investigation_into_an_existing_problem.pkl',
    'trained_model_credit_investigation_wrt_problem': 'issue_models/problem_with_a_credit_reporting_company\'s_investigation_into_an_existing_problem.pkl',
    'trained_model_purchase_shown': 'issue_models/problem_with_a_purchase_shown_on_your_statement.pkl',
    'trained_model_notification_about_debt': 'issue_models/written_notification_about_debt.pkl',
}

# Unpickle every registered file, in registry order, under its registry key.
# NOTE: unpickling can execute code embedded in the file; only load trusted artifacts.
issue_models = {}
for registry_key in issue_model_files:
    with open(issue_model_files[registry_key], 'rb') as pickle_handle:
        issue_models[registry_key] = pickle.load(pickle_handle)

#### LLM to classify the product based on the narrative

In [4]:
# Use the "mps" device when torch reports it as available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Text-classification pipeline that assigns a product label to a narrative;
# long inputs are truncated (max_length=512).
product_classifier = pipeline(
    "text-classification",
    model="Mahesh9/distil-bert-fintuned-product-cfpb-complaints",
    max_length=512,
    truncation=True,
    device=device,
)

#### Function to choose the appropriate product model to classify the sub-product

In [5]:
def select_subproduct_model(predicted_product):
    """Return the sub-product model that belongs to ``predicted_product``.

    Args:
        predicted_product: Product label to dispatch on.

    Returns:
        The module-level trained model associated with that product.

    Raises:
        ValueError: If the label is not one of the five known products.
    """
    # Ordered (label, getter) pairs. The getters defer the global lookup so
    # that only the model for the matching label is ever touched.
    product_to_model = (
        ('Credit Reporting', lambda: trained_model_cr),
        ('Credit/Prepaid Card', lambda: trained_model_cp),
        ('Checking or savings account', lambda: trained_model_cs),
        ('Loans / Mortgage', lambda: trained_model_l),
        ('Debt collection', lambda: trained_model_d),
    )
    for product_label, get_model in product_to_model:
        if predicted_product == product_label:
            return get_model()
    raise ValueError("Invalid predicted product category")

#### LLM to classify the issue based on the narrative

In [6]:
# Text-classification pipeline that assigns an issue label to a narrative;
# long inputs are truncated (max_length=512). Reuses the `device` chosen
# when the product classifier was set up.
issue_classifier = pipeline(
    "text-classification",
    model="Mahesh9/distil-bert-fintuned-issues-cfpb-complaints",
    max_length=512,
    truncation=True,
    device=device,
)

#### Function to choose the appropriate issue model to classify the sub-issue

In [7]:
def select_subissue_model(predicted_issue):
    """Return the sub-issue model that belongs to ``predicted_issue``.

    Args:
        predicted_issue: Issue label to dispatch on.

    Returns:
        The entry of ``issue_models`` registered for that issue.

    Raises:
        ValueError: If the label is not one of the seventeen known issues.
    """
    # Ordered (issue label, key into issue_models) pairs.
    issue_to_model_key = (
        ("Problem with a company's investigation into an existing problem",
         'trained_model_investigation_wrt_problem'),
        ("Problem with a credit reporting company's investigation into an existing problem",
         'trained_model_credit_investigation_wrt_problem'),
        ("Problem with a company's investigation into an existing issue",
         'trained_model_investigation_wrt_issue'),
        ("Problem with a purchase shown on your statement",
         'trained_model_purchase_shown'),
        ("Incorrect information on your report",
         'trained_model_incorrect_info'),
        ("Improper use of your report",
         'trained_model_improper_use_report'),
        ("Account Operations and Unauthorized Transaction Issues",
         'trained_model_account_operations'),
        ("Payment and Funds Management",
         'trained_model_payment_funds'),
        ("Managing an account",
         'trained_model_managing_account'),
        ("Attempts to collect debt not owed",
         'trained_model_collect_debt'),
        ("Written notification about debt",
         'trained_model_notification_about_debt'),
        ("Dealing with your lender or servicer",
         'trained_model_lender'),
        ("Disputes and Misrepresentations",
         'trained_model_disputes'),
        ("Closing your account",
         'trained_model_closing_your_account'),
        ("Closing an account",
         'trained_model_closing_account'),
        ("Credit Report and Monitoring Issues",
         'trained_model_credit_report'),
        ("Legal and Threat Actions",
         'trained_model_legal_and_threat'),
    )
    for issue_label, model_key in issue_to_model_key:
        if predicted_issue == issue_label:
            # The registry is only consulted once a label has matched.
            return issue_models[model_key]
    raise ValueError("Invalid predicted issue category")

#### Driver code to classify the complaint into various categories

In [8]:
def classify_complaint(narrative):
    """Classify one complaint narrative at all four label levels.

    The product and issue labels come from the two text-classification
    pipelines. Each label then selects a dedicated model whose ``predict``
    output supplies the sub-product or sub-issue.

    Args:
        narrative: Complaint text to classify.

    Returns:
        dict with the keys "Product", "Sub-product", "Issue" and "Sub-issue".

    Raises:
        ValueError: Propagated from the model selectors when a predicted
            label has no associated model.
    """
    labels = {}

    # Product level: top-scoring label first, then the product-specific model.
    labels["Product"] = product_classifier(narrative)[0]['label']
    product_specific_model = select_subproduct_model(labels["Product"])
    labels["Sub-product"] = product_specific_model.predict([narrative])[0]

    # Issue level: same two-step scheme, independent of the product result.
    labels["Issue"] = issue_classifier(narrative)[0]['label']
    issue_specific_model = select_subissue_model(labels["Issue"])
    labels["Sub-issue"] = issue_specific_model.predict([narrative])[0]

    return labels

In [9]:
# Example narrative for a quick end-to-end check of classify_complaint.
# The line break and indentation inside the triple-quoted string are part
# of the text that is passed to the models.
narrative = """It is absurd that I have consistently made timely payments for this account and have never been
             overdue. I kindly request that you promptly update my account to reflect this accurately."""

# Bare last expression so the notebook renders the returned dict as the cell output.
classify_complaint(narrative)

{'Product': 'Credit/Prepaid Card',
 'Sub-product': 'General-purpose credit card or charge card',
 'Issue': "Problem with a company's investigation into an existing problem",
 'Sub-issue': 'Was not notified of investigation status or results'}

#### Evaluation on external test set

In [None]:
# Load the test dataset
test_data = pd.read_csv('../data_splits/test-data-split.csv')

# Ground-truth labels for each of the four classification levels
actual_products = test_data['Product']
actual_subproducts = test_data['Sub-product']
actual_issues = test_data['Issue']
actual_subissues = test_data['Sub-issue']

# Predicted labels, collected in the same row order as the test set
predicted_products = []
predicted_subproducts = []
predicted_issues = []
predicted_subissues = []

# Run the full classification pipeline on every complaint narrative
for narrative in tqdm(test_data['Consumer complaint narrative']):
    prediction = classify_complaint(narrative)

    predicted_products.append(prediction['Product'])
    predicted_subproducts.append(prediction['Sub-product'])
    predicted_issues.append(prediction['Issue'])
    predicted_subissues.append(prediction['Sub-issue'])


def _accuracy_and_macro_precision(actual, predicted):
    """Return ``(accuracy, macro-averaged precision)`` for one label level."""
    # zero_division=1: a class that is never predicted has an undefined
    # precision (0/0); it is counted as 1 rather than triggering a warning.
    return (
        accuracy_score(actual, predicted),
        precision_score(actual, predicted, average='macro', zero_division=1),
    )


# Every level keeps its own pair of variables so that no score overwrites
# another and each print below reports the level it names.
accuracy_product, precision_product = _accuracy_and_macro_precision(
    actual_products, predicted_products)
accuracy_subproduct, precision_subproduct = _accuracy_and_macro_precision(
    actual_subproducts, predicted_subproducts)
accuracy_issue, precision_issue = _accuracy_and_macro_precision(
    actual_issues, predicted_issues)
accuracy_subissue, precision_subissue = _accuracy_and_macro_precision(
    actual_subissues, predicted_subissues)

# Print the results
print("Product Prediction Accuracy:", accuracy_product)
print("Product Prediction Precision:", precision_product)

print("Subproduct Prediction Accuracy:", accuracy_subproduct)
print("Subproduct Prediction Precision:", precision_subproduct)

print("Issue Prediction Accuracy:", accuracy_issue)
print("Issue Prediction Precision:", precision_issue)

print("Sub-issue Prediction Accuracy:", accuracy_subissue)
print("Sub-issue Prediction Precision:", precision_subissue)