In [24]:
import numpy as np
import pandas as pd
from joblib import load
from tensorflow.keras.models import load_model
from datetime import datetime

In [6]:
# Restore the persisted inference artifacts from the local model/ directory.
# Keras network whose predict() output is later un-scaled by y_scaler.
model = load_model('model/best_model.keras')
# joblib-serialized helpers: the target scaler (its inverse_transform is
# applied to predictions) and the preprocessor applied to input features.
y_scaler = load('model/y_scaler.joblib')
X_preprocessor = load('model/X_preprocessor.joblib')

In [98]:
# Source data set; it is only used here to derive lookup tables and the base date.
df = pd.read_csv('USGovtRevCollect_20041001_20231227.csv')

# Description -> ID lookups. When a description occurs on several rows,
# the ID from the last such row is the one kept by to_dict().
electronic_dict = df.set_index('Electronic Category Description')['Electronic Category ID'].to_dict()
channel_dict = df.set_index('Channel Type Description')['Channel Type ID'].to_dict()
tax_dict = df.set_index('Tax Category Description')['Tax Category ID'].to_dict()

# Smallest 'Record Date' value in the file; evaluate() parses it as YYYY-MM-DD
# and measures record dates as a day offset from it.
base_date = df['Record Date'].min()

# Show the accepted descriptions and their IDs.
for lookup in (electronic_dict, channel_dict, tax_dict):
    print(lookup)

def evaluate(electronic_type, channel_type, tax_type, fiscal_year, fiscal_quarter, record_date):
    """Predict the net collections amount for one hypothetical record.

    Parameters
    ----------
    electronic_type : str
        Electronic category description; must be a key of ``electronic_dict``.
    channel_type : str
        Channel type description; must be a key of ``channel_dict``.
    tax_type : str
        Tax category description; must be a key of ``tax_dict``.
    fiscal_year : int
        Passed through as the 'Fiscal Year' feature.
    fiscal_quarter : int
        Passed through as the 'Fiscal Quarter Number' feature.
    record_date : str
        Date in ``%Y-%m-%d`` form (non-zero-padded fields such as
        '2025-2-20' are accepted by ``strptime``).

    Returns
    -------
    The output of ``y_scaler.inverse_transform(model.predict(X))`` for the
    single input row (callers read the value as ``result[0][0]``), or
    ``None`` when a category description is unknown or a date cannot be
    parsed.
    """
    # Unknown descriptions cannot be mapped to IDs; report that with None.
    if (electronic_type not in electronic_dict
            or channel_type not in channel_dict
            or tax_type not in tax_dict):
        return None

    try:
        date_obj = datetime.strptime(record_date, '%Y-%m-%d')
        basedate_obj = datetime.strptime(base_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        # ValueError: malformed date string. TypeError: a non-string value
        # (e.g. None or a date object), treated the same as other invalid input.
        return None

    # Build the single-row frame in one step; the column order matches the
    # order in which the features were originally assembled.
    data = pd.DataFrame({
        'Electronic Category ID': [electronic_dict[electronic_type]],
        'Channel Type ID': [channel_dict[channel_type]],
        'Tax Category ID': [tax_dict[tax_type]],
        'Fiscal Year': [fiscal_year],
        'Fiscal Quarter Number': [fiscal_quarter],
        # Months 1-3 -> quarter 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        'Calendar Quarter Number': [(date_obj.month - 1) // 3 + 1],
        'Calendar Month Number': [date_obj.month],
        # Whole days elapsed since the earliest record date in the data set.
        'Record Date Discretized': [(date_obj - basedate_obj).days],
    })

    X = X_preprocessor.transform(data)
    # The preprocessor may return a scipy sparse matrix; convert it to a plain
    # ndarray (avoiding np.matrix) and accept an already-dense result as well.
    X = X.toarray() if hasattr(X, 'toarray') else np.asarray(X)

    prediction = model.predict(X)
    # Undo the target scaling applied by y_scaler.
    return y_scaler.inverse_transform(prediction)

{'Non-Electronic': 4, 'Electronic Settlement': 3, 'Fully Electronic - FS': 2, 'Fully Electronic - All': 1}
{'Mail': 2, 'Over-the-Counter (OTC)': 1, 'Bank': 5, 'Internet': 3, 'Other': 7}
{'Non-Tax': 1, 'IRS Non-Tax': 2, 'IRS Tax': 3}


In [112]:
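# Reference only: accepted description strings (and their IDs) for the
# electronic_type, channel_type and tax_type arguments of evaluate().
# electronic_type = {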
#     'Fully Electronic - All': 1,
#     'Fully Electronic - FS': 2,
#     'Electronic Settlement': 3,
#     'Non-Electronic': 4,
# }
# channel_type = {
#     'Over-the-Counter (OTC)': 1,
#     'Mail': 2,
#     'Internet': 3,
#     'Bank': 5,
#     'Other': 7,
# }
# tax_type = {
#     'Non-Tax': 1,
#     'IRS Non-Tax': 2,
#     'IRS Tax': 3
# }

result = evaluate(
    electronic_type='Fully Electronic - All',
    channel_type='Internet',
    tax_type='IRS Tax',
    fiscal_year=2025,
    fiscal_quarter=2,
    record_date='2025-2-20'
)
# evaluate() returns None for an unknown category description or an
# unparseable record_date; indexing None would raise an opaque TypeError.
if result is None:
    print('\nNo prediction: unknown category description or invalid record_date (expected YYYY-MM-DD).')
else:
    print('\nPredicted Net Collections Amount =', result[0][0])


Predicted Net Collections Amount = 17446742000.0
