<a href="https://colab.research.google.com/github/savinthie/Final_Year_Project_IDP_2024-2025/blob/main/Final_IDP_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing the necessary libraries**

In [14]:
from tensorflow.keras.layers import Layer
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Flatten, Dense, ReLU, Concatenate, MaxPooling1D,Dropout
from tensorflow.keras.optimizers import Adam
import joblib
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
import xgboost as xgb
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tqdm import tqdm
from google.colab import drive

**Data collection stage**

In [15]:
# Mount Google Drive so the dataset stored there is reachable from this runtime.
drive.mount('/content/drive')

# Location of the raw CSV on the mounted drive.
USDATASET_CSV_PATH = '/content/drive/MyDrive/FYP 2024 25/USDataset.csv'

# header=1: the second row of the file supplies the column names.
original_household_dataset = pd.read_csv(USDATASET_CSV_PATH, header=1)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Data preprocessing stage**

In [16]:
# Fill every missing cell with 0 and normalise the column labels:
# lower-case, with spaces and dots removed.
original_household_dataset = (
    original_household_dataset
    .fillna(0)
    .rename(columns=lambda label: label.lower().replace(' ', '').replace('.', ''))
)

# Columns carried forward into feature engineering and modelling.
cols_to_keep = [
    'stateabv', 'county', 'family',
    'housing', 'food', 'transportation', 'healthcare',
    'othernecessities', 'childcare', 'taxes', 'total',
    'median_family_income', 'num_counties_in_st',
]
household_dataset = original_household_dataset[cols_to_keep].copy()

# Strip commas from the income column before converting it to float.
income_without_commas = household_dataset['median_family_income'].replace(',', '', regex=True)
household_dataset['median_family_income'] = income_without_commas.astype(float)

**Feature engineering stage**

In [17]:
# Monthly income. Dividing 'median_family_income' by 12 in place would shrink it
# again on every re-run of this cell, so the annual value is kept in its own
# column and the monthly figure is always derived from that column.
if 'median_family_income_annual' not in household_dataset.columns:
    household_dataset['median_family_income_annual'] = household_dataset['median_family_income']
household_dataset['median_family_income'] = household_dataset['median_family_income_annual'] / 12

# Household composition, parsed positionally from 'family': character 0 is read
# as the parent count and character 2 as the child count.
household_dataset['n_parents'] = household_dataset['family'].str.slice(0, 1).astype(int)  # new feature
household_dataset['n_children'] = household_dataset['family'].str.slice(2, 3).astype(int)  # new feature
household_dataset['n_members'] = household_dataset['n_parents'] + household_dataset['n_children']  # new feature

# Ratio of monthly income to the 'total' cost column.
household_dataset['financial_stability'] = household_dataset['median_family_income'] / household_dataset['total']  # new feature

# Total cost spread evenly over the household members, then re-aggregated per role.
household_dataset["per_member_cost"] = household_dataset["total"] / household_dataset["n_members"]  # new feature
household_dataset["child_expense_cost"] = household_dataset["per_member_cost"] * household_dataset["n_children"]  # new feature
household_dataset["parent_expense_cost"] = household_dataset["per_member_cost"] * household_dataset["n_parents"]  # new feature

# NOTE(review): child_expense_cost + parent_expense_cost equals
# per_member_cost * n_members = total, so this column is 0 up to floating-point
# rounding. It is kept because the model input layout depends on it.
household_dataset["other_expense_cost"] = household_dataset["total"] - (
    household_dataset["child_expense_cost"] + household_dataset["parent_expense_cost"]
)  # new feature

# Binary flag: 0 when the household has no children, 1 otherwise.
household_dataset["zero_childcare_cost"] = (household_dataset['n_children'] >= 1).astype(int)  # new feature

**Feature scaling (an important data-handling technique for neural networks)**

In [18]:
# Model inputs: the raw columns followed by the engineered features. The order
# matters to the rest of the notebook ('n_children' sits at index 3).
feature_col_list = [
    'total', 'median_family_income', 'num_counties_in_st',
    'n_children', 'n_parents', 'n_members',
    'per_member_cost', 'child_expense_cost', 'parent_expense_cost',
    'other_expense_cost', 'zero_childcare_cost',
]

# Regression targets: one expense category per output column.
target_col_list = ['housing', 'food', 'transportation', 'healthcare', 'othernecessities', 'childcare', 'taxes']

X = household_dataset[feature_col_list].values
y_expenses = household_dataset[target_col_list].values

# Min-max scale inputs and targets.
# NOTE: both scalers are fitted on the full dataset here, i.e. before any
# train/validation/test split is made.
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = MinMaxScaler()
y_exp_scaled = scaler_y.fit_transform(y_expenses)

# Persist the fitted scalers so they can be restored later with joblib.load.
joblib.dump(scaler_X, 'scaler_X.pkl')
joblib.dump(scaler_y, 'scaler_y.pkl')

['scaler_y.pkl']

In [19]:
# SMAPE and MAPE are undefined where the denominator is 0, which happens for the
# childcare expense (legitimately 0 for households without children). This
# helper neutralises positions where BOTH values are 0.
def replace_zeros_with_ones(a, b):
    """Return copies of ``a`` and ``b`` with positions where both are 0 set to 1.

    The inputs are never modified: callers pass slices (views) of their
    evaluation arrays, and writing into those would silently corrupt the
    ground truth and the predictions.

    Args:
        a: 1-D sequence or array of numbers.
        b: 1-D sequence or array of numbers, same length as ``a``.

    Returns:
        Tuple ``(a_fixed, b_fixed)`` of NumPy arrays.
    """
    # np.array() copies by default, so the caller's data stays untouched.
    a = np.array(a)
    b = np.array(b)
    both_zero = (a == 0) & (b == 0)
    a[both_zero] = 1
    b[both_zero] = 1
    return a, b

In [20]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, in percent.

    Entries whose true value is 0 are skipped: their percentage error is
    undefined and would otherwise turn the whole mean into inf/nan.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        MAPE over the entries with a non-zero true value, or ``nan`` when no
        such entry exists.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    nonzero = y_true != 0
    if not nonzero.any():
        return float('nan')
    return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100

def calculateSmape(y_true, y_pred):
    """Symmetric mean absolute percentage error, in percent.

    Each term is ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2)``. A term
    whose denominator is 0 (both values are 0) counts as an error of 0. The
    inputs are only read, never written to.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        The mean of the per-entry terms multiplied by 100.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    numerator = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    # Where the denominator is 0 the pre-filled 0 in `out` is kept, so there
    # is no division by zero.
    terms = np.divide(numerator, denominator,
                      out=np.zeros_like(numerator), where=denominator != 0)
    smape_value = np.mean(terms) * 100
    return smape_value

In [21]:
# Cross-Validation Setup
# Two shuffled folds with a fixed seed: each fold trains on one half of the rows
# and holds out the other half.
kf = KFold(n_splits=2, shuffle=True, random_state=100)

# Metrics
def print_metrics(y_true, y_pred, task_name):
    """Print the MSE and R2 of ``y_pred`` against ``y_true``, prefixed with ``task_name``."""
    scores = {
        'mse': mean_squared_error(y_true, y_pred),
        'r2': r2_score(y_true, y_pred),
    }
    print(f"{task_name} - MSE: {scores['mse']:.4f}, R2: {scores['r2']:.4f}")

def regr_report(x, y):
    """Build a one-line regression report comparing true values ``x`` with predictions ``y``.

    MAE, R-squared, RMSE and MSE are rounded to 4 decimals; the SMAPE from
    ``calculateSmape`` is reported unrounded.

    Returns:
        The formatted report string.
    """
    # Compute the MSE once; the RMSE is derived from the unrounded value.
    squared_error = mean_squared_error(x, y)
    mae = round(mean_absolute_error(x, y), 4)
    r2 = round(r2_score(x, y), 4)
    mse = round(squared_error, 4)
    rmse = round(np.sqrt(squared_error), 4)
    # SMAPE is evaluated last, after all other metrics have read x and y.
    smape_score = calculateSmape(x, y)
    return f'MAE: {mae}, R-Squared: {r2}, RMSE: {rmse}, MSE: {mse} ,smape: {smape_score}'

In [22]:
# Position of 'n_children' among the feature columns used to build X.
num_children_col = 3  # 4th position, 0-indexed
# Position of the 'childcare' target within target_col_list.
childcare_exp_col = target_col_list.index('childcare')

**Model selection stage**

*Implementation of the novel hybrid model*

In [23]:
# L2 regularization strength (you can tune this value)
# NOTE(review): create_hybrid_model below does not reference this value; no
# layer in it applies L2 regularisation.
l2_strength = 0.01

# Hybrid Model Definition
def create_hybrid_model(input_shape, output_shape):
    """Build the CNN + MLP multi-output regression model.

    Architecture: Conv1D (8 filters, kernel 3, 'same' padding, ReLU) -> Flatten
    -> Dense(128, ReLU) -> Dense(64, ReLU) -> one linear Dense(1) head per
    entry of the module-level ``target_col_list``, named ``target_<column>``.

    Args:
        input_shape: Shape of one sample, passed to ``Input(shape=...)``.
        output_shape: Expected number of regression targets. The heads are
            built from ``target_col_list``, so an integer here must equal its
            length.

    Returns:
        An uncompiled Keras ``Model`` with one output per target column.

    Raises:
        ValueError: If ``output_shape`` is an integer that differs from
            ``len(target_col_list)``.
    """
    n_targets = len(target_col_list)
    # Fail early instead of building a model whose head count does not match
    # the labels the caller intends to train on.
    if isinstance(output_shape, (int, np.integer)) and output_shape != n_targets:
        raise ValueError(
            f"output_shape={output_shape} does not match the {n_targets} target columns in target_col_list"
        )

    input_layer = Input(shape=input_shape)
    # CNN part
    x = Conv1D(filters=8, kernel_size=3, padding='same', activation='relu')(input_layer)
    x = Flatten()(x)
    # MLP part
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    # Multiple regression outputs: one single-unit linear head per target column
    output_layers = [Dense(1, activation='linear', name=f'target_{col}')(x) for col in target_col_list]

    household_model = Model(inputs=input_layer, outputs=output_layers)
    return household_model

In [24]:
# Module-level holders for the most recent fold's splits, filled in by
# cross_val_with_evaluation. A `global` statement at module scope is a no-op
# and leaves the names undefined, so they are initialised explicitly and stay
# None until cross-validation has run.
X_train_global = X_test_global = X_val_global = None
y_train_global = y_test_global = y_val_global = None

In [25]:
# Cross-Validation with Evaluation
def _apply_childcare_rule(X_cnn, y_pred_original):
    """Set the childcare prediction to 0 for rows whose scaled n_children feature is exactly 0.

    ``X_cnn`` carries the trailing channel axis added for Conv1D, so the
    feature is read at ``[:, num_children_col, 0]``. ``y_pred_original`` is
    modified in place and returned.
    """
    # NOTE(review): the comparison is made on the scaled feature; a scaled 0
    # means "no children" only if the smallest n_children seen by scaler_X
    # was 0 -- confirm against the dataset.
    no_children = X_cnn[:, num_children_col, 0] == 0
    y_pred_original[no_children, childcare_exp_col] = 0
    return y_pred_original


def _predict_in_original_units(model, X_cnn):
    """Predict every target, undo the target scaling and apply the childcare rule."""
    # predict() returns one array per output head; stack them into (samples, targets).
    y_pred_scaled = np.column_stack(model.predict(X_cnn))
    y_pred_original = scaler_y.inverse_transform(y_pred_scaled)
    return _apply_childcare_rule(X_cnn, y_pred_original)


def cross_val_with_evaluation(X_scaled, y_exp_scaled, kf, num_epochs=200):
    """Train and evaluate a fresh hybrid model on every fold of ``kf``.

    For each fold the held-out rows are split 50/50 into a test and a
    validation set, a new model is trained with early stopping on the
    validation loss, and per-target metrics are printed for the train,
    validation and test sets in original (unscaled) units.

    Side effects: the module-level ``*_global`` variables are overwritten with
    the current fold's (not yet reshaped) splits, so after the call they hold
    the last fold's data. The module-level ``scaler_y`` is used to undo the
    target scaling.

    Args:
        X_scaled: 2-D array of scaled features, shape (samples, features).
        y_exp_scaled: 2-D array of scaled targets, shape (samples, targets).
        kf: Splitter providing ``split`` and ``get_n_splits`` (e.g. ``KFold``).
        num_epochs: Maximum number of training epochs per fold.

    Returns:
        The model trained on the LAST fold only; models from earlier folds are
        discarded.
    """
    global X_train_global, X_test_global, X_val_global
    global y_train_global, y_test_global, y_val_global

    n_folds = kf.get_n_splits()
    for fold, (train_index, holdout_index) in enumerate(kf.split(X_scaled), start=1):
        print(f"Fold {fold}/{n_folds}")
        X_train, X_holdout = X_scaled[train_index], X_scaled[holdout_index]
        y_train, y_holdout = y_exp_scaled[train_index], y_exp_scaled[holdout_index]

        # Split this fold's held-out rows 50/50 into test and validation sets.
        X_test, X_val, y_test, y_val = train_test_split(
            X_holdout, y_holdout, test_size=0.5, random_state=100)

        # Publish the 2-D splits before the channel axis is added.
        X_train_global, X_test_global, X_val_global = X_train, X_test, X_val
        y_train_global, y_test_global, y_val_global = y_train, y_test, y_val

        # Reshaping data for CNN: add a trailing channel axis -> (samples, features, 1)
        X_train = X_train[..., np.newaxis]
        X_val = X_val[..., np.newaxis]
        X_test = X_test[..., np.newaxis]

        household_model = create_hybrid_model((X_train.shape[1], 1), y_train.shape[1])
        household_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

        household_model.summary()

        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        # Training: the multi-output model takes one label vector per head.
        household_model.fit(
            X_train, [y_train[:, i] for i in range(y_train.shape[1])],
            validation_data=(X_val, [y_val[:, i] for i in range(y_val.shape[1])]),
            epochs=num_epochs, batch_size=64, verbose=1, callbacks=[early_stopping])

        # For each split: (report label, true values, predictions), both in
        # original units. Predictions run in train -> validation -> test order.
        splits = [
            ('Train Data:', X_train, y_train),
            ('Validation Data:', X_val, y_val),
            ('Test Data:', X_test, y_test),
        ]
        evaluated = [
            (label,
             scaler_y.inverse_transform(y_split),
             _predict_in_original_units(household_model, X_split))
            for label, X_split, y_split in splits
        ]

        # Metrics for each target
        for i, target in enumerate(target_col_list):
            print(f'Model Results for {target.capitalize()}:')
            for label, y_true_original, y_pred_original in evaluated:
                print(label, regr_report(y_true_original[:, i], y_pred_original[:, i]))
            print()

    return household_model

# Run the cross-validation; the returned model is the one trained on the last fold.
household_model_combine = cross_val_with_evaluation(X_scaled, y_exp_scaled,kf)



Fold 1/2


Epoch 1/200
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0755 - target_childcare_loss: 0.0198 - target_food_loss: 0.0042 - target_healthcare_loss: 0.0089 - target_housing_loss: 0.0044 - target_othernecessities_loss: 0.0044 - target_taxes_loss: 0.0075 - target_transportation_loss: 0.0261 - val_loss: 0.0154 - val_target_childcare_loss: 0.0026 - val_target_food_loss: 6.2228e-04 - val_target_healthcare_loss: 0.0053 - val_target_housing_loss: 0.0015 - val_target_othernecessities_loss: 9.8300e-04 - val_target_taxes_loss: 8.2013e-04 - val_target_transportation_loss: 0.0036
Epoch 2/200
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0143 - target_childcare_loss: 0.0023 - target_food_loss: 6.4584e-04 - target_healthcare_loss: 0.0050 - target_housing_loss: 0.0014 - target_othernecessities_loss: 9.4410e-04 - target_taxes_loss: 7.5058e-04 - target_transportation_loss: 0.0033 - val_loss: 0.0144 - val_target_childcare_loss:

Epoch 1/200
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0945 - target_childcare_loss: 0.0112 - target_food_loss: 0.0079 - target_healthcare_loss: 0.0178 - target_housing_loss: 0.0087 - target_othernecessities_loss: 0.0121 - target_taxes_loss: 0.0067 - target_transportation_loss: 0.0301 - val_loss: 0.0138 - val_target_childcare_loss: 0.0021 - val_target_food_loss: 5.5401e-04 - val_target_healthcare_loss: 0.0049 - val_target_housing_loss: 0.0015 - val_target_othernecessities_loss: 0.0011 - val_target_taxes_loss: 5.5254e-04 - val_target_transportation_loss: 0.0031
Epoch 2/200
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0143 - target_childcare_loss: 0.0021 - target_food_loss: 6.1877e-04 - target_healthcare_loss: 0.0050 - target_housing_loss: 0.0014 - target_othernecessities_loss: 9.7792e-04 - target_taxes_loss: 5.3665e-04 - target_transportation_loss: 0.0036 - val_loss: 0.0134 - val_target_childcare_loss: 0.0

In [None]:
# Disabled experiment, kept as a bare string literal so nothing in it runs:
# cross-validation with one XGBoost regressor per target.
# NOTE(review): if re-enabled as written, this `def` would replace the Keras
# cross_val_with_evaluation defined earlier (same name), and `models` is
# re-created inside the fold loop, so only the last fold's boosters are returned.
'''# Cross-Validation with Evaluation for XGBoost
def cross_val_with_evaluation(X_scaled, y_exp_scaled, kf, num_boost_rounds=4000):
    fold = 1
    childcarelist = []
    childcarelist_pred = []
    for train_index, val_index in kf.split(X_scaled):
        models = []
        print(f"Fold {fold}/{kf.get_n_splits()}")
        X_train, X_test_val = X_scaled[train_index], X_scaled[val_index]
        y_train, y_test_val = y_exp_scaled[train_index], y_exp_scaled[val_index]

        # Split validation and test data
        X_test, X_val, y_test, y_val = train_test_split(
            X_test_val, y_test_val, test_size=0.5, random_state=100
        )  # 50/50 split of remaining 20%

        # Train XGBoost model for each target variable
        # Store predictions for all targets in this fold
        all_y_train_pred = []
        all_y_val_pred = []
        all_y_test_pred = []

        for i in range(y_train.shape[1]):
            dtrain = xgb.DMatrix(X_train, label=y_train[:, i])
            dval = xgb.DMatrix(X_val, label=y_val[:, i])
            dtest = xgb.DMatrix(X_test)

            params = {
                'objective': 'reg:squarederror',
                'eval_metric': 'rmse',
                'learning_rate': 0.01,
                'max_depth': 6,
                'n_estimators': num_boost_rounds
            }

            evals = [(dtrain, 'train'), (dval, 'eval')]
            model = xgb.train(
                params, dtrain, num_boost_rounds, evals, early_stopping_rounds=50, verbose_eval=50
            )
            models.append(model) #Not required here

            # Predictions
            y_train_pred = model.predict(dtrain)
            y_val_pred = model.predict(dval)
            y_test_pred = model.predict(dtest)

            # Append predictions for current target to the list
            all_y_train_pred.append(y_train_pred)
            all_y_val_pred.append(y_val_pred)
            all_y_test_pred.append(y_test_pred)

        # Stack predictions for all targets to get the original shape
        y_train_pred = np.column_stack(all_y_train_pred)
        y_val_pred = np.column_stack(all_y_val_pred)
        y_test_pred = np.column_stack(all_y_test_pred)

        # Inverse scaling
        y_train_pred_original = scaler_y.inverse_transform(y_train_pred) #Now y_train_pred has 7 columns matching scaler_y
        y_train_original = scaler_y.inverse_transform(y_train)
        y_val_pred_original = scaler_y.inverse_transform(y_val_pred)  #Now y_val_pred has 7 columns matching scaler_y
        y_val_original = scaler_y.inverse_transform(y_val)
        y_test_pred_original = scaler_y.inverse_transform(y_test_pred) #Now y_test_pred has 7 columns matching scaler_y
        y_test_original = scaler_y.inverse_transform(y_test)


        # Enforce the rule after predictions for childcare expense
        y_train_pred_original[:, childcare_exp_col] = np.where(
            X_train[:, num_children_col] == 0, 0, y_train_pred_original[:, childcare_exp_col])

        y_val_pred_original[:, childcare_exp_col] = np.where(
            X_val[:, num_children_col] == 0, 0, y_val_pred_original[:, childcare_exp_col])

        y_test_pred_original[:, childcare_exp_col] = np.where(
            X_test[:, num_children_col] == 0, 0, y_test_pred_original[:, childcare_exp_col])

        # Metrics for each target
        for i, target in enumerate(target_col_list):
            print(f'Model Results for {target.capitalize()}:')
            print('Train Data:', regr_report(y_train_original[:, i], y_train_pred_original[:, i]))
            print('Validation Data:', regr_report(y_val_original[:, i], y_val_pred_original[:, i]))
            print('Test Data:', regr_report(y_test_original[:, i], y_test_pred_original[:, i])) #Print metrics for test set
            print()

        fold += 1
    return models

# Cross-Validation Execution
xgb_models = cross_val_with_evaluation(X_scaled, y_exp_scaled, kf)'''

In [None]:
# Disabled helper, kept as a bare string literal so nothing in it runs: it
# appends each XGBoost model's predictions to the rows of X_scaled as extra
# feature columns.
'''def createAdditinalFeatures(X_scaled, xgb_models):
  #initialize a list to store the predicitions
    new_y_exp_scaled = []
    # Iterate over the model and generate the prediction
    for model in tqdm(xgb_models):
        predict_vals = model.predict(xgb.DMatrix(X_scaled))
        new_y_exp_scaled.append(predict_vals)

    new_y_exp_scaled_val = np.array(new_y_exp_scaled) # convert the prediction into a numpy array
    new_y_exp_scaled_val_transp = new_y_exp_scaled_val.T # transposing the predictions to make them concatenate with the original values
    a = X_scaled
    b = new_y_exp_scaled_val_transp
    results = []
    for f,t in zip(a,b):
        results.append(list(f)+list(t))

    additional_features =  np.array(results)
    return additional_features'''

In [None]:
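# Disabled: needs `xgb_models` and `createAdditinalFeatures`, which exist only in the disabled cells above.
#additional_features = createAdditinalFeatures(X_scaled, xgb_models)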

In [None]:
# Disabled experiment, kept as a bare string literal so nothing in it runs:
# the hybrid model trained on `additional_features` (a name produced only by
# the disabled cells above).
# NOTE(review): if re-enabled as written, `return model` sits at loop-body
# indentation, so the function returns after the first fold; the `def` would
# also replace the active cross_val_with_evaluation (same name).
'''# Cross-Validation with Evaluation
def cross_val_with_evaluation(X_scaled, y_exp_scaled, kf, num_epochs=200):
    fold = 1
    for train_index, val_index in kf.split(X_scaled):
        print(f"Fold {fold}/{kf.get_n_splits()}")
        X_train, X_test_val = X_scaled[train_index], X_scaled[val_index]
        y_train, y_test_val = y_exp_scaled[train_index], y_exp_scaled[val_index]

        X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=100) # 50/50 split of remaining 20%

        # Reshaping data for CNN
        X_train = X_train[..., np.newaxis]
        X_val = X_val[..., np.newaxis]
        X_test = X_test[..., np.newaxis]

        model = create_hybrid_model((X_train.shape[1], 1), y_train.shape[1])
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

        model.summary()

        # Training
        model.fit(X_train, [y_train[:, i] for i in range(y_train.shape[1])],
                  validation_data=(X_val, [y_val[:, i] for i in range(y_val.shape[1])]),
                  epochs=num_epochs, batch_size=64, verbose=1, callbacks=[early_stopping])

        # Predictions
        y_train_pred = np.column_stack(model.predict(X_train))
        y_val_pred = np.column_stack(model.predict(X_val))
        y_test_pred = np.column_stack(model.predict(X_test)) #Prediction on test set

        # Inverse scaling
        y_train_pred_original = scaler_y.inverse_transform(y_train_pred)
        y_train_original = scaler_y.inverse_transform(y_train)
        y_val_pred_original = scaler_y.inverse_transform(y_val_pred)
        y_val_original = scaler_y.inverse_transform(y_val)
        y_test_pred_original = scaler_y.inverse_transform(y_test_pred) #Inverse transform for test set
        y_test_original = scaler_y.inverse_transform(y_test) #Inverse transform for test set

        # Enforce the rule after predictions for childcare expense
        # The issue is likely caused by X_train having an extra dimension due to[..., np.newaxis].
        # We need to select the original features for the condition
        y_train_pred_original[:, childcare_exp_col] = np.where(
            X_train[:, num_children_col, 0] == 0, 0, y_train_pred_original[:, childcare_exp_col]) #Using X_train[:, num_children_col, 0] instead of X_train[:, num_children_col]

        y_val_pred_original[:, childcare_exp_col] = np.where(
            X_val[:, num_children_col, 0] == 0, 0, y_val_pred_original[:, childcare_exp_col]) #Using X_val[:, num_children_col, 0] instead of X_val[:, num_children_col]

        y_test_pred_original[:, childcare_exp_col] = np.where(
            X_test[:, num_children_col, 0] == 0, 0, y_test_pred_original[:, childcare_exp_col]) #Using X_test[:, num_children_col, 0] instead of X_test[:, num_children_col]

        # Metrics for each target
        for i, target in enumerate(target_col_list):
            print(f'Model Results for {target.capitalize()}:')
            print('Train Data:', regr_report(y_train_original[:, i], y_train_pred_original[:, i]))
            print('Validation Data:', regr_report(y_val_original[:, i], y_val_pred_original[:, i]))
            print('Test Data:', regr_report(y_test_original[:, i], y_test_pred_original[:, i])) #Print metrics for test set
            print()

        fold += 1
        return model # Return the trained model
model_combine = cross_val_with_evaluation(additional_features, y_exp_scaled, kf)
model_combine.save_weights("model_expenses.weights.h5")'''

In [None]:

# Disabled inference example, kept as a bare string literal so nothing in it
# runs: it reloads the scalers, rebuilds the engineered features for a new
# household and predicts its expense breakdown.
# NOTE(review): get_prediction feeds the 11 columns selected in "Step 2" to
# `model_combine`, whereas the disabled training cell above fits
# `model_combine` on `additional_features` -- confirm the input widths agree
# before re-enabling. Its `num_children_col` parameter is not used in the body.
'''
# Load the scalers
scaler_X = joblib.load('scaler_X.pkl')
scaler_y = joblib.load('scaler_y.pkl')

def get_prediction(model_combine, input_data, scaler_X, scaler_y, num_children_col, childcare_exp_col):
    """
    Generates predictions for expenses using the combined model.

    Args:
        model_combine: The trained Keras model.
        input_data: Pandas DataFrame of new input data.
        scaler_X: Fitted MinMaxScaler for features.
        scaler_y: Fitted MinMaxScaler for target variables.
        num_children_col: Integer column index for number of children.
        childcare_exp_col: Integer column index for the 'childcare' target.

    Returns:
         Numpy Array: Predicted expense values.
    """

    # Step 1: Create new features
    input_data_with_features = createFeatures(input_data.copy())

    # Step 2: Select relevant features
    X = input_data_with_features[['total', 'median_family_income', 'num_counties_in_st', 'n_children',
                                  'n_parents', 'n_members', 'per_member_cost', 'child_expense_cost',
                                  'parent_expense_cost', 'other_expense_cost','zero_childcare_cost']].values

    # Step 3: Scale the input data using the fitted scaler_X
    input_data_scaled = scaler_X.transform(X)

    # Step 4: Reshape for the CNN layer (add the extra dimension)
    input_data_scaled = input_data_scaled[..., np.newaxis]  # This line is crucial to match the training data shape

    # Step 5: Make predictions using the trained model
    # We need to unpack the predictions from the model output
    # Then, we can stack those into a single output array.
    input_data_pred = model_combine.predict(input_data_scaled)
    input_data_pred = np.column_stack(input_data_pred)

    # Step 6: Inverse transform the predictions to return them to the original scale
    input_data_pred_original = scaler_y.inverse_transform(input_data_pred)

    # Step 7: Enforce the rule that if there are no children, childcare cost is 0
    input_data_pred_original[:, childcare_exp_col] = np.where(
        input_data_with_features['n_children'].values == 0, 0, input_data_pred_original[:, childcare_exp_col]
    )

    return input_data_pred_original

def createFeatures(input_data):
    """
    Creates additional features needed for the model prediction.

    Args:
        input_data: The input pandas DataFrame with the original data.

    Returns:
        input_data: The input data with additional features.
    """
    # Step 1: Create additional features
    input_data["per_member_cost"] = input_data["total"] / input_data["n_members"]
    input_data["child_expense_cost"] = input_data["per_member_cost"] * input_data["n_children"]
    input_data["parent_expense_cost"] = input_data["per_member_cost"] * input_data["n_parents"]
    input_data["other_expense_cost"] = input_data["total"] - (input_data["child_expense_cost"] + input_data["parent_expense_cost"])

    # Step 2: Create binary column for whether there are children (0 if no children, 1 if there are children)
    input_data["zero_childcare_cost"] = input_data['n_children'].map(lambda x: 0 if x < 1 else 1)

    return input_data

# Example of input data to test predictions
input_data = pd.DataFrame(
    [[4541, 8790.0, 67, 0, 2, 2]],
    columns=['total', 'median_family_income', 'num_counties_in_st', 'n_children', 'n_parents', 'n_members']
)

# Assuming `model_combine` is your trained model
input_data_pred_original = get_prediction(model_combine, input_data, scaler_X, scaler_y, num_children_col=3, childcare_exp_col=5)

# Print predictions
target_col_list = ['housing', 'food', 'transportation', 'healthcare', 'othernecessities', 'childcare', 'taxes']
for i, prediction in enumerate(input_data_pred_original[0]):
    print(f"{target_col_list[i]}: {prediction:.2f}")'''
