In [None]:
### XGBoost_LOS Build and Testing Local - Multiclass only


## Setup

In [None]:
#Importing key packages
import io
import os
import boto3
import sagemaker
import time
import botocore
from sagemaker import get_execution_role, image_uris, model_uris, script_uris, hyperparameters
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base
from sagemaker.tuner import HyperparameterTuner
from time import gmtime, strftime
import matplotlib.pyplot as plt  # plt must be pyplot (plt.title / plt.show are used later), not the top-level matplotlib package
import numpy as np
import pandas as pd


In [None]:
# Resolve the IAM execution role and build the boto3 / SageMaker sessions used by later cells.
role = get_execution_role()
boto_session = boto3.Session()
sess = sagemaker.Session(boto_session=boto_session)
region = boto_session.region_name

# Echo the resolved settings so the run shows which role and region it used.
print("Role:", role)
print("Region:", region)
print("SageMaker Session Region:", sess.boto_region_name)

In [None]:
# S3 location for this notebook's artifacts: bucket name plus key prefix.
bucket = 'xgb-los-multi'
s3_bucket_prefix = "xgb-multi-code/"
# Combined "<bucket>/<key prefix>" string; the key prefix already ends with "/".
prefix = "/".join([bucket, s3_bucket_prefix])


In [None]:
# Display the combined "<bucket>/<key prefix>" string built in the previous cell.
prefix

In [None]:
# Look up the SageMaker-managed XGBoost container image (version 1.7-1) for this region.
# NOTE: a later cell re-assigns `container` to version 1.5-1 before the estimator is built,
# so the image resolved here is not the one the training job uses.
container = image_uris.retrieve(framework="xgboost", region=region, version="1.7-1")


## Train/Test/Split

In [None]:
# Load the harmonized cat/dog golden-record CSV from S3; it is encoded and split below.
# NOTE: this rebinds `bucket` (previously the xgb-los-multi artifact bucket) to the data bucket.
bucket = "sagemaker-us-east-2-917456409349"
key = "sagemaker/adoption/golden_record/df_cat_dog_harmonized.csv"

s3_client = boto3.client("s3")

# Download the whole object into memory, then parse the bytes as CSV.
obj = s3_client.get_object(Bucket=bucket, Key=key)
csv_bytes = obj['Body'].read()
df = pd.read_csv(io.BytesIO(csv_bytes))

# Preview the first rows.
df.head()

In [None]:
# Deduplicate on primary_key, keeping the last row seen for each key.
rows_before = len(df)
df_deduped = df.drop_duplicates(subset=['primary_key'], keep='last')

print("Original rows:", rows_before)
print("After deduplication:", len(df_deduped))

# Continue with an independent copy so later column assignments do not touch df_deduped.
df = df_deduped.copy()
print("New rows for df:", len(df))

In [None]:
#Adding Custom Train/Test/Split
def assign_split(row):
    """Return the split name for one record based on its 'outcome_year'.

    <= 2022 -> "train"; 2023 or 2024 -> "validate"; 2025 -> "test";
    anything else (later years, or values that compare False such as NaN) -> "exclude".
    """
    year = row['outcome_year']
    if year <= 2022:
        return "train"
    if year in (2023, 2024):
        return "validate"
    # Remaining cases: the held-out test year, or an unexpected year.
    return "test" if year == 2025 else "exclude"

# Tag every row with its split; rows tagged "exclude" match none of the train/validate/test filters used later.
df['split'] = df.apply(assign_split, axis="columns")


In [None]:

# Columns of interest (target first, then raw features). The trimming step that would
# use this list is commented out below, so the full column set of the source file is kept.
features_to_keep = [
    # target
    'outcome_type_harmonized_grouped',
    # categorical / flag columns
    'animal_type',
    'primary_breed_harmonized',
    'primary_color_harmonized',
    'sex',
    'intake_type_harmonized',
    'Is_returned',
    'has_name',
    'is_mix',
    # numeric columns
    'Num_returned',
    'age_months',
    'stay_length_days',
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]

# # Trim the DataFrame to only those columns
# df = df[features_to_keep].copy()

# #EDIT: Only training on models with features to keep

In [None]:
# List every column currently in df, followed by the column count.
all_columns = df.columns.to_list()
print(all_columns)
print("Total columns:", len(all_columns))

In [None]:
#Pre-encoding Adoption
# Turn the grouped outcome into a binary target: 1 = 'adopted', 0 = anything else.
# The conversion overwrites the column in place, so it is guarded: once the column is
# integer-typed (i.e. this cell has already run), re-running would compare ints against
# the string 'adopted' and silently turn every label into 0.
if not pd.api.types.is_integer_dtype(df['outcome_type_harmonized_grouped']):
    df['outcome_type_harmonized_grouped'] = (df['outcome_type_harmonized_grouped'] == 'adopted').astype(int)

In [None]:
#Encoding
from sklearn.preprocessing import LabelEncoder

# Work on a copy so `df` keeps the un-encoded columns.
encoded_df = df.copy()

# Categorical / flag columns that get an integer code.
columns_to_encode = [
    'animal_type', 'primary_breed_harmonized', 'primary_color_harmonized',
    'sex', 'intake_type_harmonized',
    'Is_returned', 'has_name', 'is_mix'
]

# Fitted encoder per source column, kept so the category -> code mapping can be reused.
le_dict = {}

# Fit one LabelEncoder per column and store the codes in a new "Encoded-<col>" column;
# the original column is left untouched.
# Values are cast to str first, so every value (including missing ones, which become
# their string form) is encoded as a category.
# NOTE(review): the encoders are fitted on all rows of encoded_df, i.e. before the
# train/validate/test rows are separated.
for col in columns_to_encode:
    le = LabelEncoder()
    encoded_col_name = f"Encoded-{col}"
    encoded_df[encoded_col_name] = le.fit_transform(encoded_df[col].astype(str))
    le_dict[col] = le

# Fill missing age_months with the median computed over all rows of encoded_df.
median_age = encoded_df['age_months'].median()
encoded_df['age_months'] = encoded_df['age_months'].fillna(median_age)



In [None]:
# Show the (rows, columns) shape of the encoded frame.
np.shape(encoded_df)

In [None]:
# Preview the first rows, including the new "Encoded-*" columns.
encoded_df.head()

In [None]:
# Persist the fitted label encoders locally (just in case) so the
# category -> code mapping can be reloaded later.
import pickle

with open("label_encoders.pkl", "wb") as f:
    f.write(pickle.dumps(le_dict))



In [None]:
# The multiclass model needs its own target column.
# Raw outcome types are collapsed into three non-adopted classes; any outcome type not
# listed here is labelled "other" and filtered out before the multiclass prediction.

# Raw outcome types grouped by the class they map to.
_raw_outcomes_by_class = {
    "rescue": ["rescue", "return to rescue"],
    "foster": ["foster", "foster to adopt", "rtf"],
    "return_to_owner": ["return to owner"],
}

# Flattened lookup: raw outcome type -> class label.
non_adopted_subtypes = {
    raw_outcome: class_label
    for class_label, raw_outcomes in _raw_outcomes_by_class.items()
    for raw_outcome in raw_outcomes
}

# Label each row with its non-adopted class; outcome types that are not keys of the
# mapping (and missing values) end up as "other".
_mapped_outcomes = encoded_df["outcome_type_harmonized"].map(non_adopted_subtypes)
encoded_df["non_adopted_label"] = _mapped_outcomes.fillna("other")



In [None]:
# Inspect the first rows to confirm the new non_adopted_label column.
encoded_df.head()

In [None]:
# Splitting for training: one frame per value of the 'split' column.
df_train, df_test, df_validate = (
    encoded_df[encoded_df['split'] == split_name]
    for split_name in ('train', 'test', 'validate')
)

# Save each to CSV (header kept, no index).
# NOTE: "train.csv" and "test.csv" are written again, without headers, by the later
# SageMaker data-prep cell, which overwrites these files.
df_train.to_csv("train.csv", index=False)
df_test.to_csv("test.csv", index=False)
df_validate.to_csv("validate.csv", index=False)

# Output sizes
print("Train rows:", len(df_train))
print("Test rows:", len(df_test))
print("Validate rows:", len(df_validate))

## Training the binary adoption model locally so the multiclass model can be tested only on animals with predicted adoption probability < 0.5 (local run; only needs to be done once)

In [None]:
# Train the model so we can get a prediction; if our predictions are similar to the endpoint testing in other files, we will use this to subset on
# the adoption prediction, then do the XGBoost grid search

In [None]:
#Running a local grid search (GridSearchCV) to see if we can improve Test performance

from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import pandas as pd

# Model inputs for the binary adoption model: label-encoded categoricals first,
# then the numeric columns. The order defines the column order of X.
_encoded_inputs = [
    'Encoded-animal_type',
    'Encoded-primary_breed_harmonized',
    'Encoded-primary_color_harmonized',
    'Encoded-sex',
    'Encoded-intake_type_harmonized',
    'Encoded-Is_returned',
    'Encoded-has_name',
    'Encoded-is_mix',
]
_numeric_inputs = [
    'age_months',
    'Num_returned',
    'stay_length_days',  # left out in the length-of-stay prediction, but kept here
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
feature_columns = _encoded_inputs + _numeric_inputs


# Build feature matrices and binary targets for each split, restricted to feature_columns.
_binary_target = 'outcome_type_harmonized_grouped'

X_train, y_train = df_train[feature_columns], df_train[_binary_target]
X_val, y_val = df_validate[feature_columns], df_validate[_binary_target]
X_test, y_test = df_test[feature_columns], df_test[_binary_target]

# Train and validation rows are pooled; GridSearchCV does its own fold split on them.
X_trainval = pd.concat([X_train, X_val], axis=0)
y_trainval = pd.concat([y_train, y_val], axis=0)

# Hyperparameter grid: 3 * 3 * 3 * 3 * 3 * 2 = 486 candidate combinations.
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1, 0.2],
    'gamma': [0, 2, 4],
    'min_child_weight': [1, 4, 6],
    'subsample': [0.6, 0.8, 1.0],
    'n_estimators': [50, 100],
}

# Base binary classifier (adopted vs. not adopted); the grid above overrides its tunable settings.
# NOTE(review): n_jobs=-1 is set both here and on GridSearchCV below — confirm the nested
# parallelism does not oversubscribe the machine.
xgb_base = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    n_jobs=-1,
    random_state=42
)

# Grid search scored by F1 with 3-fold CV: 486 candidates x 3 folds = 1458 fits.
grid_search = GridSearchCV(
    estimator=xgb_base,
    param_grid=param_grid,
    scoring='f1',
    cv=3,
    verbose=1,
    n_jobs=-1
)

# Fit on the pooled train + validation rows and keep the best estimator found.
grid_search.fit(X_trainval, y_trainval)
best_model = grid_search.best_estimator_

print("Best parameters found:", grid_search.best_params_)

# Evaluation helper
def evaluate_performance(X, y_true, dataset_name):
    """Print a classification report and plot a confusion matrix for one dataset.

    Predictions come from the module-level `best_model`.

    X            -- feature matrix passed to best_model.predict
    y_true       -- ground-truth labels aligned with X
    dataset_name -- label used in the printed heading and the plot title
    """
    predicted = best_model.predict(X)
    print(f"\n{dataset_name} Set Performance:")
    print(classification_report(y_true, predicted))

    matrix_display = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(y_true, predicted))
    matrix_display.plot(cmap='Blues')
    plt.title(f"{dataset_name} Confusion Matrix")
    plt.show()

# Report metrics on each split. Training and validation rows were pooled for the
# grid-search fit, so only the Test numbers come from rows the model did not see.
for _features, _labels, _split_name in (
    (X_train, y_train, "Training"),
    (X_val, y_val, "Validation"),
    (X_test, y_test, "Test"),
):
    evaluate_performance(_features, _labels, _split_name)

In [None]:
#Model is consistent so continuing the process 

In [None]:
# Output the best hyperparameters found by the binary grid search and save that model.

# Keep the winning parameter combination in its own variable and show it.
best_params = grid_search.best_params_
print(best_params)

# Write the fitted binary adoption model to a local JSON file.
best_model.save_model("best_xgb_model_local.json") 



In [None]:
#We get the exact same parameters, so we are good to train on this new data set for our likely outcome for non-adoption




In [None]:
#Now we run whole encoded data set through to add our prediction and probability 
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Feature columns used to score the whole encoded data set:
# label-encoded categoricals first, then the numeric columns.
_encoded_inputs = [
    'Encoded-animal_type',
    'Encoded-primary_breed_harmonized',
    'Encoded-primary_color_harmonized',
    'Encoded-sex',
    'Encoded-intake_type_harmonized',
    'Encoded-Is_returned',
    'Encoded-has_name',
    'Encoded-is_mix',
]
_numeric_inputs = [
    'age_months',
    'Num_returned',
    'stay_length_days',
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
feature_columns_los = _encoded_inputs + _numeric_inputs

# Score every row of encoded_df (all splits) with the binary adoption model.
X_encoded = encoded_df[feature_columns_los]

# Use the binary target as ground truth when the column is present; otherwise skip evaluation.
if 'outcome_type_harmonized_grouped' in encoded_df.columns:
    y_encoded = encoded_df['outcome_type_harmonized_grouped']
else:
    y_encoded = None

# Hard class prediction plus the probability of the positive class.
y_pred = best_model.predict(X_encoded)
y_proba = best_model.predict_proba(X_encoded)[:, 1]  # Probability of positive class (adopted)

# Attach both to encoded_df; 'predicted_proba' is later used to filter the multiclass test split.
encoded_df['predicted_label'] = y_pred
encoded_df['predicted_proba'] = y_proba

# Evaluate performance over the whole frame.
# NOTE(review): this includes the train/validate rows the model was fitted on, so these
# numbers are not a held-out estimate.
if y_encoded is not None:
    print("Classification Report on encoded_df:")
    print(classification_report(y_encoded, y_pred))

    cm = confusion_matrix(y_encoded, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap='Blues')
    plt.title("Confusion Matrix on encoded_df")
    plt.show()
else:
    print("No ground truth labels found; predictions added to DataFrame.")


# Quick look at the two new columns.
print(encoded_df[['predicted_label', 'predicted_proba']].head())

# Display the full frame as the cell output.
encoded_df

In [None]:
# List every column of encoded_df (now including the prediction columns) and the column count.
all_columns = encoded_df.columns.to_list()
print(all_columns)
print("Total columns:", len(all_columns))

In [None]:
# Show the (rows, columns) shape of encoded_df after the prediction columns were added.
np.shape(encoded_df)

## Training Multiclass now locally to ensure we get good results


In [None]:
# Encode non_adopted_label as numeric class codes for the multiclass model.
# "other" gets -1 and is removed when the multiclass splits are built.
label_map = {'foster': 0, 'rescue': 1, 'return_to_owner': 2, 'other': -1}

# Translate each text label to its code and store it with pandas' nullable Int64 dtype.
_label_codes = encoded_df["non_adopted_label"].map(label_map)
encoded_df["Encoded-non_adopted_label"] = _label_codes.astype('Int64')


In [None]:
# Build the train/test/validate frames for the multiclass model.
# Rows labelled "other" are dropped from every split; the test split is additionally
# restricted to rows whose predicted adoption probability is below 0.5.
valid_classes = ['foster', 'rescue', 'return_to_owner']
_has_valid_class = encoded_df['non_adopted_label'].isin(valid_classes)
_split_of_row = encoded_df['split']
_predicted_not_adopted = encoded_df['predicted_proba'] < 0.5

multi_predicted_df_train = encoded_df[(_split_of_row == 'train') & _has_valid_class]
multi_predicted_df_test = encoded_df[(_split_of_row == 'test') & _predicted_not_adopted & _has_valid_class]
multi_predicted_df_validate = encoded_df[(_split_of_row == 'validate') & _has_valid_class]

# Save cleaned splits
multi_predicted_df_train.to_csv("multi_train.csv", index=False)
multi_predicted_df_test.to_csv("multi_test.csv", index=False)
multi_predicted_df_validate.to_csv("multi_validate.csv", index=False)

# Output sizes and unique labels for a sanity check
print(" Filtered Datasets")
print("Train rows:", len(multi_predicted_df_train), "Classes:", multi_predicted_df_train['non_adopted_label'].unique())
print("Validate rows:", len(multi_predicted_df_validate), "Classes:", multi_predicted_df_validate['non_adopted_label'].unique())
print("Test rows:", len(multi_predicted_df_test), "Classes:", multi_predicted_df_test['non_adopted_label'].unique())


In [None]:
# Display the filtered multiclass training frame.
multi_predicted_df_train

In [None]:
#Then we train locally using a grid search (GridSearchCV) to find our best model fit and ensure our Test is only on animals
#that have a predicted adoption probability <50%
#Trying to predict the multiclass "non_adopted_label"

#Running the local grid search to see if we can improve Test performance

from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
import numpy as np

# Features for the multiclass model: label-encoded categoricals first, then numeric columns.
_encoded_inputs = [
    'Encoded-animal_type',
    'Encoded-primary_breed_harmonized',
    'Encoded-primary_color_harmonized',
    'Encoded-sex',
    'Encoded-intake_type_harmonized',
    'Encoded-Is_returned',
    'Encoded-has_name',
    'Encoded-is_mix',
]
_numeric_inputs = [
    'age_months',
    'Num_returned',
    'stay_length_days',
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
feature_columns_multi = _encoded_inputs + _numeric_inputs

# Features: pool train + validate rows (GridSearchCV does its own fold split) and
# replace any remaining missing feature values with 0.
X_trainval = pd.concat([
    multi_predicted_df_train[feature_columns_multi],
    multi_predicted_df_validate[feature_columns_multi]
]).fillna(0)

# Target: the encoded label column is stored as pandas' nullable Int64. After the
# "other" rows were filtered out it only holds 0/1/2, so cast it to a plain int dtype:
# that is what scikit-learn / XGBoost expect for class labels, and it matches the
# `.astype(int)` used when the endpoint is evaluated.
y_trainval = pd.concat([
    multi_predicted_df_train['Encoded-non_adopted_label'],
    multi_predicted_df_validate['Encoded-non_adopted_label']
]).astype(int)

# Held-out test split, prepared the same way.
X_test = multi_predicted_df_test[feature_columns_multi].fillna(0)
y_test = multi_predicted_df_test['Encoded-non_adopted_label'].astype(int)

# Parameter grid for the local grid search: 3 * 3 * 3 * 3 * 3 * 2 = 486 candidate combinations.
# NOTE: this rebinds `param_grid` from the binary search (gamma options differ: [0, 1, 4] here).
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1, 0.2],
    'gamma': [0, 1, 4],
    'min_child_weight': [1, 4, 6],
    'subsample': [0.6, 0.8, 1.0],
    'n_estimators': [50, 100]
}

# Multiclass XGBoost classifier that outputs one probability per class.
xgb_model = XGBClassifier(
    objective='multi:softprob',
    num_class=3,  # foster, rescue, return_to_owner
    eval_metric='mlogloss',
    random_state=42
)

# Grid search scored by macro-averaged F1 with 3-fold CV.
# NOTE: this rebinds `grid_search`; from here on it refers to the multiclass search,
# not the binary one.
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=3,
    verbose=1,
    n_jobs=-1
)

# Fit on the pooled train + validation rows.
grid_search.fit(X_trainval, y_trainval)

# Keep the best multiclass estimator under its own name (`best_model` stays the binary model).
best_model_multi = grid_search.best_estimator_
print("Best Parameters multi:", grid_search.best_params_)

# Evaluate on test
# Pass the full label set explicitly so the report and matrix keep a fixed three-class
# layout. The test split is filtered (test year, predicted_proba < 0.5, valid classes),
# so a class can be absent; without `labels`, classification_report raises when the
# number of labels found does not match the three target_names.
multi_class_codes = [0, 1, 2]
y_pred = best_model_multi.predict(X_test)

print("\nClassification Report:\n",
      classification_report(y_test, y_pred, labels=multi_class_codes,
                            target_names=['foster', 'rescue', 'return_to_owner']))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=multi_class_codes))



In [None]:
# Output the best hyperparameters of the multiclass grid search and save that model.

best_params_multi = grid_search.best_params_
print(best_params_multi)

# Save the MULTICLASS estimator. `best_model` is the binary adoption model from the
# earlier grid search; saving it here would write the wrong model under the multiclass file name.
best_model_multi.save_model("best_xgb_model_multi_local.json")


In [None]:
#Showing XGBMulticlass

from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Classes and numeric mapping (index i corresponds to encoded label i)
class_names = ['foster', 'rescue', 'return_to_owner']
n_classes = len(class_names)

# One-vs-rest indicator matrix of the true labels, one column per class
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])

# Per-class probabilities from the MULTICLASS model. `best_model` is the binary
# adoption model; its predict_proba has only two columns, so indexing the third class
# below would fail and the curves would describe the wrong model.
y_score = best_model_multi.predict_proba(X_test)

# Compute ROC curve and AUC for each class (one-vs-rest)
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC and AUC by flattening all (row, class) pairs
fpr["micro"], tpr["micro"], _ = roc_curve(y_test_bin.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Plot all ROC curves on a single axes object.
fig, ax = plt.subplots(figsize=(8, 6))
for i, label in enumerate(class_names):
    ax.plot(fpr[i], tpr[i], lw=2, label=f'ROC curve ({label}) (AUC = {roc_auc[i]:.2f})')

ax.plot(fpr["micro"], tpr["micro"], color='deeppink', linestyle=':', linewidth=2,
        label=f'Micro-average ROC (AUC = {roc_auc["micro"]:.2f})')

# Diagonal reference line.
ax.plot([0, 1], [0, 1], 'k--', lw=2)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Multiclass ROC Curve')
ax.legend(loc='lower right')
ax.grid(True)
plt.show()



## Training for XGBoost Endpoint Multiclass


In [None]:
# Recall the available columns so the right features are passed to XGBoost.

all_columns = multi_predicted_df_test.columns.to_list()
print(all_columns)
print("Total columns for los_predicted structure:", len(all_columns))



In [None]:
#Data Prep from above work to S3 so sagemaker can access

# Setup
import boto3
import sagemaker
from sagemaker.estimator import Estimator
import pandas as pd


# SageMaker setup for the training job.
# NOTE: `role`, `bucket` and `prefix` are rebound here and replace the values assigned
# by earlier cells (the execution role lookup, the data bucket, and the
# "<bucket>/<key prefix>" string).
session = sagemaker.Session()
# NOTE(review): hard-coded role ARN — confirm it should be used instead of the execution role resolved at the top.
role = "arn:aws:iam::917456409349:role/Sagemaker_Execution_Role"

# Artifact bucket and key prefixes; training data is uploaded under "<prefix>/data".
bucket = "xgb-los-multi"
prefix = "xgb-multi-code"
data_prefix = f"{prefix}/data"

# Feature and target columns for the SageMaker training data:
# label-encoded categoricals first, then numeric columns.
_encoded_inputs = [
    'Encoded-animal_type',
    'Encoded-primary_breed_harmonized',
    'Encoded-primary_color_harmonized',
    'Encoded-sex',
    'Encoded-intake_type_harmonized',
    'Encoded-Is_returned',
    'Encoded-has_name',
    'Encoded-is_mix',
]
_numeric_inputs = [
    'age_months',
    'Num_returned',
    'stay_length_days',
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
feature_columns_multi = _encoded_inputs + _numeric_inputs

# Numeric multiclass label column.
target_column = "Encoded-non_adopted_label"

# Function to reorder columns for SageMaker (target first)
def prepare_for_sagemaker(df, target=None, features=None):
    """Return a copy of `df` laid out for SageMaker XGBoost CSV input.

    The result has the target column first, followed by the feature columns in the
    given order; all other columns are dropped. Missing feature values are replaced
    with 0, matching the `.fillna(0)` applied to the same features for the local grid
    search and for the endpoint test payload, so the hosted model is trained on inputs
    prepared the same way as those it is tuned and queried with.

    df       -- source DataFrame containing the target and feature columns
    target   -- target column name; defaults to the module-level `target_column`
    features -- feature column names; defaults to the module-level `feature_columns_multi`

    The input frame is not modified.
    """
    target = target_column if target is None else target
    features = list(feature_columns_multi if features is None else features)

    prepared = df[[target] + features].copy()
    # Only the features are filled; the target is left exactly as stored.
    prepared[features] = prepared[features].fillna(0)
    return prepared

# Prepare train/validate/test frames in SageMaker layout (target first, then features).
train_df = prepare_for_sagemaker(multi_predicted_df_train)
val_df = prepare_for_sagemaker(multi_predicted_df_validate)
test_df = prepare_for_sagemaker(multi_predicted_df_test)

# Save as CSV without header or index.
# NOTE: "train.csv" and "test.csv" reuse the file names written by the earlier
# train/test/validate split cell, so those local files are overwritten here.
train_df.to_csv("train.csv", header=False, index=False)
val_df.to_csv("validation.csv", header=False, index=False)
test_df.to_csv("test.csv", header=False, index=False)

# Upload the three files to s3://<bucket>/<data_prefix>/.
s3 = boto3.client("s3")
s3.upload_file("train.csv", bucket, f"{data_prefix}/train.csv")
s3.upload_file("validation.csv", bucket, f"{data_prefix}/validation.csv")
s3.upload_file("test.csv", bucket, f"{data_prefix}/test.csv")

print(" Uploaded to:")
print(f"s3://{bucket}/{data_prefix}/train.csv")
print(f"s3://{bucket}/{data_prefix}/validation.csv")
print(f"s3://{bucket}/{data_prefix}/test.csv")

# S3 URIs passed to the training job as its "train" and "validation" channels.
train_s3_path = f"s3://{bucket}/{data_prefix}/train.csv"
val_s3_path = f"s3://{bucket}/{data_prefix}/validation.csv"


In [None]:
#Now we access S3 to run our endpoint and deployment training

# Container image for the XGBoost training job (version 1.5-1).
# NOTE: this rebinds `container`, replacing the 1.7-1 image resolved near the top of the notebook.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=session.boto_region_name,
    version="1.5-1",
)

#{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 50, 'subsample': 0.6}

import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput


# Create the estimator for the multiclass problem: one ml.m5.xlarge instance,
# 10 GB volume, at most 3600 s of training; artifacts go to s3://<bucket>/<prefix>/output.
xgb_estimator_multi = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size=10,
    max_run=3600,
    output_path=f"s3://{bucket}/{prefix}/output",
    sagemaker_session=session
)

# Hyperparameters for multiclass, adjusting a bit from pretraining in effort to improve model based on test cases
#from student team in "ShelterData_ML_Final.ipynb"
# The values below match the parameter set recorded in the comment earlier in this cell
# (gamma 0, learning_rate 0.1, max_depth 3, min_child_weight 1, n_estimators 50, subsample 0.6).

xgb_estimator_multi.set_hyperparameters(
    objective="multi:softprob",  # Multiclass classification
    num_class=3,                # foster, rescue, return_to_owner
    eval_metric="mlogloss",     # Multiclass metric
    eta=0.1,                    # learning rate
    max_depth=3,
    min_child_weight=1,
    subsample=0.6,
    gamma=0,
    num_round=50,               # number of boosting rounds
    seed=42
)

# Launch the training job with the uploaded CSVs as the "train" and "validation" channels.
print("Starting training job for multiclass model...")
xgb_estimator_multi.fit({
    "train": TrainingInput(train_s3_path, content_type="text/csv"),
    "validation": TrainingInput(val_s3_path, content_type="text/csv")
})

print("Model stored at:",f"s3://{bucket}/{prefix}/output")

In [None]:
# Re-print the S3 output location of the trained model (same message as at the end of the training cell).
print("Model stored at:",f"s3://{bucket}/{prefix}/output")

In [None]:
#Last, we deploy

from sagemaker.serializers import CSVSerializer


# Deploy the trained multiclass model to a real-time endpoint on one ml.m5.large instance.
# NOTE(review): the endpoint name is fixed; presumably re-running this cell while the
# endpoint still exists will fail — confirm and delete the old endpoint first if needed.
print(" Deploying endpoint...")
xgb_predictor_multi = xgb_estimator_multi.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name="xgb-multi-endpoint3"
)

# Configure the predictor to send request payloads as CSV.
xgb_predictor_multi.serializer = CSVSerializer()
print ("endpoint is live:",xgb_predictor_multi.endpoint_name)



In [None]:
# Inspect multi_predicted_df_test: list all of its columns and the column count.
all_columns = multi_predicted_df_test.columns.to_list()
print(all_columns)
print("Total columns:", len(all_columns))

In [None]:
# Display the filtered multiclass test frame that is sent to the endpoint below.
multi_predicted_df_test

In [None]:
#Now we test the endpoint using multi_predicted_df_test 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sagemaker.serializers import CSVSerializer
from sagemaker.predictor import Predictor

# Attach to the already-deployed endpoint by name and send request payloads as CSV.
endpoint_name = "xgb-multi-endpoint3"
xgb_predictor_multi = Predictor(endpoint_name, serializer=CSVSerializer())

# Features sent to the endpoint, in the same order as the training CSV columns:
# label-encoded categoricals first, then numeric columns.
_encoded_inputs = [
    'Encoded-animal_type',
    'Encoded-primary_breed_harmonized',
    'Encoded-primary_color_harmonized',
    'Encoded-sex',
    'Encoded-intake_type_harmonized',
    'Encoded-Is_returned',
    'Encoded-has_name',
    'Encoded-is_mix',
]
_numeric_inputs = [
    'age_months',
    'Num_returned',
    'stay_length_days',
    'min_height',
    'max_height',
    'min_weight',
    'max_weight',
    'min_expectancy',
    'max_expectancy',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
feature_columns_multi = _encoded_inputs + _numeric_inputs

# Numeric multiclass label column.
target_column = "Encoded-non_adopted_label"

# Prepare the endpoint test data: keep only rows whose encoded label is one of the
# three real classes (drops 'other' / null), working on an independent copy.
# NOTE: this rebinds df_test, X_test and y_test from the earlier cells.
_is_known_class = multi_predicted_df_test[target_column].isin([0, 1, 2])
df_test = multi_predicted_df_test.loc[_is_known_class].copy()

# Features with missing values replaced by 0; labels as a plain list of Python ints.
X_test = df_test.loc[:, feature_columns_multi].fillna(0)
y_test = [int(code) for code in df_test[target_column]]

print(f" Using {len(X_test)} rows and {len(feature_columns_multi)} features for inference")

# Query the endpoint in batches and collect one predicted class index per test row.
predictions = []
batch_size = 100

for i in range(0, X_test.shape[0], batch_size):
    batch = X_test.iloc[i:i+batch_size]

    # One CSV line per row, each value rendered with str().
    csv_rows = [",".join(str(value) for value in row) for row in batch.to_numpy()]
    payload = "\n".join(csv_rows)

    if i == 0:
        print(" Sample payload being sent:", csv_rows[0])

    decoded = xgb_predictor_multi.predict(payload).decode("utf-8").strip()
    if not decoded:
        print(f"Empty response for batch {i}-{i+batch_size}")
        continue

    # Every response line is parsed as one row's comma-separated class probabilities;
    # the predicted class is the index of the largest value.
    preds = [
        int(np.argmax([float(p) for p in line.split(",")]))
        for line in decoded.split("\n")
    ]
    predictions.extend(preds)

print(f"Predictions received: {len(predictions)} rows")

# --- Evaluation ---
# Class names in encoded-label order (0, 1, 2). The label codes are passed explicitly so
# the report and matrix keep a fixed three-class layout even when a class is absent from
# the filtered test rows or from the endpoint's predictions; without `labels`,
# classification_report raises if the labels found do not match the three target_names.
labels = ['foster', 'rescue', 'return_to_owner']
class_codes = [0, 1, 2]
print("\n Classification Report:")
print(classification_report(y_test, predictions, labels=class_codes, target_names=labels))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions, labels=class_codes))
