## Building our Container Testing Scripts.

In [None]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
import sagemaker
import time
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
import sagemaker
import time



# Session and IAM execution role used to launch the processing job.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# Custom container image (hosted in ECR) that the processing job runs in.
image_uri = '917456409349.dkr.ecr.us-east-2.amazonaws.com/my-custom-sklearn'

# Processor that executes a script with python3 inside the custom image
# on a single ml.m5.xlarge instance, capped at 1200 seconds of runtime.
script_processor = ScriptProcessor(
    command=['python3'],
    image_uri=image_uri,
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
    max_runtime_in_seconds=1200,
    sagemaker_session=sess,
)

# Timestamp suffix gives each run a distinct job name.
job_name = f"shap-process-{int(time.time())}"

# Inputs copied from S3 into the container before the script starts.
model_input = ProcessingInput(
    source='s3://dockerevalcontainer/processing/input/model/model.tar.gz',
    destination='/opt/ml/processing/input/model/',
)
data_input = ProcessingInput(
    source='s3://dockerevalcontainer/processing/input/data/scored.csv',
    destination='/opt/ml/processing/input/data/',
)

# Everything written under the container output directory is uploaded to S3.
shap_output = ProcessingOutput(
    source='/opt/ml/processing/output/',
    destination='s3://dockerevalcontainer/processing/output/',
)

# Launch shap_process.py; wait=True / logs=True ask the SDK to block until
# the job ends and to stream its logs into the notebook.
script_processor.run(
    code='shap_process.py',
    inputs=[model_input, data_input],
    outputs=[shap_output],
    job_name=job_name,
    wait=True,
    logs=True,
)


In [None]:
import boto3

# List the five most recently created processing jobs with their status.
sm_client = boto3.client('sagemaker')

listing_options = {
    'SortBy': 'CreationTime',
    'SortOrder': 'Descending',
    'MaxResults': 5,
}
response = sm_client.list_processing_jobs(**listing_options)

for job in response['ProcessingJobSummaries']:
    print(job['ProcessingJobName'], job['ProcessingJobStatus'])



In [None]:
import boto3

# Inspect a single processing job: overall status plus any failure details.
sm = boto3.client('sagemaker')
job_name = 'shap-process-1753319283'  # replace with actual job name as needed

response = sm.describe_processing_job(ProcessingJobName=job_name)
print("Status:", response['ProcessingJobStatus'])
# FailureReason / ExitMessage are optional in the response, so fall back to
# 'N/A' when absent. (SecondaryStatus is a training-job field and is not part
# of the DescribeProcessingJob response, so it is not printed here.)
print("FailureReason:", response.get('FailureReason', 'N/A'))
print("ExitMessage:", response.get('ExitMessage', 'N/A'))


In [None]:
import boto3

# Print every object key under the processing job's S3 output prefix.
s3 = boto3.client('s3')
bucket = 'dockerevalcontainer'
prefix = 'processing/output/'

# A single list_objects_v2 call returns at most 1000 keys; the paginator
# follows continuation tokens so larger listings are not silently truncated.
paginator = s3.get_paginator('list_objects_v2')

print("Files in output prefix:")
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    # 'Contents' is absent from a page when nothing matches the prefix.
    for obj in page.get('Contents', []):
        print(obj['Key'])



## Helper code below for downloading files from S3 to test locally

In [None]:
#Testing input files are identical for ingestion

# import pandas as pd

# file1 = 'scored.csv'
# file2 = 'sample_data_output.csv'

# df1 = pd.read_csv(file1)
# df2 = pd.read_csv(file2)

# # Compare shapes first
# if df1.shape != df2.shape:
#     print(f"Files differ in shape: {df1.shape} vs {df2.shape}")
# else:
#     # Compare data content
#     if df1.equals(df2):
#         print("Files are identical.")
#     else:
#         print("Files have the same shape but differ in content.")


In [None]:
# Compare the SHAP output produced by the local scikit run against the
# output of the full end-to-end run.

import pandas as pd

file1 = 'final_with_shap.csv'
file2 = 'final_with_shap_fullendtoend.csv'

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# A shape mismatch is reported first; content is only compared when the
# two frames have the same dimensions.
if df1.shape != df2.shape:
    print(f"Files differ in shape: {df1.shape} vs {df2.shape}")
elif df1.equals(df2):
    print("Files are identical.")
else:
    print("Files have the same shape but differ in content.")


In [None]:
# Download a CSV from S3 into a DataFrame for working in SageMaker.

# Imported here so the cell runs on its own; `io` in particular is needed
# for io.BytesIO below.
import io

import boto3
import pandas as pd

bucket = "sagemaker-us-east-2-917456409349"  # update as needed
key = "sagemaker/adoption/output_zone_sample/sample_data_output.csv"  # update as needed

s3 = boto3.client('s3')

# Read the whole object into memory, then parse the bytes as CSV.
response = s3.get_object(Bucket=bucket, Key=key)
content = response['Body'].read()

df = pd.read_csv(io.BytesIO(content))


# Last expression of the cell, so the notebook displays the first rows.
df.head()


In [None]:
# Write the DataFrame to a local CSV file, leaving out the index column.
output_path = "sample_data_output.csv"
df.to_csv(path_or_buf=output_path, index=False)




In [None]:
# Testing our CSV locally with shap_process.py before uploading.
# These paths name the local files involved in that test run.

model_tar_path = 'model.tar.gz'  # local model tarball
input_csv_path = 'sample_data_output.csv'  # local input file
output_csv_path = 'final_with_shap.csv'  # local final output with SHAP

# shap_process.py itself is run from a bash shell, not from this notebook:
#python shap_process.py



## Testing locally and remotely as needed

In [None]:
# After a full end-to-end run: check that the output shap_process.py
# produced locally matches what the pipeline produced.


import pandas as pd

file1 = 'final_with_shap_local.csv'
file2 = 'final_with_shap.csv'

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Decide on the message first, then print it once.
same_shape = df1.shape == df2.shape
if not same_shape:
    comparison_message = f"Files differ in shape: {df1.shape} vs {df2.shape}"
elif df1.equals(df2):
    comparison_message = "Files are identical."
else:
    comparison_message = "Files have the same shape but differ in content."
print(comparison_message)


In [None]:
### Checking performance at end of full pipeline via docker etc.

# Compares the pipeline's predictions against the golden-record outcomes
# and reports classification metrics, a confusion matrix and a ROC curve.
import pandas as pd
import boto3
from io import BytesIO
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
    roc_curve, confusion_matrix, ConfusionMatrixDisplay, classification_report
)
import matplotlib.pyplot as plt

# Load ground truth from S3
bucket = "sagemaker-us-east-2-917456409349"
key = "sagemaker/adoption/golden_record/df_cat_dog_harmonized_Sample_With_Outcome.csv"

s3 = boto3.client('s3')
obj = s3.get_object(Bucket=bucket, Key=key)
df_true = pd.read_csv(BytesIO(obj['Body'].read()))

# Convert ground truth to binary: adopted=1 else 0
df_true['true_label'] = (df_true['outcome_type_harmonized_grouped'] == 'adopted').astype(int)

# Load predicted local file (downloaded from the output landing zone; point
# this at whichever scored file needs checking)
df_pred = pd.read_csv("final_with_shap.csv")

# Merge on primary_key (inner join: only keys present in both files survive)
df_merged = pd.merge(
    df_true[['primary_key', 'true_label']],
    df_pred[['primary_key', 'predicted_label', 'predicted_proba']],
    on='primary_key',
)

# Extract arrays
y_true = df_merged['true_label']
y_pred = df_merged['predicted_label']
# NOTE(review): assumes predicted_proba is the probability of the positive
# (adopted = 1) class -- confirm against shap_process.py.
y_proba = df_merged['predicted_proba']

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)
auc = roc_auc_score(y_true, y_proba)

print(f"Scored file shape: {df_pred.shape}")
print(f"Golden file shape: {df_true.shape}")
print(f"Merged shape: {df_merged.shape}\n")

# The inner join drops unmatched keys and multiplies duplicated ones, so a
# row-count mismatch means the metrics below do not cover the scored file
# exactly once per row.
if len(df_merged) != len(df_pred):
    print(
        f"WARNING: merged row count ({len(df_merged)}) differs from scored "
        f"row count ({len(df_pred)}); check for missing or duplicate "
        "primary_key values.\n"
    )

print("PERFORMANCE METRICS")
print(f" Accuracy:  {accuracy:.4f}")
print(f" Precision: {precision:.4f}")
print(f" Recall:    {recall:.4f}")
print(f" F1 Score:  {f1:.4f}")
print(f" AUC:       {auc:.4f}")

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.title("Confusion Matrix")
plt.show()

# ROC Curve
fpr, tpr, thresholds = roc_curve(y_true, y_proba)
plt.plot(fpr, tpr, label=f'AUC = {auc:.4f}')
# Dashed diagonal drawn from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.show()


print("Test Classification Report for adoption score (XGBoost):")
print(classification_report(
    y_true,
    y_pred,
    labels=[0, 1],                   # Report row order: non-adopted (0), then adopted (1)
    target_names=['non-adopted', 'adopted'],  # Must line up with `labels` above
    digits=6
))



In [None]:
# Local and remote are identical, and model performance is as expected so pipeline is working correctly.

In [None]:
# Testing the updated output.
# After a full end-to-end run: check that what worked locally in
# shap_process.py also works in the pipeline.


import pandas as pd

file1 = 'final_with_shap_ratios_Update.csv'
file2 = 'final_with_shap_ratios.csv'

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Content is compared only when both frames have the same dimensions.
if df1.shape == df2.shape:
    print(
        "Files are identical."
        if df1.equals(df2)
        else "Files have the same shape but differ in content."
    )
else:
    print(f"Files differ in shape: {df1.shape} vs {df2.shape}")
