In [1]:
import sagemaker
import boto3
from sagemaker.estimator import Estimator



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [27]:
# Session setup: SageMaker session, execution role, region, and low-level AWS clients.
sess = sagemaker.Session()
# NOTE(review): `bucket` (the session's default bucket) is not referenced by the
# cells visible here, which use `bucket_name` instead — confirm it is still needed.
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Build one boto3 session and reuse it for the region lookup and both clients,
# instead of constructing a fresh session for each of them.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name="sagemaker", region_name=region)
s3 = boto_session.client(service_name="s3", region_name=region)

In [28]:
# S3 bucket used for the training data and model output paths in this notebook.
bucket_name = "ads-508-final"

In [29]:
# Resolve the image URI of SageMaker's built-in Factorization Machines
# algorithm for the current region.
container = sagemaker.image_uris.retrieve(
    framework="factorization-machines",
    region=region,
)

# The estimator is told to write its training output beneath this S3 prefix.
fm_output_path = f"s3://{bucket_name}/factorization-machines/output"

# Estimator for a single ml.m5.large training instance.
fm_estimator = Estimator(
    sagemaker_session=sess,
    output_path=fm_output_path,
    instance_type="ml.m5.large",
    instance_count=1,
    role=role,
    image_uri=container,
)

In [30]:
# Hyperparameters for the Factorization Machines training job.
# TODO: consider hyperparameter tuning for these values.
# NOTE(review): feature_dim=2 looks small for Factorization Machines input —
# confirm it matches the width of the feature vectors in the training data.
fm_hyperparameters = {
    "feature_dim": 2,
    "num_factors": 64,
    "predictor_type": "binary_classifier",
    "mini_batch_size": 1000,
    "epochs": 10,
}
fm_estimator.set_hyperparameters(**fm_hyperparameters)

In [31]:
# Both RecordIO files live under the same "transformeddata" prefix of the bucket.
transformed_prefix = f"s3://{bucket_name}/transformeddata"
s3_train_path = f"{transformed_prefix}/sm_fm_train.recordio"
s3_test_path = f"{transformed_prefix}/sm_fm_test.recordio"

In [32]:
# Both channels are declared with the same RecordIO-protobuf content type.
recordio_content_type = "application/x-recordio-protobuf"

train_input = sagemaker.inputs.TrainingInput(
    s3_data=s3_train_path,
    content_type=recordio_content_type,
)
test_input = sagemaker.inputs.TrainingInput(
    s3_data=s3_test_path,
    content_type=recordio_content_type,
)

# Channel name -> input definition, as passed to the estimator's fit().
data_channels = dict(train=train_input, test=test_input)

In [33]:
# Launch the training job on the train/test channels. The defaults are spelled
# out: block until the job finishes and stream its logs into the cell output.
fm_estimator.fit(inputs=data_channels, wait=True, logs="All")

2025-04-01 04:50:28 Starting - Starting the training job...
..25-04-01 04:50:42 Starting - Preparing the instances for training.
..25-04-01 04:51:03 Downloading - Downloading input data.
...........04:51:53 Downloading - Downloading the training image.
[34mDocker entrypoint called with argument(s): train[0mmpleted. Training in progress.
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
[34m[04/01/2025 04:53:45 INFO 140639020992320] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factors_init_sigma': '

In [2]:
# Look up the final metrics that SageMaker recorded for the training job.
#
# Prefer the job launched by `fm_estimator` in this kernel, so that a full
# re-run reports the metrics of the job it just trained rather than an older
# one. When no estimator (or no fitted job) exists in this kernel — e.g. a
# fresh kernel that only inspects results — fall back to the recorded job name.
_latest_job = getattr(globals().get('fm_estimator'), 'latest_training_job', None)
training_job_name = (
    _latest_job.name
    if _latest_job is not None
    else 'factorization-machines-2025-04-01-04-50-23-673'
)

sm_client = boto3.client('sagemaker')
response = sm_client.describe_training_job(TrainingJobName=training_job_name)
# Default to an empty list when the response has no 'FinalMetricDataList' key.
metrics = response.get('FinalMetricDataList', [])

In [3]:
# Index the metric records by their 'MetricName' for direct lookup; if a name
# repeats, the later record replaces the earlier one.
metric_dict = {}
for metric_record in metrics:
    metric_dict[metric_record['MetricName']] = metric_record

# Create a helper to print nicely
def print_metric(name, metric_dict):
    """Print one metric's value and timestamp, or a "Not found" line.

    Args:
        name: Metric name to look up in ``metric_dict``.
        metric_dict: Mapping of metric name to a record that provides
            'Value' (formatted to four decimal places) and 'Timestamp'.

    Returns:
        None. The result is written to standard output.
    """
    # Guard clause: report missing metrics and stop.
    if name not in metric_dict:
        print(f"{name}: Not found")
        return

    entry = metric_dict[name]
    print(f"{name}: {entry['Value']:.4f} (timestamp: {entry['Timestamp']})")

# Report the F1 and accuracy metrics, grouped under a heading per metric family.
metric_report_sections = (
    ("F1 Scores", ('train:binary_f_beta:epoch', 'test:binary_f_beta')),
    (
        "\nAccuracy",
        (
            'train:binary_classification_accuracy:epoch',
            'test:binary_classification_accuracy',
        ),
    ),
)

for section_heading, section_metric_names in metric_report_sections:
    print(section_heading)
    for section_metric_name in section_metric_names:
        print_metric(section_metric_name, metric_dict)

🔍 F1 Scores
train:binary_f_beta:epoch: 0.9802 (timestamp: 2025-04-01 04:59:42+00:00)
test:binary_f_beta: 0.9868 (timestamp: 2025-04-01 04:59:47+00:00)

📊 Accuracy
train:binary_classification_accuracy:epoch: 0.9612 (timestamp: 2025-04-01 04:59:42+00:00)
test:binary_classification_accuracy: 0.9740 (timestamp: 2025-04-01 04:59:47+00:00)
