In [None]:
!pip install sagemaker pandas numpy boto3 scikit-learn

In [None]:
import pandas as pd
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator

# Load dataset
df = pd.read_csv("leo_objects_cleaned.csv")

# Drop unnecessary column (ignored if it is not present)
df = df.drop(columns=["unnamed:_0"], errors="ignore")

# Rows without a label cannot be used for supervised training; they would
# also be factorized to the invalid class id -1 below.
df = df.dropna(subset=["object_type"])

# Encode the target as integer class ids 0..K-1. The multi:softmax objective
# requires integer labels in [0, num_class). sort=True keeps the mapping
# deterministic; object_type_classes[i] is the original label of class id i.
labels, object_type_classes = pd.factorize(df["object_type"], sort=True)

# One-hot encode the categorical *features* only. Calling get_dummies on the
# whole frame would also expand the label into several columns, so the label
# would no longer be the first column and df["object_type"] would disappear.
# dtype=float avoids bool dummies being written to CSV as "True"/"False".
features = pd.get_dummies(df.drop(columns=["object_type"]), dtype=float)

# Label first, features after: the training CSV is written without a header
# and the built-in XGBoost algorithm reads the first column as the target.
df = pd.concat(
    [pd.Series(labels, index=df.index, name="object_type"), features],
    axis=1,
)

# Split into train and test
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Save CSV files (no index, no header)
train_df.to_csv("train.csv", index=False, header=False)
test_df.to_csv("test.csv", index=False, header=False)

print("✅ Data Preprocessing Complete")

In [None]:
# Create the SageMaker session and resolve the execution role
sagemaker_session = sagemaker.Session()
role = get_execution_role()

# The session's default bucket is the upload target
bucket = sagemaker_session.default_bucket()
s3_client = boto3.client("s3")

# Upload each local CSV to the bucket root under its own file name
for csv_name in ("train.csv", "test.csv"):
    s3_client.upload_file(csv_name, bucket, csv_name)

# S3 URIs of the uploaded files
train_path = f"s3://{bucket}/train.csv"
test_path = f"s3://{bucket}/test.csv"

print(f"✅ Data uploaded to {train_path}")

In [None]:
# Define SageMaker XGBoost container: look up the image URI of the built-in
# XGBoost algorithm (version 1.5-1) for the session's region.
container = sagemaker.image_uris.retrieve("xgboost", sagemaker_session.boto_region_name, "1.5-1")

# Generic estimator that runs the container on a single ml.m5.large instance
# and writes model artifacts under s3://<bucket>/output.
xgboost = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=f"s3://{bucket}/output",
    sagemaker_session=sagemaker_session
)

# Set hyperparameters
xgboost.set_hyperparameters(
    objective="multi:softmax",
    # Number of distinct classes in the label column
    num_class=int(df["object_type"].nunique()),
    num_round=100,
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    verbosity=2
)

# Define input format
train_input = TrainingInput(train_path, content_type="csv")
test_input = TrainingInput(test_path, content_type="csv")

# Train model; the held-out split is passed as the "validation" channel
xgboost.fit({"train": train_input, "validation": test_input})

print("✅ Training Complete")

In [None]:
# Deploy the trained model
from sagemaker.serializers import CSVSerializer

# A generic Estimator returns a Predictor that sends payloads unmodified by
# default, which cannot transmit a NumPy array. Serialize requests as CSV.
predictor = xgboost.deploy(
    instance_type="ml.m5.large",
    initial_instance_count=1,
    serializer=CSVSerializer(),
)

# Prepare sample data for inference
import numpy as np

# First test row without column 0 ('object_type'), forced to a float array so
# no bool/object values end up in the CSV payload.
sample = np.asarray(test_df.iloc[0, 1:], dtype=float).reshape(1, -1)

# Make a prediction
prediction = predictor.predict(sample)

# The response may be raw bytes; decode it for display
predicted_text = prediction.decode("utf-8") if isinstance(prediction, bytes) else prediction
print(f"🚀 Predicted Object Type: {predicted_text}")

# NOTE: the endpoint keeps running (and billing) until it is deleted.
# Call predictor.delete_endpoint() once you are done with it.