#### 1. Clean Data and Split Locally

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset (IceCreamData.csv)
df = pd.read_csv("IceCreamData.csv")  # Replace with your path

# Feature is 'Temperature'; target is 'Revenue'.
# Cleaning step: drop rows where either of the two used columns is missing,
# so NaN values never end up in the training/test arrays.
df_clean = df.dropna(subset=['Temperature', 'Revenue'])

# Double brackets keep both arrays 2-D with shape (N, 1); cast to float32
X = df_clean[['Temperature']].values.astype('float32')
y = df_clean[['Revenue']].values.astype('float32')

# Split data: hold out 20% as the test set; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


#### 2. Convert to RecordIO and Upload to S3

In [30]:
import io
import boto3
import os
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import LinearLearner
from sagemaker.session import Session

In [42]:
# SageMaker session plus the S3 bucket/prefix used by the rest of the notebook
session = sagemaker.Session()
bucket, prefix = 'aws-ml-data-model', 'linear-learner'

# Execution role used later when constructing the estimator
role = sagemaker.get_execution_role()
print(role)

arn:aws:iam::800156317112:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole


In [39]:
# Upload train data
# If y_train has shape (N, 1), flatten it to (N,) before serialization
if y_train.ndim > 1:
    y_train = y_train.reshape(-1)  # or y_train = y_train[:, 0]

# Serialize features and labels into an in-memory buffer
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, X_train, y_train)
buf.seek(0)  # rewind so the upload reads from the beginning of the buffer

key = 'train/linear-train-data'
# S3 object keys always use '/' as the separator; os.path.join would insert the
# OS-specific separator and could disagree with the URI built below.
boto3.resource('s3').Bucket(bucket).Object(f'{prefix}/{key}').upload_fileobj(buf)
s3_train_data = f's3://{bucket}/{prefix}/{key}'
print(f"Uploaded train data to: {s3_train_data}")

Uploaded train data to: s3://aws-ml-data-model/linear-learner/train/linear-train-data


In [40]:
# Upload test data
# If y_test has shape (N, 1), flatten it to (N,) before serialization
if y_test.ndim > 1:
    y_test = y_test.reshape(-1)  # or y_test = y_test[:, 0]

# Serialize features and labels into an in-memory buffer
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, X_test, y_test)
buf.seek(0)  # rewind so the upload reads from the beginning of the buffer

key = 'test/linear-test-data'
# S3 object keys always use '/' as the separator; os.path.join would insert the
# OS-specific separator and could disagree with the URI built below.
boto3.resource('s3').Bucket(bucket).Object(f'{prefix}/{key}').upload_fileobj(buf)
s3_test_data = f's3://{bucket}/{prefix}/{key}'
print(f"Uploaded test data to: {s3_test_data}")

Uploaded test data to: s3://aws-ml-data-model/linear-learner/test/linear-test-data


#### 3. Train Model

In [45]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

# Look up the 'linear-learner' container image for the session's region
container = sagemaker.image_uris.retrieve('linear-learner', session.boto_region_name)

# Single-instance training job; model artifacts are written under the notebook's S3 prefix
linear_estimator = Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=session
)

linear_estimator.set_hyperparameters(
    predictor_type='regressor',
    mini_batch_size=10,
    epochs=10
)

# Reuse the URIs produced by the upload cells instead of hard-coded copies, so the
# channels stay correct if `bucket` or `prefix` is changed.
# NOTE: the 'validation' channel is fed the held-out test split.
train_input = TrainingInput(s3_data=s3_train_data, content_type='application/x-recordio-protobuf')
validation_input = TrainingInput(s3_data=s3_test_data, content_type='application/x-recordio-protobuf')

linear_estimator.fit({'train': train_input, 'validation': validation_input})

2025-05-22 14:54:17 Starting - Starting the training job...
2025-05-22 14:54:43 Starting - Preparing the instances for training......
2025-05-22 14:55:24 Downloading - Downloading input data...
2025-05-22 14:55:54 Downloading - Downloading the training image......
2025-05-22 14:57:15 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[05/22/2025 14:57:26 INFO 140317557905216] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'opt

#### 4. Deploy

In [48]:
# Deploy the trained estimator on a single ml.m4.xlarge instance and keep the returned predictor
predictor = linear_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

-------------!

#### 5. Inference

In [50]:
import numpy as np
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import CSVSerializer

# Send requests as CSV and parse responses as JSON
predictor.deserializer = JSONDeserializer()
predictor.serializer = CSVSerializer()

# One observation with a single feature value
sample = np.array(
    [[32.0]],
    dtype='float32',
)

response = predictor.predict(sample)
print(response)

{'predictions': [{'score': 732.4500732421875}]}


In [51]:
# Cleanup: remove the hosted resources created by deploy() so nothing is left behind.
# Delete the model while the endpoint still exists (presumably the predictor resolves
# its model through the endpoint configuration — verify against the SDK docs).
predictor.delete_model()
predictor.delete_endpoint()