In [101]:
import pandas as pd
import numpy as np
import boto3
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import Session
import io
import sagemaker.amazon.common as smac
import os
from sagemaker.amazon.amazon_estimator import get_image_uri

In [102]:
df = pd.read_csv("student_scores.csv")
df.head()

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [103]:
df.shape

(25, 2)

In [104]:
x = df[["Hours"]]
y = df[["Scores"]]

In [105]:
x.dtypes
x = x.astype("float32")
y = y.astype("float32")

In [106]:
y.dtypes

Scores    float32
dtype: object

In [107]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2)

In [108]:
x_train = x_train.reset_index(drop = True)
y_train = y_train.reset_index(drop = True) 
x_test = x_test.reset_index(drop = True)
y_test = y_test.reset_index(drop = True)

In [109]:
x_train

Unnamed: 0,Hours
0,1.1
1,8.3
2,4.8
3,3.3
4,1.5
5,5.9
6,2.5
7,3.8
8,5.5
9,9.2


In [110]:
y_train

Unnamed: 0,Scores
0,17.0
1,81.0
2,54.0
3,42.0
4,20.0
5,62.0
6,30.0
7,35.0
8,60.0
9,88.0


In [111]:
y_train = y_train.iloc[:,0]
y_train

0     17.0
1     81.0
2     54.0
3     42.0
4     20.0
5     62.0
6     30.0
7     35.0
8     60.0
9     88.0
10    85.0
11    95.0
12    21.0
13    41.0
14    47.0
15    25.0
16    30.0
17    86.0
18    69.0
19    24.0
Name: Scores, dtype: float32

In [112]:
y_test = y_test.iloc[:,0]
y_test

0    27.0
1    76.0
2    75.0
3    30.0
4    67.0
Name: Scores, dtype: float32

In [113]:
sagemaker_session = sagemaker.Session()
bucket_name = "buckett-sagemaker"
prefix = "linear-learner"
role = sagemaker.get_execution_role()

In [114]:
x_train = np.array(x_train)

In [115]:
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,x_train,y_train)
buf.seek(0)

0

In [116]:
key = "student-data"
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train',key)).upload_fileobj(buf)
s3_train_data = f"s3://{bucket_name}/{prefix}/train/{key}"
print("Data uploaded",s3_train_data)

Data uploaded s3://buckett-sagemaker/linear-learner/train/student-data


In [117]:
x_test = np.array(x_test)
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,x_test,y_test)
buf.seek(0)
key = "student-data-test"
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test',key)).upload_fileobj(buf)
s3_test_data = f"s3://{bucket_name}/{prefix}/test/{key}"
print("Data uploaded",s3_train_data)

Data uploaded s3://buckett-sagemaker/linear-learner/train/student-data


In [118]:
output_location = f"s3://{bucket_name}/{prefix}/output"
output_location

's3://buckett-sagemaker/linear-learner/output'

In [120]:
container = sagemaker.image_uris.retrieve("linear-learner",boto3.Session().region_name)

[2;36m[06/19/25 15:13:57][0m[2;36m [0m[1;94mINFO    [0m Same images used for training and ]8;id=200792;file:///opt/conda/lib/python3.11/site-packages/sagemaker/image_uris.py\[2mimage_uris.py[0m]8;;\[2m:[0m]8;id=312247;file:///opt/conda/lib/python3.11/site-packages/sagemaker/image_uris.py#393\[2m393[0m]8;;\
[2;36m                    [0m         inference. Defaulting to image    [2m                 [0m
[2;36m                    [0m         scope: inference.                 [2m                 [0m
[2;36m                   [0m[2;36m [0m[1;94mINFO    [0m Ignoring unnecessary instance     ]8;id=922857;file:///opt/conda/lib/python3.11/site-packages/sagemaker/image_uris.py\[2mimage_uris.py[0m]8;;\[2m:[0m]8;id=564481;file:///opt/conda/lib/python3.11/site-packages/sagemaker/image_uris.py#530\[2m530[0m]8;;\
[2;36m                    [0m         type: [3;35mNone[0m.                       [2m                 [0m


In [124]:
linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       instance_count=1,
                                       instance_type="ml.c4.xlarge",  
                                       output_path=output_location,
                                       sagemaker_session=sagemaker_session)


In [126]:
linear.set_hyperparameters(feature_dim = 1,
                           predictor_type = "regressor",
                           mini_batch_size = 4,
                           epochs = 6,
                           num_models = 32,
                           loss = "absolute_loss")           

In [127]:
linear.fit({"train":s3_train_data})

[2;36m[06/19/25 15:30:29][0m[2;36m [0m[1;94mINFO    [0m SageMaker Python SDK will   ]8;id=551010;file:///opt/conda/lib/python3.11/site-packages/sagemaker/telemetry/telemetry_logging.py\[2mtelemetry_logging.py[0m]8;;\[2m:[0m]8;id=964503;file:///opt/conda/lib/python3.11/site-packages/sagemaker/telemetry/telemetry_logging.py#91\[2m91[0m]8;;\
[2;36m                    [0m         collect telemetry to help   [2m                       [0m
[2;36m                    [0m         us better understand our    [2m                       [0m
[2;36m                    [0m         user's needs, diagnose      [2m                       [0m
[2;36m                    [0m         issues, and deliver         [2m                       [0m
[2;36m                    [0m         additional features.        [2m                       [0m
[2;36m                    [0m         To opt out of telemetry,    [2m                       [0m
[2;36m                    [0m        

In [129]:
linear_regressor = linear.deploy(initial_instance_count = 1, instance_type = "ml.m4.xlarge")

[2;36m[06/19/25 15:52:49][0m[2;36m [0m[1;94mINFO    [0m Creating model with name:           ]8;id=813661;file:///opt/conda/lib/python3.11/site-packages/sagemaker/session.py\[2msession.py[0m]8;;\[2m:[0m]8;id=715971;file:///opt/conda/lib/python3.11/site-packages/sagemaker/session.py#4094\[2m4094[0m]8;;\
[2;36m                    [0m         linear-learner-[1;36m2025[0m-06-19-15-52-49- [2m               [0m
[2;36m                    [0m         775                                 [2m               [0m
[2;36m[06/19/25 15:52:50][0m[2;36m [0m[1;94mINFO    [0m Creating endpoint-config with name  ]8;id=202151;file:///opt/conda/lib/python3.11/site-packages/sagemaker/session.py\[2msession.py[0m]8;;\[2m:[0m]8;id=393293;file:///opt/conda/lib/python3.11/site-packages/sagemaker/session.py#5889\[2m5889[0m]8;;\
[2;36m                    [0m         linear-learner-[1;36m2025[0m-06-19-15-52-49- [2m               [0m
[2;36m                    [0m 

In [134]:
linear_regressor.serializer = sagemaker.serializers.CSVSerializer()
linear_regressor.deserializer = sagemaker.deserializers.JSONDeserializer()

In [135]:
results = linear_regressor.predict(x_test)

In [136]:
results

{'predictions': [{'score': 26.486217498779297},
  {'score': 71.33539581298828},
  {'score': 90.72962951660156},
  {'score': 20.425518035888672},
  {'score': 61.638267517089844}]}

In [138]:
predictions = np.array([i["score"] for i in results["predictions"]])

In [139]:
predictions

array([26.4862175 , 71.33539581, 90.72962952, 20.42551804, 61.63826752])