In [72]:
import pandas as pd
import numpy as np 
import boto3 
import sagemaker 
from sagemaker import Session
import io 
import sagemaker.amazon.common as smac 
import os 
from sagemaker.amazon.amazon_estimator import get_image_uri 
from sklearn.model_selection import train_test_split


In [73]:
# Read the student hours/scores CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer="student_scores.csv")
df.head(n=5)

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [74]:
# Inspect the (rows, columns) shape of the loaded frame.
df.shape

(25, 2)

In [75]:
# Split the frame into a one-column feature frame ("Hours") and a one-column
# target frame ("Scores"); list-based selection keeps both as DataFrames.
x = df.loc[:, ["Hours"]]
y = df.loc[:, ["Scores"]]


In [76]:
# Show the feature dtypes before the cast, then convert both the features
# and the target to 32-bit floats.
print(x.dtypes)
x = x.astype(np.float32)
y = y.astype(np.float32)


Hours    float64
dtype: object


In [78]:
# Confirm the float32 cast on both frames. Only the last expression of a cell
# is rendered, so the two dtype Series are combined into one displayed value
# instead of silently discarding the first.
pd.concat([x.dtypes, y.dtypes])

Scores    float32
dtype: object

In [79]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split
# (and everything trained on it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [80]:
# Replace the shuffled row labels of every split with a fresh 0..n-1 index,
# discarding the old labels (drop=True).
X_train, y_train, X_test, y_test = (
    part.reset_index(drop=True)
    for part in (X_train, y_train, X_test, y_test)
)

In [81]:
# Display the training features after the index reset.
X_train

Unnamed: 0,Hours
0,9.2
1,4.8
2,2.5
3,3.2
4,1.1
5,8.5
6,2.7
7,7.8
8,7.4
9,5.9


In [82]:
# Check the shape of the training target while it is still a DataFrame.
y_train.shape

(20, 1)

In [83]:
# Reduce the single-column target frame to a 1-D Series by taking its first column.
y_train = y_train[y_train.columns[0]]

In [84]:
# Check the shape of the training target after taking its first column.
y_train.shape

(20,)

In [85]:
# Reduce the single-column test target frame to a 1-D Series by taking its first column.
y_test = y_test[y_test.columns[0]]

In [86]:
# Display the test target values.
y_test

0    60.0
1    76.0
2    21.0
3    35.0
4    95.0
Name: Scores, dtype: float32

In [87]:
# Check the shape of the test target.
y_test.shape

(5,)

In [88]:
# Create the SageMaker session, name the S3 bucket and key prefix used for
# this notebook's data and outputs, and fetch the execution role via
# sagemaker.get_execution_role().
sagemaker_session = Session()
bucket_name = "sagemakerbucket25"
prefix = "linear-learner"
role = sagemaker.get_execution_role()

In [89]:
# Convert the training features to a NumPy array (np.array copies by default).
X_train = np.array(X_train, copy=True)

In [90]:
# Display the training features after conversion to a NumPy array.
X_train

array([[9.2],
       [4.8],
       [2.5],
       [3.2],
       [1.1],
       [8.5],
       [2.7],
       [7.8],
       [7.4],
       [5.9],
       [1.9],
       [3.5],
       [5.1],
       [2.7],
       [3.3],
       [6.1],
       [1.5],
       [4.5],
       [8.3],
       [7.7]], dtype=float32)

In [91]:
# Serialize the TRAINING features and labels into an in-memory buffer, then
# rewind the buffer so a subsequent read starts at the first byte.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, X_train, labels=y_train)
buf.seek(0, io.SEEK_SET)

0

In [92]:
# Name of the uploaded training object.
key = "student-data"

# S3 object keys always use "/" as the delimiter. Build the key explicitly
# rather than with os.path.join, which uses the OS-specific separator, and
# derive the URI from the same key so the two cannot drift apart.
train_object_key = f"{prefix}/train/{key}"

# Upload the serialized training buffer to the bucket.
boto3.resource("s3").Bucket(bucket_name).Object(train_object_key).upload_fileobj(buf)

# S3 URI of the uploaded training data.
s3_train_data = f"s3://{bucket_name}/{train_object_key}"

print("Data Uploaded", s3_train_data)

Data Uploaded s3://sagemakerbucket25/linear-learner/train/student-data


In [93]:
# Convert the test features to a NumPy array as well (np.array copies by default).
X_test = np.array(X_test, copy=True)

In [94]:
# Display the test features after conversion to a NumPy array.
X_test

array([[5.5],
       [6.9],
       [2.5],
       [3.8],
       [8.9]], dtype=float32)

In [95]:
# Check the shape of the test feature array.
X_test.shape

(5, 1)

In [96]:
# Serialize the test features and labels into a fresh in-memory buffer, then
# rewind the buffer so a subsequent read starts at the first byte.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, X_test, labels=y_test)
buf.seek(0, io.SEEK_SET)

0

In [97]:
# Name of the uploaded test object.
key = "student-data-test"

# S3 object keys always use "/" as the delimiter. Build the key explicitly
# rather than with os.path.join, which uses the OS-specific separator, and
# derive the URI from the same key so the two cannot drift apart.
test_object_key = f"{prefix}/test/{key}"

# Upload the serialized test buffer to the bucket.
boto3.resource("s3").Bucket(bucket_name).Object(test_object_key).upload_fileobj(buf)

# S3 URI of the uploaded test data.
s3_test_data = f"s3://{bucket_name}/{test_object_key}"

print("Data Uploaded", s3_test_data)

Data Uploaded s3://sagemakerbucket25/linear-learner/test/student-data-test


In [98]:
# S3 location passed to the estimator as its output path.
output_location = "s3://{}/{}/output".format(bucket_name, prefix)

In [99]:
# Display the S3 output location string.
output_location

's3://sagemakerbucket25/linear-learner/output'

In [100]:
# Look up the linear-learner container image URI for the current boto3 region.
container = sagemaker.image_uris.retrieve(
    framework="linear-learner",
    region=boto3.Session().region_name,
)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [101]:
# Display the resolved container image URI.
container

'669576153137.dkr.ecr.eu-north-1.amazonaws.com/linear-learner:1'

In [102]:
# Configure the estimator: one ml.m5.large training instance running the
# retrieved container, writing its output under output_location.
linear = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=output_location,
    sagemaker_session=sagemaker_session,
)

In [103]:
# Display the estimator object's repr.
linear

<sagemaker.estimator.Estimator at 0x7f15c54de210>

In [104]:
# Hyperparameters for the training job: a regressor over a single feature,
# trained with absolute loss.
linear.set_hyperparameters(
    feature_dim=1,
    predictor_type="regressor",
    mini_batch_size=4,
    epochs=6,
    num_models=32,
    loss="absolute_loss",
)

In [105]:
# Launch the training job, feeding the uploaded training data as the "train" channel.
linear.fit(inputs={"train": s3_train_data})

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: linear-learner-2025-07-11-09-33-58-846


2025-07-11 09:33:59 Starting - Starting the training job...
2025-07-11 09:34:20 Starting - Preparing the instances for training...
2025-07-11 09:34:41 Downloading - Downloading input data...
2025-07-11 09:35:21 Downloading - Downloading the training image........[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[07/11/2025 09:36:47 INFO 139681705682752] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5', 'loss_insensitivity':

In [107]:
# Deploy the trained model to a single ml.m5.xlarge endpoint instance.
# NOTE(review): no endpoint cleanup call is visible in this section — confirm
# the endpoint is deleted once predictions are no longer needed.
linear_regression = linear.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
)

INFO:sagemaker:Creating model with name: linear-learner-2025-07-11-09-43-06-860
INFO:sagemaker:Creating endpoint-config with name linear-learner-2025-07-11-09-43-06-860
INFO:sagemaker:Creating endpoint with name linear-learner-2025-07-11-09-43-06-860


------!

In [108]:
# Requests are serialized with CSVSerializer; responses are parsed with JSONDeserializer.
linear_regression.serializer = sagemaker.serializers.CSVSerializer()
linear_regression.deserializer = sagemaker.deserializers.JSONDeserializer()

In [109]:
# Send the test features to the deployed endpoint and keep the parsed response.
result = linear_regression.predict(data=X_test)

In [110]:
# Display the raw response returned by the endpoint.
result

{'predictions': [{'score': 58.193397521972656},
  {'score': 70.76435852050781},
  {'score': 31.2556095123291},
  {'score': 42.92864990234375},
  {'score': 88.72288513183594}]}

In [112]:
# Pull the "score" value out of each entry under "predictions" into a NumPy array.
predictions = np.array(
    [entry["score"] for entry in result["predictions"]]
)

In [113]:
# Display the extracted prediction scores.
predictions

array([58.19339752, 70.76435852, 31.25560951, 42.9286499 , 88.72288513])