In [1]:
import pandas as pd

# Create a simple dataset
data = {
    'feature1': [1, 2, 3, 4, 5],
    'feature2': [5, 4, 3, 2, 1],
    'target': [1, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Save the dataset to a CSV file
df.to_csv("simple-data.csv", index=False)

# Upload to S3
import boto3

bucket = 'wine-bucket-quality'
s3_client = boto3.client('s3')
s3_client.upload_file("simple-data.csv", bucket, "simple-data.csv")


In [2]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()

# Ensure the IAM role is correct
role = 'arn:aws:iam::381491862757:role/SageMakerExecutionRole'  # Replace with your role ARN

# S3 bucket name
bucket = 'wine-bucket-quality'

# Specify the framework version
framework_version = '0.23-1'  # Use the appropriate version based on your requirements

# Create the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='train.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version=framework_version,
    py_version='py3',
    script_mode=True
)

# Start the training job
sklearn_estimator.fit({'train': f's3://{bucket}/simple-data.csv'})


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/studio-lab-user/.config/sagemaker/config.yaml


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-05-31-21-07-35-949


2024-05-31 21:07:36 Starting - Starting the training job...
2024-05-31 21:07:53 Starting - Preparing the instances for training...
2024-05-31 21:08:16 Downloading - Downloading input data...
2024-05-31 21:08:41 Downloading - Downloading the training image...
2024-05-31 21:09:32 Training - Training image download completed. Training in progress.
2024-05-31 21:09:32 Uploading - Uploading generated training model[34m2024-05-31 21:09:25,909 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-05-31 21:09:25,912 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:09:25,955 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-05-31 21:09:26,103 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:09:26,115 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

In [3]:
# Download the simple dataset from S3
s3_client.download_file(bucket, 'simple-data.csv', 'simple-data_downloaded.csv')

# Read the downloaded dataset file
simple_data_downloaded = pd.read_csv('simple-data_downloaded.csv')

# Display the first few rows of the dataset
print("Simple Dataset:")
print(simple_data_downloaded.head())

# Check for the model file (assuming it was saved in a specific path, e.g., 'output/model.tar.gz')
try:
    s3_client.download_file(bucket, 'output/model.tar.gz', 'model_downloaded.tar.gz')
    print("Model file 'model.tar.gz' downloaded successfully.")
except Exception as e:
    print(f"Error downloading model file: {str(e)}")


Simple Dataset:
   feature1  feature2  target
0         1         5       1
1         2         4       0
2         3         3       1
3         4         2       0
4         5         1       1
Error downloading model file: An error occurred (404) when calling the HeadObject operation: Not Found


In [4]:
import boto3

# S3 bucket name
bucket = 'wine-bucket-quality'

# Initialize the S3 client
s3_client = boto3.client('s3')

# List the objects in the S3 bucket
response = s3_client.list_objects_v2(Bucket=bucket)

# Print the keys (file names) in the bucket
if 'Contents' in response:
    print("Files in the bucket:")
    for obj in response['Contents']:
        print(obj['Key'])
else:
    print("No files found in the bucket.")


Files in the bucket:
model/linear_regression_model.pkl
models/linear_regression_model.pkl
models/model.tar.gz
sagemaker/sklearn-wine/linear_regression_model.tar.gz
sagemaker/sklearn-wine/model/linear_regression_model.pkl
simple-data.csv
wine-quality-model/model.joblib
winequality-red.csv
winequality-red2.csv
winequality-white.csv
winequality-white2.csv


In [6]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()

# Ensure the IAM role is correct
role = 'arn:aws:iam::381491862757:role/SageMakerExecutionRole'  # Replace with your role ARN

# S3 bucket name
bucket = 'wine-bucket-quality'

# Specify the framework version
framework_version = '0.23-1'  # Use the appropriate version based on your requirements

# Create the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='simple_train.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version=framework_version,
    py_version='py3',
    script_mode=True,
    output_path=f's3://{bucket}/output'
)

# Start the training job with the simple dataset
sklearn_estimator.fit({'train': f's3://{bucket}/simple-data.csv'})


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-05-31-21-19-31-278


2024-05-31 21:19:31 Starting - Starting the training job...
2024-05-31 21:19:46 Starting - Preparing the instances for training...
2024-05-31 21:20:11 Downloading - Downloading input data...
2024-05-31 21:20:35 Downloading - Downloading the training image...
2024-05-31 21:21:29 Training - Training image download completed. Training in progress.
2024-05-31 21:21:29 Uploading - Uploading generated training model[34m2024-05-31 21:21:20,029 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-05-31 21:21:20,032 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:21:20,071 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-05-31 21:21:20,239 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:21:20,251 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

In [7]:
# Check the latest training job
training_job_name = sklearn_estimator.latest_training_job.name
print(f"Training job name: {training_job_name}")

# Describe the training job
training_job_description = sagemaker_session.describe_training_job(training_job_name)
print(training_job_description)

# Check the output path
output_path = training_job_description['OutputDataConfig']['S3OutputPath']
print(f"Output path: {output_path}")


Training job name: sagemaker-scikit-learn-2024-05-31-21-19-31-278
{'TrainingJobName': 'sagemaker-scikit-learn-2024-05-31-21-19-31-278', 'TrainingJobArn': 'arn:aws:sagemaker:us-east-2:381491862757:training-job/sagemaker-scikit-learn-2024-05-31-21-19-31-278', 'ModelArtifacts': {'S3ModelArtifacts': 's3://wine-bucket-quality/output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/output/model.tar.gz'}, 'TrainingJobStatus': 'Completed', 'SecondaryStatus': 'Completed', 'HyperParameters': {'sagemaker_container_log_level': '20', 'sagemaker_job_name': '"sagemaker-scikit-learn-2024-05-31-21-19-31-278"', 'sagemaker_program': '"simple_train.py"', 'sagemaker_region': '"us-east-2"', 'sagemaker_submit_directory': '"s3://wine-bucket-quality/sagemaker-scikit-learn-2024-05-31-21-19-31-278/source/sourcedir.tar.gz"'}, 'AlgorithmSpecification': {'TrainingImage': '257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3', 'TrainingInputMode': 'File', 'EnableSageMakerMetricsTimeSeries

In [8]:
import boto3

# S3 bucket name
bucket = 'wine-bucket-quality'

# Initialize the S3 client
s3_client = boto3.client('s3')

# List the objects in the S3 bucket
response = s3_client.list_objects_v2(Bucket=bucket, Prefix='output/')

# Print the keys (file names) in the bucket
if 'Contents' in response:
    print("Files in the bucket after training:")
    for obj in response['Contents']:
        print(obj['Key'])
else:
    print("No files found in the bucket.")


Files in the bucket after training:
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/debug-output/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/framework/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190400.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190460.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/training_job_end.ts


In [11]:
import os
import joblib
model_path = os.path.join("/opt/ml/model", "model.joblib")
joblib.dump(model, model_path)


NameError: name 'model' is not defined

In [13]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()

# Ensure the IAM role is correct
role = 'arn:aws:iam::381491862757:role/SageMakerExecutionRole'  # Replace with your role ARN

# S3 bucket name
bucket = 'wine-bucket-quality'

# Create the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='write_text.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='0.23-1',
    py_version='py3',
    script_mode=True,
    output_path=f's3://{bucket}/output'
)

# Start the training job
sklearn_estimator.fit({'train': f's3://{bucket}/simple-data.csv'})


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-05-31-21-31-16-263


2024-05-31 21:31:16 Starting - Starting the training job...
2024-05-31 21:31:30 Starting - Preparing the instances for training...
2024-05-31 21:31:54 Downloading - Downloading input data...
2024-05-31 21:32:19 Downloading - Downloading the training image...
2024-05-31 21:33:10 Training - Training image download completed. Training in progress.
2024-05-31 21:33:10 Uploading - Uploading generated training model[34m2024-05-31 21:33:05,295 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-05-31 21:33:05,298 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:33:05,343 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-05-31 21:33:05,528 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:33:05,540 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

In [14]:
import boto3

# S3 bucket name
bucket = 'wine-bucket-quality'

# Initialize the S3 client
s3_client = boto3.client('s3')

# List the objects in the S3 bucket
response = s3_client.list_objects_v2(Bucket=bucket, Prefix='output/')

# Print the keys (file names) in the bucket
if 'Contents' in response:
    print("Files in the bucket after training:")
    for obj in response['Contents']:
        print(obj['Key'])
else:
    print("No files found in the bucket.")


Files in the bucket after training:
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/debug-output/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/framework/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190400.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190460.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/debug-output/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/output/model.tar.gz
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/profiler-output/framework/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/profiler-output/system/incremental/2024053121/1717191060.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-31-16-

In [15]:
import boto3
import tarfile
import os

# S3 bucket name
bucket = 'wine-bucket-quality'
model_tar_path = 'output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/output/model.tar.gz'
local_model_tar_path = 'model.tar.gz'
local_model_dir = 'model'

# Initialize the S3 client
s3_client = boto3.client('s3')

# Download the model tar file from S3
s3_client.download_file(bucket, model_tar_path, local_model_tar_path)

# Extract the tar file
if not os.path.exists(local_model_dir):
    os.makedirs(local_model_dir)

with tarfile.open(local_model_tar_path, 'r:gz') as tar:
    tar.extractall(path=local_model_dir)

# List the extracted files
extracted_files = os.listdir(local_model_dir)
print("Extracted files:", extracted_files)


Extracted files: ['test.txt']


In [16]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()

# Ensure the IAM role is correct
role = 'arn:aws:iam::381491862757:role/SageMakerExecutionRole'  # Replace with your role ARN

# S3 bucket name
bucket = 'wine-bucket-quality'

# Create the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='train_wine_quality.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='0.23-1',
    py_version='py3',
    script_mode=True,
    output_path=f's3://{bucket}/output'
)

# Start the training job with the wine quality dataset
sklearn_estimator.fit({'train': f's3://{bucket}/winequality-red.csv'})


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-05-31-21-36-51-579


2024-05-31 21:36:51 Starting - Starting the training job...
2024-05-31 21:37:06 Starting - Preparing the instances for training...
2024-05-31 21:37:33 Downloading - Downloading input data...
2024-05-31 21:37:58 Downloading - Downloading the training image...
2024-05-31 21:38:49 Training - Training image download completed. Training in progress.
2024-05-31 21:38:49 Uploading - Uploading generated training model[34m2024-05-31 21:38:42,334 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-05-31 21:38:42,338 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:38:42,383 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-05-31 21:38:42,546 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:38:42,558 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2024-05-31-21-36-51-579: Failed. Reason: AlgorithmError: framework error: 
Traceback (most recent call last):
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_containers/_trainer.py", line 84, in train
    entrypoint()
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_sklearn_container/training.py", line 39, in main
    train(environment.Environment())
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_sklearn_container/training.py", line 35, in train
    runner_type=runner.ProcessRunnerType)
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_training/entry_point.py", line 100, in run
    wait, capture_error
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_training/process.py", line 291, in run
    cwd=environment.code_dir,
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_training/process.py", line 208, in check_error
    info=extra_info,
sagemaker_training.errors.ExecuteUserScriptError: ExecuteUserScriptError:
ExitCode 1
ErrorMessage ""
Command "/miniconda3/bin/python train_wine_quality.py"

ExecuteU

In [17]:
import boto3

# Initialize the SageMaker client
sagemaker_client = boto3.client('sagemaker')

# Get the name of the last training job
training_job_name = sklearn_estimator.latest_training_job.name

# Describe the training job
training_job_description = sagemaker_client.describe_training_job(TrainingJobName=training_job_name)

# Print the training job description to get the log paths
print(training_job_description)

# Retrieve logs from CloudWatch
log_group = '/aws/sagemaker/TrainingJobs'
log_stream_prefix = training_job_name

logs_client = boto3.client('logs')
log_streams = logs_client.describe_log_streams(
    logGroupName=log_group,
    logStreamNamePrefix=log_stream_prefix
)

for log_stream in log_streams['logStreams']:
    log_stream_name = log_stream['logStreamName']
    log_events = logs_client.get_log_events(
        logGroupName=log_group,
        logStreamName=log_stream_name
    )
    for event in log_events['events']:
        print(event['message'])


{'TrainingJobName': 'sagemaker-scikit-learn-2024-05-31-21-36-51-579', 'TrainingJobArn': 'arn:aws:sagemaker:us-east-2:381491862757:training-job/sagemaker-scikit-learn-2024-05-31-21-36-51-579', 'ModelArtifacts': {'S3ModelArtifacts': 's3://wine-bucket-quality/output/sagemaker-scikit-learn-2024-05-31-21-36-51-579/output/model.tar.gz'}, 'TrainingJobStatus': 'Failed', 'SecondaryStatus': 'Failed', 'FailureReason': 'AlgorithmError: framework error: \nTraceback (most recent call last):\n  File "/miniconda3/lib/python3.7/site-packages/sagemaker_containers/_trainer.py", line 84, in train\n    entrypoint()\n  File "/miniconda3/lib/python3.7/site-packages/sagemaker_sklearn_container/training.py", line 39, in main\n    train(environment.Environment())\n  File "/miniconda3/lib/python3.7/site-packages/sagemaker_sklearn_container/training.py", line 35, in train\n    runner_type=runner.ProcessRunnerType)\n  File "/miniconda3/lib/python3.7/site-packages/sagemaker_training/entry_point.py", line 100, in ru

In [18]:
import pandas as pd

# URLs for the datasets
red_wine_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
white_wine_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"

# Read the datasets
red_wine = pd.read_csv(red_wine_url, sep=';')
white_wine = pd.read_csv(white_wine_url, sep=';')

# Display the first few rows of each dataset
print("Red Wine Dataset:")
print(red_wine.head())

print("\nWhite Wine Dataset:")
print(white_wine.head())


Red Wine Dataset:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2 

In [19]:
# Check for missing values in the red wine dataset
print("Red Wine Dataset Missing Values:")
print(red_wine.isnull().sum())

# Check for missing values in the white wine dataset
print("\nWhite Wine Dataset Missing Values:")
print(white_wine.isnull().sum())

# If there are missing values, fill or drop them
# For simplicity, we'll drop any rows with missing values (if any)
red_wine_cleaned = red_wine.dropna()
white_wine_cleaned = white_wine.dropna()

# Display the cleaned datasets
print("\nCleaned Red Wine Dataset:")
print(red_wine_cleaned.head())

print("\nCleaned White Wine Dataset:")
print(white_wine_cleaned.head())


Red Wine Dataset Missing Values:
fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

White Wine Dataset Missing Values:
fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

Cleaned Red Wine Dataset:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8    

In [20]:
# Expected column names
expected_columns = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 
                    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 
                    'pH', 'sulphates', 'alcohol', 'quality']

# Verify column names for the red wine dataset
print("\nRed Wine Dataset Column Names:")
print(red_wine_cleaned.columns)

# Verify column names for the white wine dataset
print("\nWhite Wine Dataset Column Names:")
print(white_wine_cleaned.columns)



Red Wine Dataset Column Names:
Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

White Wine Dataset Column Names:
Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


In [21]:
import boto3

# Save the cleaned datasets to local files
red_wine_cleaned.to_csv("winequality-red-cleaned.csv", index=False)
white_wine_cleaned.to_csv("winequality-white-cleaned.csv", index=False)

# S3 bucket name
bucket = 'wine-bucket-quality'

# Initialize the S3 client
s3_client = boto3.client('s3')

# Upload the cleaned datasets to S3
s3_client.upload_file("winequality-red-cleaned.csv", bucket, "winequality-red-cleaned.csv")
s3_client.upload_file("winequality-white-cleaned.csv", bucket, "winequality-white-cleaned.csv")

print("Cleaned datasets uploaded to S3.")


Cleaned datasets uploaded to S3.


In [22]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Initialize the SageMaker session
sagemaker_session = sagemaker.Session()

# Ensure the IAM role is correct
role = 'arn:aws:iam::381491862757:role/SageMakerExecutionRole'  # Replace with your role ARN

# S3 bucket name
bucket = 'wine-bucket-quality'

# Create the SKLearn estimator
sklearn_estimator = SKLearn(
    entry_point='train_wine_quality.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='0.23-1',
    py_version='py3',
    script_mode=True,
    output_path=f's3://{bucket}/output'
)

# Start the training job with the cleaned dataset
sklearn_estimator.fit({'train': f's3://{bucket}/winequality-red-cleaned.csv'})


INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-05-31-21-44-41-396


2024-05-31 21:44:41 Starting - Starting the training job...
2024-05-31 21:44:55 Starting - Preparing the instances for training...
2024-05-31 21:45:21 Downloading - Downloading input data...
2024-05-31 21:45:46 Downloading - Downloading the training image...
2024-05-31 21:46:37 Training - Training image download completed. Training in progress.
2024-05-31 21:46:37 Uploading - Uploading generated training model[34m2024-05-31 21:46:32,143 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-05-31 21:46:32,147 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:46:32,194 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-05-31 21:46:32,391 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-05-31 21:46:32,403 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-

In [23]:
import boto3

# S3 bucket name
bucket = 'wine-bucket-quality'

# Initialize the S3 client
s3_client = boto3.client('s3')

# List the objects in the S3 bucket
response = s3_client.list_objects_v2(Bucket=bucket, Prefix='output/')

# Print the keys (file names) in the bucket
if 'Contents' in response:
    print("Files in the bucket after training:")
    for obj in response['Contents']:
        print(obj['Key'])
else:
    print("No files found in the bucket.")


Files in the bucket after training:
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/debug-output/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/framework/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190400.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/incremental/2024053121/1717190460.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-19-31-278/profiler-output/system/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/debug-output/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/output/model.tar.gz
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/profiler-output/framework/training_job_end.ts
output/sagemaker-scikit-learn-2024-05-31-21-31-16-263/profiler-output/system/incremental/2024053121/1717191060.algo-1.json
output/sagemaker-scikit-learn-2024-05-31-21-31-16-

In [24]:
# Deploy the model
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

print("Model deployed successfully.")


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-05-31-21-48-17-591
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2024-05-31-21-48-17-591
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2024-05-31-21-48-17-591


-----!Model deployed successfully.


In [25]:
import numpy as np

# Sample data for prediction (use data in the same format as training)
sample_data = {
    'fixed acidity': 7.4,
    'volatile acidity': 0.7,
    'citric acid': 0,
    'residual sugar': 1.9,
    'chlorides': 0.076,
    'free sulfur dioxide': 11,
    'total sulfur dioxide': 34,
    'density': 0.9978,
    'pH': 3.51,
    'sulphates': 0.56,
    'alcohol': 9.4
}

# Convert sample data to a pandas DataFrame
sample_df = pd.DataFrame([sample_data])

# Make a prediction
prediction = predictor.predict(sample_df)
print(f"Predicted quality: {prediction}")


Predicted quality: [5.04834495]


In [26]:
# Delete the endpoint
predictor.delete_endpoint()
print("Endpoint deleted successfully.")


INFO:sagemaker:Deleting endpoint configuration with name: sagemaker-scikit-learn-2024-05-31-21-48-17-591
INFO:sagemaker:Deleting endpoint with name: sagemaker-scikit-learn-2024-05-31-21-48-17-591


Endpoint deleted successfully.


In [27]:
pip freeze

anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1708355285029/work
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work
argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1695386548994/work
arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1696128962909/work
async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1690563019058/work
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1704011227531/work
awscli==1.32.116
Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1702422572539/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1705564648255/work
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1696630167146/work
boto3==1.34.113
botocore==1.34.116
Brotli @ file:///home/conda/feedstock_root

In [5]:
import sagemaker
from sagemaker.estimator import Estimator

# Replace with your role ARN
role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"

# Define the estimator
estimator = Estimator(
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",  # Replace with your bucket
    sagemaker_session=sagemaker.Session()
)

# Start the training job
estimator.fit(inputs={"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})  # Replace with your bucket and data path


INFO:sagemaker:Creating training-job with name: wine-predictor-2024-06-01-00-00-59-750


2024-06-01 00:00:59 Starting - Starting the training job...
2024-06-01 00:01:16 Starting - Preparing the instances for training...
2024-06-01 00:01:43 Downloading - Downloading input data...
2024-06-01 00:02:21 Training - Training image download completed. Training in progress.
2024-06-01 00:02:21 Uploading - Uploading generated training model
2024-06-01 00:02:21 Completed - Training job completed
..Training seconds: 38
Billable seconds: 38


In [10]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"

# Define the estimator
estimator = Estimator(
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",  # Replace with your bucket
    sagemaker_session=sagemaker.Session()
)

# Start the training job
estimator.fit(inputs={"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})  # Replace with your bucket and data path


INFO:sagemaker:Creating training-job with name: wine-predictor-2024-06-01-01-41-08-684


2024-06-01 01:41:08 Starting - Starting the training job...
2024-06-01 01:41:27 Starting - Preparing the instances for training...
2024-06-01 01:41:49 Downloading - Downloading input data...
2024-06-01 01:42:37 Training - Training image download completed. Training in progress.
2024-06-01 01:42:37 Uploading - Uploading generated training model
2024-06-01 01:42:37 Completed - Training job completed
..Training seconds: 48
Billable seconds: 48


In [12]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"

# Define the estimator
estimator = Estimator(
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",  # Replace with your bucket
    sagemaker_session=sagemaker.Session()
)

# Start the training job
estimator.fit(inputs={"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})  # Replace with your bucket and data path


INFO:sagemaker:Creating training-job with name: wine-predictor-2024-06-01-02-04-41-443


2024-06-01 02:04:41 Starting - Starting the training job...
2024-06-01 02:05:06 Starting - Preparing the instances for training...
2024-06-01 02:05:28 Downloading - Downloading input data...
2024-06-01 02:06:11 Training - Training image download completed. Training in progress.
2024-06-01 02:06:11 Uploading - Uploading generated training model
2024-06-01 02:06:11 Completed - Training job completed
..Training seconds: 44
Billable seconds: 44


In [1]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"

# Define the estimator
estimator = Estimator(
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",  # Replace with your bucket
    sagemaker_session=sagemaker.Session()
)

# Start the training job
estimator.fit(inputs={"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})  # Replace with your bucket and data path


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/studio-lab-user/.config/sagemaker/config.yaml


INFO:sagemaker:Creating training-job with name: wine-predictor-2024-06-01-02-40-25-648


2024-06-01 02:40:25 Starting - Starting the training job...
2024-06-01 02:40:40 Starting - Preparing the instances for training...
2024-06-01 02:41:07 Downloading - Downloading input data...
2024-06-01 02:41:47 Training - Training image download completed. Training in progress.
2024-06-01 02:41:47 Uploading - Uploading generated training model[34mContents of /opt/ml/input/data/train:[0m
[34m['winequality-red-cleaned.csv'][0m

2024-06-01 02:42:00 Completed - Training job completed
Training seconds: 53
Billable seconds: 53


In [5]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"

# Define the estimator
estimator = Estimator(
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor-sagemaker:latest",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",  # Replace with your bucket
    sagemaker_session=sagemaker.Session()
)

# Start the training job
estimator.fit(inputs={"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})


INFO:sagemaker:Creating training-job with name: wine-predictor-sagemaker-2024-06-01-03-47-39-320


2024-06-01 03:47:39 Starting - Starting the training job...
2024-06-01 03:47:53 Starting - Preparing the instances for training...
2024-06-01 03:48:19 Downloading - Downloading input data...
2024-06-01 03:49:05 Training - Training image download completed. Training in progress.
2024-06-01 03:49:05 Uploading - Uploading generated training model.[34mContents of /opt/ml/input/data/train: ['winequality-red-cleaned.csv'][0m
[34mModel saved to /opt/ml/model/model.joblib[0m

2024-06-01 03:49:18 Completed - Training job completed
Training seconds: 59
Billable seconds: 59


In [7]:
import boto3

s3 = boto3.client('s3')

# List objects within the specified path in the bucket
response = s3.list_objects_v2(Bucket='wine-bucket-quality', Prefix='output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/')

for obj in response.get('Contents', []):
    print(obj['Key'])


output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/debug-output/training_job_end.ts
output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/output/model.tar.gz
output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/profiler-output/framework/training_job_end.ts
output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/profiler-output/system/incremental/2024060103/1717213680.algo-1.json
output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/profiler-output/system/incremental/2024060103/1717213740.algo-1.json
output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/profiler-output/system/training_job_end.ts


In [9]:
from sagemaker.model import Model

# Create the model
model = Model(
    model_data="s3://wine-bucket-quality/output/wine-predictor-sagemaker-2024-06-01-03-47-39-320/output/model.tar.gz",  # Ensure this path is correct
    image_uri="381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor-sagemaker:latest",
    role=role,
    sagemaker_session=sagemaker.Session()
)

# Deploy the model to an endpoint
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)


INFO:sagemaker:Creating model with name: wine-predictor-sagemaker-2024-06-01-04-31-04-618
INFO:sagemaker:Creating endpoint-config with name wine-predictor-sagemaker-2024-06-01-04-31-05-265
INFO:sagemaker:Creating endpoint with name wine-predictor-sagemaker-2024-06-01-04-31-05-265


------------------------------------------*

UnexpectedStatusException: Error hosting endpoint wine-predictor-sagemaker-2024-06-01-04-31-05-265: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html

In [16]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"
image_uri = "381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor-sagemaker:latest"

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",
    sagemaker_session=sagemaker.Session()
)

estimator.fit({"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})


INFO:sagemaker:Creating training-job with name: wine-predictor-sagemaker-2024-06-01-06-38-47-329


2024-06-01 06:38:47 Starting - Starting the training job...
2024-06-01 06:39:01 Starting - Preparing the instances for training...
2024-06-01 06:39:47 Downloading - Downloading the training image...
2024-06-01 06:40:08 Training - Training image download completed. Training in progress.
2024-06-01 06:40:08 Uploading - Uploading generated training model...
2024-06-01 06:40:20 Completed - Training job completed
..Training seconds: 53
Billable seconds: 53


In [17]:
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ClientError

s3 = boto3.client('s3')

bucket_name = 'wine-bucket-quality'
file_key = 'winequality-red-cleaned.csv'

try:
    s3.head_object(Bucket=bucket_name, Key=file_key)
    print("File exists.")
except ClientError as e:
    if e.response['Error']['Code'] == '404':
        print("File does not exist.")
    else:
        print("Error occurred:", e)
except (NoCredentialsError, PartialCredentialsError) as e:
    print("Credentials not available:", e)


File exists.


In [18]:
from sagemaker.model import Model

model = Model(
    model_data="s3://wine-bucket-quality/output/wine-predictor-sagemaker-2024-06-01-06-38-47-329/output/model.tar.gz",  # Ensure this path is correct
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker.Session()
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)

# Example prediction
response = predictor.predict({
    "fixed_acidity": 7.4, "volatile_acidity": 0.7, "citric_acid": 0, 
    "residual_sugar": 1.9, "chlorides": 0.076, "free_sulfur_dioxide": 11, 
    "total_sulfur_dioxide": 34, "density": 0.9978, "pH": 3.51, 
    "sulphates": 0.56, "alcohol": 9.4
})

print(response)


INFO:sagemaker:Creating model with name: wine-predictor-sagemaker-2024-06-01-06-45-59-472
INFO:sagemaker:Creating endpoint-config with name wine-predictor-sagemaker-2024-06-01-06-46-00-012
INFO:sagemaker:Creating endpoint with name wine-predictor-sagemaker-2024-06-01-06-46-00-012


------------------

KeyboardInterrupt: 

In [19]:
import sagemaker
from sagemaker.estimator import Estimator

role = "arn:aws:iam::381491862757:role/SageMakerExecutionRole"
image_uri = "381491862757.dkr.ecr.us-east-2.amazonaws.com/wine-predictor-sagemaker:latest"

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://wine-bucket-quality/output",
    sagemaker_session=sagemaker.Session()
)

estimator.fit({"train": "s3://wine-bucket-quality/winequality-red-cleaned.csv"})


INFO:sagemaker:Creating training-job with name: wine-predictor-sagemaker-2024-06-01-06-56-18-345


2024-06-01 06:56:18 Starting - Starting the training job...
2024-06-01 06:56:37 Starting - Preparing the instances for training...
2024-06-01 06:57:02 Downloading - Downloading input data...
2024-06-01 06:57:47 Training - Training image download completed. Training in progress.
2024-06-01 06:57:47 Uploading - Uploading generated training model...
2024-06-01 06:58:00 Completed - Training job completed
..Training seconds: 59
Billable seconds: 59


In [20]:
from sagemaker.model import Model

model = Model(
    model_data="s3://wine-bucket-quality/output/wine-predictor/output/model.tar.gz",  # Ensure this path is correct
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker.Session()
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large"
)

# Example prediction
response = predictor.predict({
    "fixed_acidity": 7.4, "volatile_acidity": 0.7, "citric_acid": 0, 
    "residual_sugar": 1.9, "chlorides": 0.076, "free_sulfur_dioxide": 11, 
    "total_sulfur_dioxide": 34, "density": 0.9978, "pH": 3.51, 
    "sulphates": 0.56, "alcohol": 9.4
})

print(response)


INFO:sagemaker:Creating model with name: wine-predictor-sagemaker-2024-06-01-06-58-46-206


ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Could not find model data at s3://wine-bucket-quality/output/wine-predictor/output/model.tar.gz.