In [36]:
import boto3
import pandas as pd
from sagemaker.predictor import Predictor
import sagemaker
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer
from sklearn.metrics import accuracy_score
s3 = boto3.client('s3')

session = boto3.Session()
sagemaker_session = session.client("sagemaker")


In [37]:
#Loading data

bucket_name = "test-bucket-hamady"
test_data_key = "splitData/X_test.csv"
response = s3.get_object(Bucket=bucket_name, Key=test_data_key)

X_test = pd.read_csv(response['Body'])


X_test = X_test[["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked_Q", "Embarked_S"]]


In [38]:
endpoint_name = "sagemaker-xgboost-2025-01-04-05-18-57-532"  
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker.Session(),
    serializer=CSVSerializer(),  
    deserializer=JSONDeserializer() 
)




In [39]:
#récupérer le nom du modèle 
import boto3

# Initialize the SageMaker client
sagemaker_client = boto3.client("sagemaker", region_name="eu-west-3")  # Adjust region

# List all models
response = sagemaker_client.list_models()

# Extract model names
models = [model["ModelName"] for model in response["Models"]]
print("Available models:", models)

Available models: ['sagemaker-xgboost-2025-01-04-05-18-57-532', 'sagemaker-xgboost-2025-01-03-13-33-32-589', 'sagemaker-xgboost-2025-01-03-13-27-51-379', 'sagemaker-xgboost-2025-01-03-13-23-29-426']


In [40]:
import boto3
from sagemaker import get_execution_role
from sagemaker.model import Model


In [80]:
bucket_name = "test-bucket-hamady"
key = "splitData/X_test.csv" 

# Step 1: Download the file
response = s3.get_object(Bucket=bucket_name, Key=key)
data = response['Body'].read().decode('utf-8')

# Step 2: Remove the header
lines = data.splitlines()
data_without_header = "\n".join(lines[1:]) 
new_key = "splitData/X_test_no_header.csv" 
s3.put_object(Bucket=bucket_name, Key=new_key, Body=data_without_header)

{'ResponseMetadata': {'RequestId': 'H9XVAXCM7C498MN5',
  'HostId': 'n3puCe7tYO81pgwqZ39+z/0VT7o7VLkGOY0bZI7IVZOhNARNFEsV5Xlyfh5U8IJuXL7cRUyIwHc=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'n3puCe7tYO81pgwqZ39+z/0VT7o7VLkGOY0bZI7IVZOhNARNFEsV5Xlyfh5U8IJuXL7cRUyIwHc=',
   'x-amz-request-id': 'H9XVAXCM7C498MN5',
   'date': 'Sat, 04 Jan 2025 09:40:22 GMT',
   'x-amz-version-id': 'gB5Ep2yHDjuGKQq1ZtxQoJqvUJahq1Hc',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"c28fed8875545f8e1c42b30dbb36cc83"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"c28fed8875545f8e1c42b30dbb36cc83"',
 'ServerSideEncryption': 'AES256',
 'VersionId': 'gB5Ep2yHDjuGKQq1ZtxQoJqvUJahq1Hc'}

In [85]:
import boto3


sagemaker_client = boto3.client("sagemaker", region_name="eu-west-3") 

# Define job parameters
transform_job_name = "titanicdata-batch-transform-job"
model_name = "sagemaker-xgboost-2025-01-04-05-18-57-532"  
input_data_location = "s3://test-bucket-hamady/splitData/X_test_no_header.csv"  # Input data
output_data_location = "s3://test-bucket-hamady/inference-output/"  # Output location

# Create the transform job
response = sagemaker_client.create_transform_job(
    TransformJobName=transform_job_name,
    ModelName=model_name,
    TransformInput={
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": input_data_location,
            }
        },
        "ContentType": "text/csv",  
    },
    TransformOutput={
        "S3OutputPath": output_data_location,
        "Accept": "text/csv",  
    },
    TransformResources={
        "InstanceType": "ml.m5.xlarge",  
        "InstanceCount": 1,
    },
    BatchStrategy="MultiRecord"
)

print("Batch Transform Job initiated:", response["TransformJobArn"])


Batch Transform Job initiated: arn:aws:sagemaker:eu-west-3:913524936566:transform-job/titanicdata-batch-transform-job


In [91]:
response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
job_status = response["TransformJobStatus"]
print(f"Transform Job Status: {job_status}")

if "FailureReason" in response:
    print(f"Failure Reason: {response['FailureReason']}")



Transform Job Status: Completed


In [101]:
# --- Paramètres S3 et SageMaker ---
bucket_name = "test-bucket-hamady"
output_key = "inference-output/"
y_test_key = "splitData/y_test.csv"

# --- Étape 1 : Récupérer les prédictions ---
# Lister les fichiers d'inférence générés
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=output_key)
prediction_files = [content['Key'] for content in response.get('Contents', [])]

# Télécharger et charger les fichiers des prédictions
predictions = []
for file_key in prediction_files:
    obj = s3.get_object(Bucket=bucket_name, Key=file_key)
    batch_predictions = pd.read_csv(obj['Body'], header=None)  
    predictions.append(batch_predictions)

# Fusionner les prédictions
predictions_df = pd.concat(predictions, axis=0)

# --- Étape 2 : Charger les vraies étiquettes ---
obj = s3.get_object(Bucket=bucket_name, Key=y_test_key)
y_test = pd.read_csv(obj['Body'], header=None)  # Aucun en-tête

#  il faut ignorer la première ligne (index 0)
y_test = y_test[1:].reset_index(drop=True)  

# --- Étape 3 : Convertir les prédictions continues en classes binaires ---
# Appliquer un seuil de 0.5 pour transformer les prédictions en classes binaires
y_pred = (predictions_df[0] > 0.5).astype(int)  # Si > 0.5, prédiction = 1, sinon 0


# Convertir y_test en int 
y_test = y_test[0].astype(int)

# --- Étape 4 : Calculer l'accuracy ---
accuracy = accuracy_score(y_test, y_pred)  

# --- Afficher les résultats ---
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.83


In [104]:
import boto3

# Code pour arréter l'excustion de tous afin d'économiser en ressources
sagemaker = boto3.client('sagemaker')

# Stop all models
def stop_models():
    models = sagemaker.list_models()
    for model in models['Models']:
        model_name = model['ModelName']
        print(f"Stopping model: {model_name}")
        sagemaker.delete_model(ModelName=model_name)

# Stop all batch transform jobs
def stop_batch_jobs():
    jobs = sagemaker.list_transform_jobs()
    for job in jobs['TransformJobSummaries']:
        job_name = job['TransformJobName']
        print(f"Stopping batch job: {job_name}")
        sagemaker.stop_transform_job(TransformJobName=job_name)

# Stop all training jobs
def stop_training_jobs():
    jobs = sagemaker.list_training_jobs()
    for job in jobs['TrainingJobSummaries']:
        job_name = job['TrainingJobName']
        print(f"Stopping training job: {job_name}")
        sagemaker.stop_training_job(TrainingJobName=job_name)

# Stop all endpoints
def stop_endpoints():
    endpoints = sagemaker.list_endpoints()
    for endpoint in endpoints['Endpoints']:
        endpoint_name = endpoint['EndpointName']
        print(f"Stopping endpoint: {endpoint_name}")
        sagemaker.delete_endpoint(EndpointName=endpoint_name)

# Main function to stop all resources
def stop_all_resources():
    print("Stopping all SageMaker models...")
    stop_models()
    
    print("\nStopping all training jobs...")
    stop_training_jobs()
    print("\nStopping all endpoints...")
    stop_endpoints()
    print("\nStopping all batch transform jobs...")
    stop_batch_jobs()

# Execute the function
stop_all_resources()


Stopping all SageMaker models...

Stopping all training jobs...
Stopping training job: sagemaker-xgboost-2025-01-04-05-14-02-316
