## Link to Kaggle Notebook to Upload Datasets to S3 Bucket

https://www.kaggle.com/code/samanyuparvathaneni/capstone-project-final-notebook

## Exploratory Data Analysis (EDA) and Experiments with Various Models – Kaggle Notebook Link

https://www.kaggle.com/code/samanyuparvathaneni/capstone-project-notebook

In [1]:
pip install -r requirements.txt

Collecting sagemaker==1.7.1 (from -r requirements.txt (line 1))
  Using cached sagemaker-1.7.1.tar.gz (162 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting boto3==1.17.52 (from -r requirements.txt (line 6))
  Using cached boto3-1.17.52-py2.py3-none-any.whl.metadata (6.4 kB)
Collecting botocore==1.20.52 (from -r requirements.txt (line 9))
  Using cached botocore-1.20.52-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting PyYAML==5.4.1 (from -r requirements.txt (line 10))
  Using cached PyYAML-5.4.1.tar.gz (175 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[48 lines of output][0m
  [31m   [0m running egg_info
  [31m   [0m writing lib3/PyYAML.egg-info/PKG-INFO
  [31m   [0m writing dependency_links to lib3/PyYA

In [None]:
import os
import boto3
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator
from sagemaker import image_uris
import time
from datetime import datetime, timezone

def get_temporary_credentials(duration_seconds=3600):
    """
    Request a short-lived set of AWS credentials from STS.

    :param duration_seconds: Lifetime requested for the session token, in seconds (default: 3600).
    :return: Dictionary with the 'AccessKeyId', 'SecretAccessKey',
        'SessionToken' and 'Expiration' entries of the STS response.
    """
    issued = boto3.client('sts').get_session_token(
        DurationSeconds=duration_seconds
    )['Credentials']

    # Copy only the four fields callers rely on.
    wanted_fields = ('AccessKeyId', 'SecretAccessKey', 'SessionToken', 'Expiration')
    return {field: issued[field] for field in wanted_fields}

def refresh_credentials(duration_seconds=3600, region_name="us-east-1"):
    """
    Fetch fresh temporary credentials and rebuild the shared AWS sessions.

    Side effects:
      * exports the new credentials through the AWS_ACCESS_KEY_ID,
        AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN environment variables;
      * rebinds the module-level ``boto_session`` and ``sagemaker_session``
        globals to sessions built from the new credentials.

    Rebinding the globals does not update objects that already hold a
    reference to a previous session object.

    :param duration_seconds: Lifetime requested for the new credentials, in seconds (default: 1 hour).
    :param region_name: AWS region used for the rebuilt boto3 session (default: "us-east-1").
    :return: The 'Expiration' value of the new credentials.
    """
    temp_creds = get_temporary_credentials(duration_seconds=duration_seconds)

    # Update environment variables
    os.environ['AWS_ACCESS_KEY_ID'] = temp_creds['AccessKeyId']
    os.environ['AWS_SECRET_ACCESS_KEY'] = temp_creds['SecretAccessKey']
    os.environ['AWS_SESSION_TOKEN'] = temp_creds['SessionToken']

    # Update boto3 session
    global boto_session
    boto_session = boto3.Session(
        aws_access_key_id=temp_creds['AccessKeyId'],
        aws_secret_access_key=temp_creds['SecretAccessKey'],
        aws_session_token=temp_creds['SessionToken'],
        region_name=region_name,
    )

    # Update SageMaker session
    global sagemaker_session
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    return temp_creds['Expiration']


# Step 1: Obtain initial temporary credentials and set them in environment variables
expiration_time = refresh_credentials()

# Step 2: Initialize a Boto3 session with temporary credentials
boto_session = boto3.Session(
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
    aws_session_token=os.environ['AWS_SESSION_TOKEN'],
    region_name="us-east-1"
)

# Step 3: Pass the Boto3 session into SageMaker
sagemaker_session = sagemaker.Session(boto_session=boto_session)

# Step 4: Define SageMaker role and resources
role = "arn:aws:iam::202533500438:role/service-role/AmazonSageMaker-ExecutionRole-20240825T092662"
bucket = 'capstone-project-ucsd-mle-bootcamp'
data_key = "datasets/train.csv"
data_location = f"s3://{bucket}/{data_key}"
output_path = f"s3://{bucket}/output/"

# Step 5: Define training input
train_input = TrainingInput(
    s3_data=data_location,
    content_type="text/csv"
)

# Step 6: Retrieve the correct XGBoost image URI for your region
image_uri = image_uris.retrieve(
    framework="xgboost",
    region=sagemaker_session.boto_region_name,
    version="1.5-1",
    image_scope="training"
)

# Step 7: Define the Estimator for training
# Keep the instance type in one place so it cannot drift from the Estimator.
instance_type = "ml.m5.large"

estimator = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=3,
    instance_type=instance_type,
    volume_size=100,
    output_path=output_path,
    # Use the same session (credentials and region) that resolved the image
    # URI; without this the Estimator builds its own default session.
    sagemaker_session=sagemaker_session,
    # The built-in SageMaker XGBoost algorithm takes the native XGBoost
    # parameter names, not the scikit-learn wrapper names:
    #   n_estimators -> num_round (required), learning_rate -> eta,
    #   random_state -> seed.
    # "device" is not a hyperparameter of the 1.5-1 container, and the old
    # expression always evaluated to "cpu" for this CPU instance type, so it
    # is dropped; "hist" is the CPU histogram tree method.
    hyperparameters={
        "num_round": 1150,
        "max_depth": 56,
        "eta": 0.003,
        "colsample_bytree": 0.9,
        "subsample": 0.8,
        "min_child_weight": 1,
        "seed": 42,
        "objective": "reg:squarederror",
        "tree_method": "hist",
    },
)

# Monitor token expiration and refresh if needed during long-running jobs
def monitor_and_refresh_token(refresh_buffer_seconds=300, poll_interval_seconds=60):
    """
    Poll the credential expiration time forever and refresh before it lapses.

    Intended to run as the target of a background thread: the loop never
    returns. Each pass compares the module-level ``expiration_time`` with the
    current UTC time and calls ``refresh_credentials()`` once the remaining
    lifetime drops below the buffer.

    :param refresh_buffer_seconds: Refresh when fewer than this many seconds
        remain before expiration (default: 300, i.e. 5 minutes).
    :param poll_interval_seconds: Seconds to sleep between checks (default: 60).
    """
    global expiration_time

    while True:
        # Timezone-aware "now", compared against the stored expiration time.
        current_time = datetime.now(timezone.utc)
        seconds_left = (expiration_time - current_time).total_seconds()
        if seconds_left < refresh_buffer_seconds:
            print("Refreshing temporary credentials...")
            expiration_time = refresh_credentials()
            print(f"New token expiration time: {expiration_time}")

        # Sleep for a short interval before checking again
        time.sleep(poll_interval_seconds)

# Start token monitoring in a separate thread (optional).
# daemon=True so the endless monitor loop does not keep the process alive.
import threading
monitor_thread = threading.Thread(target=monitor_and_refresh_token, daemon=True)
monitor_thread.start()

# Step 8: Launch the training job
# fit() waits for the job to finish by default (wait=True) and raises if the
# job fails, so the line below is reached only after a successful run.
estimator.fit({"train": train_input})

print("Training job completed.")

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-12-29-20-59-27-655


2024-12-29 20:59:29 Starting - Starting the training job...
2024-12-29 20:59:43 Starting - Preparing the instances for training...
2024-12-29 21:00:16 Downloading - Downloading input data............