In [None]:
## Uncomment and install libs

# %pip install --upgrade pip
# %pip install pandas==2.2.2
# %pip install xgboost-cpu==2.1.1
# %pip install scikit-learn==1.5.1
# %pip install numpy==2.0.1
# %pip install ipytest==0.14.2
# %pip install python-dotenv==1.0.1

import os
import logging

# Load python-dotenv's IPython extension, then run its %dotenv magic so that variables
# defined in a .env file become available through os.environ (later cells read
# ACCOUNT_ID, AWS_REGION, USER_NAME, ROLE_NAME, POLICY_NAME, BUCKET, ... from there).
%load_ext dotenv
%dotenv

# Configure the loggers used by the project's helper modules.
#
# Every logger is set to INFO and, unless a handler is already reachable (on the
# logger itself or on an ancestor such as the root logger), receives its own
# console handler. Previously only the last logger ('AWSClientManager') got a
# handler, so INFO records from the other two were silently dropped by
# logging's WARNING-level last-resort handler.
for _logger_name in ('RoleManager', 'EcrManager', 'AWSClientManager'):
    logger = logging.getLogger(_logger_name)
    logger.setLevel(logging.INFO)

    if not logger.hasHandlers():
        console_handler = logging.StreamHandler()  # Log to console
        console_handler.setLevel(logging.DEBUG)  # Handler passes DEBUG and above; the logger level filters first
        formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(formatter)

        logger.addHandler(console_handler)

# `logger` is left bound to the 'AWSClientManager' logger, as before.

### AWS Configuration Instructions:

1. **Create Root User (Skip if already done)**:
   - Only use the AWS root account to set up the initial environment.
   - To create a root account (if necessary):
     - Go to [AWS Account](https://aws.amazon.com/resources/create-account/) and sign up for a root user.

2. **Set Up an Admin User and IAM Role**:
   - Create an admin user with enough permissions to create roles.
   
   **Steps**:
   - Navigate to **IAM** in the AWS Console.
   - Click on **Users** > **Add User**.
   - Create a user (e.g., `admin-user`) with **Programmatic access** and attach the **AdministratorAccess** policy.
   - Download the **Access Key ID** and **Secret Access Key** for AWS CLI configuration.

   This user will be used to create roles and users.

3. **AWS CLI Configuration**:
    - Configure the AWS CLI for both admin and developer users:
   
     ```bash
     aws configure
     ```
    - Enter the **Access Key**, **Secret Key**, default region (e.g., `eu-north-1`), and output format (e.g., `json`).


In [None]:
from IAMUserManger import IAMUserManager

from AWSClientManager import AWSClientManager
from RoleManager import RoleManager

import time

# Switch the RoleManager logger to DEBUG for the IAM setup performed in this cell.
logger = logging.getLogger('RoleManager')
logger.setLevel(logging.DEBUG)

# Deployment settings come from the environment; a missing variable raises KeyError.
account_id, region, user_name, role_name, policy_name = (
    os.environ[env_key]
    for env_key in ("ACCOUNT_ID", "AWS_REGION", "USER_NAME", "ROLE_NAME", "POLICY_NAME")
)

iam_user_manager = IAMUserManager()
iam_user_manager.create_user(user_name)

# Fixed pause between creating the user and creating the role
# (presumably to let the new IAM user propagate — TODO confirm).
time.sleep(5.5)

role_service = RoleManager(account_id, user_name)
role_arn = role_service.create_role_and_policy()


In [None]:
# Attach an inline policy for `role_arn` to the user, then create an access key for it.
iam_user_manager.attach_inline_policy(user_name, role_arn)
access_key_info = iam_user_manager.create_access_key(user_name)

# Validate BEFORE indexing: the keys used to be read first, so a falsy result
# crashed with an opaque TypeError ahead of the `if access_key_info:` guard.
if not access_key_info:
    raise RuntimeError(f"Failed to create an access key for user {user_name}.")

access_key = access_key_info['AccessKeyId']
secret_key = access_key_info['SecretAccessKey']

print(f"Access Key ID: {access_key}")
# The secret is intentionally not echoed: cell outputs are stored in the notebook
# file and are easily shared or committed. It remains available in `secret_key`.
print("Secret Access Key: <hidden; stored in the `secret_key` variable>")

aws_client_manager = AWSClientManager(
    region=region,
    access_key_id=access_key,
    secret_access_key=secret_key,
    account_id=account_id,
    role_name=role_name,
)

In [None]:
from EcrManager import EcrManager

# One ECR repository per container image: data processing and model training.
processor_image_name = 'sagemaker-processing-container'
train_image_name = 'xgb-clf-training-container'
_image_names = (processor_image_name, train_image_name)

ecr_client = aws_client_manager.get_client('ecr')
ecr_manager = EcrManager(ecr_client)

# Create both repositories, processor first, then training.
processor_repository, train_repository = [
    ecr_manager.create_repository(image_name) for image_name in _image_names
]

# Fixed pause before the lifecycle policies are applied.
time.sleep(3.3)

for _repository_name in _image_names:
    ecr_manager.put_lifecycle_policy(_repository_name)


In [None]:
import base64
import subprocess

# Fetch an ECR authorization token and use it to log the local Docker client in.
auth_data = ecr_client.get_authorization_token()['authorizationData'][0]
auth_token = auth_data['authorizationToken']

# The decoded token has the form "<username>:<password>". Split on the first colon
# only, so a password that itself contains ':' is not broken apart.
username_password = base64.b64decode(auth_token).decode('utf-8')
username, password = username_password.split(':', 1)
registry_uri = auth_data['proxyEndpoint']

# Pass arguments as a list (no shell) and feed the password through stdin:
# a password on the command line is visible in the process list and, with
# shell=True, subject to shell interpretation.
auth_command = ["docker", "login", "--username", username, "--password-stdin", registry_uri]
try:
    result = subprocess.run(
        auth_command,
        input=password,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
except subprocess.CalledProcessError as error:
    # stderr is captured, so surface it before re-raising the same exception.
    print(f"docker login failed. Output:\n{error.stderr}")
    raise
print(f"Command executed successfully. Output:\n{result.stdout}")


In [None]:
# Build the processing image and push it to its ECR repository.
print("Docker authenticated successfully to ECR.")
tag = ':latest'
processor_image_uri = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(account_id, region, processor_image_name + tag)
print(f'Processor image name: {processor_image_uri}.')

# Run the build through subprocess with check=True: an IPython `!docker build`
# line does not raise when the command fails, so a broken build would be
# followed by a push of a missing or stale image.
build_command = ["docker", "build", "-t", processor_image_uri, "../preprocessor/docker"]
subprocess.run(build_command, check=True)

# Argument list instead of a shell string: no shell parsing of the image URI.
push_command = ["docker", "push", processor_image_uri]
subprocess.run(push_command, check=True)

print(f"Docker image pushed to ECR: {processor_image_uri}")


In [None]:
# Build the training image and push it to its ECR repository.
tag = ':latest'
train_image_uri = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(account_id, region, train_image_name + tag)
# Label corrected: this cell handles the training image, not the processor image.
print(f'Train image name: {train_image_uri}.')

# NOTE(review): the build context is the *preprocessor* Docker directory, same as
# for the processor image — this looks like a copy-paste leftover. Confirm the
# correct directory for the training image; it is left unchanged here.
# check=True makes a failed build stop the cell; an IPython `!docker build` line
# does not raise on failure, so the push would run regardless.
build_command = ["docker", "build", "-t", train_image_uri, "../preprocessor/docker"]
subprocess.run(build_command, check=True)

# Argument list instead of a shell string: no shell parsing of the image URI.
push_command = ["docker", "push", train_image_uri]
subprocess.run(push_command, check=True)
print(f"Docker image pushed to ECR: {train_image_uri}")

In [None]:
from botocore.exceptions import ClientError

# Create the S3 bucket named by the BUCKET environment variable (KeyError if unset).
bucket = os.environ["BUCKET"]

s3_client = aws_client_manager.get_client('s3')

# us-east-1 is S3's default location and is rejected when passed as an explicit
# LocationConstraint, so the configuration is only sent for other regions.
create_bucket_kwargs = {'Bucket': bucket}
if region != 'us-east-1':
    create_bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}

try:
    s3_client.create_bucket(**create_bucket_kwargs)
    logging.info(f"S3 bucket {bucket} created successfully.")
except s3_client.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the cell is fine: the bucket is already ours.
    logging.warning(f"S3 bucket {bucket} already exists and is owned by you.")
except ClientError as e:
    # Any other failure (e.g. the name is taken by another account) is fatal.
    logging.error(f"Error creating S3 bucket: {e}")
    raise

In [None]:
from sagemaker.session import Session

# S3 prefix and SageMaker session; neither is used by the uploads in this cell.
s3_location = "s3://" + bucket + "/football"
sagemaker_session = Session()

# Local CSV paths come from the environment (values are already strings).
df_local_path = os.environ['DATA_FILEPATH_X']
y_local_path = os.environ['DATA_FILEPATH_Y']

# Upload the feature file first, then the target file, under the data/ prefix.
for _local_path, _object_key in (
    (df_local_path, "data/df.csv"),
    (y_local_path, "data/y.csv"),
):
    s3_client.upload_file(Filename=_local_path, Bucket=bucket, Key=_object_key)