# [Lab0] Environment Setup

In [None]:
# Install the MLflow client (pinned to 2.13.2) and the sagemaker-mlflow package
# into the kernel's environment.
# NOTE(review): sagemaker-mlflow is not version-pinned here — consider pinning it
# as well so the lab stays reproducible.
%pip install -q mlflow==2.13.2 sagemaker-mlflow

In [None]:
# Restart the kernel so the packages installed above become importable
import IPython

kernel_app = IPython.Application.instance()
kernel_app.kernel.do_shutdown(True)

## 1. Basic Environment

In [None]:
import os
import json
import mlflow
import boto3
import sagemaker 

# boto3 client
# Default boto3 session (no explicit credentials or region are passed).
boto_session = boto3.Session()
sm_client = boto_session.client("sagemaker")  # SageMaker API client, reused by the space-metadata cell
region = boto_session.region_name

# sagemaker SDK
sess = sagemaker.Session()
role = sagemaker.get_execution_role()  # later passed as RoleArn when creating the tracking server

bucket = sess.default_bucket()  # also used for the tracking server's artifact store URI
prefix = 'sagemaker/DEMO-xgboost-dm'  # presumably the S3 key prefix for later labs — TODO confirm

In [None]:
# Persist the basic settings with IPython's %store magic so they can be
# restored later with `%store -r` (e.g. in another notebook or a new kernel).
%store bucket
%store prefix
%store role
%store region

#### Space Metadata

In [None]:
# Resource metadata file this notebook expects to find when it runs inside a
# SageMaker Studio JupyterLab space.
NOTEBOOK_METADATA_FILE = "/opt/ml/metadata/resource-metadata.json"

# Initialise both identifiers up front: if the metadata file is absent the
# check below must raise the explanatory error, not a NameError.
domain_id = None
space_name = None

if os.path.exists(NOTEBOOK_METADATA_FILE):
    with open(NOTEBOOK_METADATA_FILE, "rb") as f:
        metadata = json.load(f)
    domain_id = metadata.get('DomainId')
    space_name = metadata.get('SpaceName')
    print(f"SageMaker domain id: {domain_id}")

# Without a space name there is nothing to describe, so stop here with a hint.
if not space_name:
    raise Exception("Cannot find the current space name. Make sure you run this notebook in a JupyterLab in the SageMaker Studio")

print(f"Space name: {space_name}")

# Look up the space to find the user profile that owns it.
r = sm_client.describe_space(DomainId=domain_id, SpaceName=space_name)
user_profile_name = r['OwnershipSettings']['OwnerUserProfileName']

assert user_profile_name, "describe_space returned an empty OwnerUserProfileName"
print(f"User profile: {user_profile_name}")

# Persist the Studio domain, space and owner profile with IPython's %store
# magic so they can be restored later with `%store -r`.
%store domain_id
%store space_name
%store user_profile_name

## 2. Experiment Setup (MLflow Tracking Server)

In [None]:
from time import gmtime, strftime

# UTC day-hour-minute-second timestamp; it is embedded in both the server name
# and the artifact store prefix so repeated runs do not collide.
ts = strftime('%d-%H-%M-%S', gmtime())
mlflow_name = f"mlflow-{domain_id}-{ts}"

# Reuse the SageMaker client created in the basic-environment cell rather than
# constructing a new one. The call only submits the creation request.
create_response = sm_client.create_mlflow_tracking_server(
    TrackingServerName=mlflow_name,
    ArtifactStoreUri=f"s3://{bucket}/mlflow/{ts}",
    RoleArn=role,
    AutomaticModelRegistration=True,
)

mlflow_arn = create_response['TrackingServerArn']
print(f"Server creation request succeeded. The server {mlflow_arn} is being created.")

#### Note: It takes about 20 minutes to create a new MLflow tracking server. You can continue with the data setup below while the server is being created.

## 3. Data Setup 

In [None]:
import zipfile
import pandas as pd

In [None]:
# Unpack the downloaded archive into the current working directory
with zipfile.ZipFile('bank-additional.zip', mode='r') as archive:
    archive.extractall(path='.')

In [None]:
# Configure how DataFrames are rendered before loading the data
pd.options.display.max_columns = 500    # make sure we can see all of the columns
pd.options.display.max_rows = 20        # keep the output on one page

# Load the full dataset and show it as the cell output
data = pd.read_csv('./bank-additional/bank-additional-full.csv')
data

## 4. Check MLflow Server

In [None]:
# Pick a tracking server to work with: first choice is one in the 'Created'
# status, second choice is one that is still in the 'Creating' status.
r = boto3.client("sagemaker").list_mlflow_tracking_servers(TrackingServerStatus='Created')['TrackingServerSummaries']

if r:
    first_server = r[0]
    mlflow_arn = first_server['TrackingServerArn']
    mlflow_name = first_server['TrackingServerName']
    print(f"You have {len(r)} running MLflow server(s). Get the first server ARN:{mlflow_arn}")
else:
    print("You don't have any running MLflow servers. Trying to find a server in the status 'Creating'...")

    # Nothing is ready yet, so look for a server that is still being created.
    r = boto3.client("sagemaker").list_mlflow_tracking_servers(TrackingServerStatus='Creating')['TrackingServerSummaries']

    if r:
        first_server = r[0]
        mlflow_arn = first_server['TrackingServerArn']
        mlflow_name = first_server['TrackingServerName']
        print(f"You have an MLflow server {mlflow_arn} in the status 'Creating', going to use this one")
    else:
        # No candidate at all: leave both identifiers explicitly unset.
        print("You don't have any MLflow server in the status 'Creating'.")
        mlflow_arn = None
        mlflow_name = None

In [None]:
# Imported in this cell too, so it does not fail with a NameError when the
# server-creation cell (which holds the only other import of these names) was
# skipped in the current kernel session.
from time import gmtime, strftime

# UTC day-hour-minute-second suffix gives each run its own experiment name.
experiment_suffix = strftime('%d-%H-%M-%S', gmtime())
experiment_name = f"end-to-end-experiment-{experiment_suffix}"

In [None]:
# Show the selected tracking server as (ARN, name)
mlflow_arn, mlflow_name

In [None]:
# Persist the tracking server identifiers and the experiment name with
# IPython's %store magic so they can be restored later with `%store -r`.
%store mlflow_arn
%store mlflow_name
%store experiment_name