In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

CODE_FOLDER = Path("code")
CODE_FOLDER.mkdir(parents=True, exist_ok=True)

sys.path.append(f"./{CODE_FOLDER}")

In [2]:
# !aws configure set region eu-north-1

In [3]:
# !aws sso login --profile kamil-user

In [4]:
# !aws s3api create-bucket --bucket football-data-kamil --create-bucket-configuration LocationConstraint=eu-north-1

In [5]:
import os
import logging

import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession, LocalPipelineSession

# Update this variable to your bucket name. This name must be unique
# across all AWS accounts.
BUCKET = "football-data-kamil"

# To run this notebook in Local Model, this constant must be set to True.
# I'm trying to do this automatically by checking for a specific environment
# variable that is set by SageMaker when you run the notebook inside SageMaker
# Studio. 
LOCAL_MODE = "SAGEMAKER_INTERNAL_IMAGE_URI" not in os.environ

# This variable will be used to determine the architecture of the
# local machine. If the machine is an ARM64 machine, you will need
# to build a custom Docker image using the setup notebook.
ARCHITECTURE = !(uname -m)

# This is a dummy role that will be ignored when we run the
# pipeline in Local Mode.
DUMMY_ROLE = "arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-11111111111111"

# We'll use these two variables to configure the steps that do not support
# Local Mode.
pipeline_session = PipelineSession(default_bucket=BUCKET) if not LOCAL_MODE else None
execution_role = sagemaker.get_execution_role() if not LOCAL_MODE else DUMMY_ROLE

if LOCAL_MODE:
    config = {
        "session": LocalPipelineSession(),
        "instance_type": "local",
        "role": DUMMY_ROLE,

        # We need to use a custom Docker image when we run the pipeline
        # in Local Model on an ARM64 machine.
        "image": "sagemaker-tensorflow-training-toolkit-local" if ARCHITECTURE[0] == "arm64" else None,
        "framework_version": None if ARCHITECTURE[0] == "arm64" else "2.8",
        "py_version": None if ARCHITECTURE[0] == "arm64" else "py39",
    }
else:
    config = {
        "session": pipeline_session,
        "instance_type": "ml.m5.xlarge",
        "role": execution_role,
        "image": None,        
        "framework_version": "2.6",
        "py_version": "py38",
    }

# By default, The SageMaker SDK logs events related to the default
# configuration using the INFO level. To prevent these from spoiling
# the output of this notebook cells, we can change the logging
# level to ERROR instead.
logging.getLogger("sagemaker.config").setLevel(logging.ERROR)

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\kamil\AppData\Local\sagemaker\sagemaker\config.yaml


Windows Support for Local Mode is Experimental


In [6]:
import boto3

sagemaker_session = sagemaker.session.Session()
sagemaker_client = boto3.client("sagemaker")
iam_client = boto3.client("iam")
region = boto3.Session().region_name

In [7]:
from sagemaker.s3 import S3Uploader

S3_LOCATION = f"s3://{BUCKET}/football"
DATA_FILEPATH_X = r"C:\Users\kamil\Documents\football_project\football_predictor\data\new_features\df.csv"
DATA_FILEPATH_y = r"C:\Users\kamil\Documents\football_project\football_predictor\data\new_features\y.csv"


S3Uploader.upload(local_path=str(DATA_FILEPATH_X), desired_s3_uri=f"{S3_LOCATION}/data", sagemaker_session=sagemaker_session)
S3Uploader.upload(local_path=str(DATA_FILEPATH_y), desired_s3_uri=f"{S3_LOCATION}/data", sagemaker_session=sagemaker_session)

's3://football-data-kamil/football/data/y.csv'