In [1]:
import os

import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.debugger import Rule, DebuggerHookConfig, TensorBoardOutputConfig, CollectionConfig, ProfilerRule, rule_configs
from sagemaker.debugger import ProfilerConfig, FrameworkProfile

import yfinance as yf
from datetime import datetime
import pandas as pd
import numpy as np

# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before code is executed (edits to local source files are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [2]:
# Select the AWS credentials profile and default region through the process
# environment (this sets the profile/region, not the execution role itself).
os.environ.update({
    'AWS_PROFILE': 'sagemaker',
    'AWS_DEFAULT_REGION': 'eu-central-1',
})

In [3]:
# Key prefix under which this project's files are stored in the bucket.
prefix = "sagemaker/ml-capistone-project"

# Open a SageMaker session and look up (and show) its default S3 bucket.
session = sagemaker.Session()
bucket = session.default_bucket()
print(bucket)

# Execution role that is later passed to the estimator.
role = sagemaker.get_execution_role()

sagemaker-eu-central-1-292065287762


In [4]:
# Load data: download five years of price history for every ticker.
stock_list = ['NFLX', 'EPAM', 'AAPL']

end = datetime.now()
# Same calendar day five years back, at midnight. DateOffset clamps Feb 29 to
# Feb 28 when the target year is not a leap year; constructing
# datetime(end.year - 5, end.month, end.day) by hand raises ValueError there.
start = (pd.Timestamp(end).normalize() - pd.DateOffset(years=5)).to_pydatetime()

# Keep the downloaded frames in a dict keyed by ticker instead of injecting
# dynamically named variables through globals().
stock_frames = {}
for stock in stock_list:
    stock_frames[stock] = yf.download(stock, start, end)

# Explicit per-ticker names and the ordered list that later cells rely on.
NFLX, EPAM, AAPL = (stock_frames[ticker] for ticker in stock_list)
stock_data = [NFLX, EPAM, AAPL]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [5]:
# set data
# Use the EPAM frame as the working dataset for the cells below. This binds a
# second name to the same DataFrame object; no copy is made.
stock_df = EPAM
# Preview the first rows as the cell output.
stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-13,91.0,91.230003,90.269997,90.959999,90.959999,189100
2017-10-16,90.959999,91.699997,90.300003,91.150002,91.150002,262400
2017-10-17,91.099998,91.970001,90.650002,90.949997,90.949997,172300
2017-10-18,91.0,91.139999,90.540001,90.540001,90.540001,161700
2017-10-19,90.349998,90.980003,89.82,90.540001,90.540001,194800


In [6]:
# Print the index range, column dtypes and non-null counts of the working frame.
stock_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1260 entries, 2017-10-13 to 2022-10-14
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1260 non-null   float64
 1   High       1260 non-null   float64
 2   Low        1260 non-null   float64
 3   Close      1260 non-null   float64
 4   Adj Close  1260 non-null   float64
 5   Volume     1260 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 68.9 KB


In [7]:
# Persist the working frame as <data_folder>/stock.csv, creating the folder
# first when it does not exist yet.
data_folder = 'data'
os.makedirs(data_folder, exist_ok=True)
csv_path = '{}/stock.csv'.format(data_folder)
stock_df.to_csv(csv_path)

In [8]:
# Upload the contents of data_folder to the session bucket under the project
# prefix; the returned S3 location is kept in `inputs` for the training job.
inputs = session.upload_data(
    path=data_folder,
    key_prefix=prefix,
    bucket=bucket,
)
print(f"S3 path: {inputs}")

S3 path: s3://sagemaker-eu-central-1-292065287762/sagemaker/ml-capistone-project


In [9]:
# Hyperparameters handed to the training script.
# feature_columns is deliberately a JSON-style list encoded as a string,
# because a string is expected for this value.
hyperparameters = dict(
    learning_rate=1e-5,
    feature_columns='["Adj Close"]',
)
hyperparameters

{'learning_rate': 1e-05, 'feature_columns': '["Adj Close"]'}

In [10]:
# Built-in Debugger rules to evaluate during training, in this order.
debugger_rule_factories = (
    rule_configs.vanishing_gradient,
    rule_configs.overfit,
    rule_configs.overtraining,
    rule_configs.poor_weight_initialization,
)
rules = [Rule.sagemaker(make_config()) for make_config in debugger_rule_factories]

# The profiler report is a ProfilerRule rather than a Debugger Rule, so it is
# appended separately as the last entry.
rules.append(ProfilerRule.sagemaker(rule_configs.ProfilerReport()))

In [11]:
# Debugger hook: use a save interval of 1 for both the train and eval modes.
hook_parameters = {
    "train.save_interval": "1",
    "eval.save_interval": "1",
}
hook_config = DebuggerHookConfig(hook_parameters=hook_parameters)

# Profiler: system monitoring interval of 500 ms, plus framework profiling
# configured with num_steps=1.
framework_profile = FrameworkProfile(num_steps=1)
profiler_config = ProfilerConfig(
    system_monitor_interval_millis=500,
    framework_profile_params=framework_profile,
)

In [21]:
# Training job definition: which script to run, on what instance, with which
# framework version and hyperparameters.
training_job_settings = dict(
    entry_point='hpo.py',
    source_dir='src',
    base_job_name='stock-predictor',
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    framework_version='1.4.0',
    py_version='py3',
    hyperparameters=hyperparameters,
)

# Debugger and Profiler parameters configured in the previous cells.
debugging_settings = dict(
    rules=rules,
    debugger_hook_config=hook_config,
    profiler_config=profiler_config,
)

estimator = PyTorch(**training_job_settings, **debugging_settings)

In [22]:
# Start the training job with the uploaded S3 location bound to the 'data'
# key, and wait for the job to finish (wait=True).
training_inputs = {'data': inputs}
estimator.fit(training_inputs, wait=True)

2022-10-14 20:56:37 Starting - Starting the training job...
2022-10-14 20:56:55 Starting - Preparing the instances for trainingVanishingGradient: InProgress
Overfit: InProgress
Overtraining: InProgress
PoorWeightInitialization: InProgress
ProfilerReport: InProgress
.........
2022-10-14 20:58:37 Downloading - Downloading input data......
2022-10-14 20:59:38 Training - Downloading the training image..bash: cannot set terminal process group (-1): Inappropriate ioctl for device
bash: no job control in this shell
2022-10-14 20:59:54,063 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training
2022-10-14 20:59:54,067 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
2022-10-14 20:59:54,089 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.
2022-10-14 20:59:54,094 sagemaker_pytorch_container.training INFO     Invoking user training script.
2022-10-14 20:59:54,495 sagemaker-containers INFO     Mo

UnexpectedStatusException: Error for Training job stock-predictor-2022-10-14-20-56-35-897: Failed. Reason: AlgorithmError: framework error: 
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_training/trainer.py", line 85, in train
    entrypoint()
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_pytorch_container/training.py", line 99, in main
    train(framework.training_env())
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_pytorch_container/training.py", line 60, in train
    six.reraise(info[0], err, info[2])
  File "/opt/conda/lib/python3.6/site-packages/six.py", line 703, in reraise
    raise value
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_pytorch_container/training.py", line 53, in train
    capture_error=True, runner=framework.runner.ProcessRunnerType)
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_containers/entry_point.py", line 100, in run
    wait, capture_error
  File "/opt/conda/lib/python3.6/site-packages/sagemaker_containers/_process.py", line 112, in run
    cmd, _errors.ExecuteUserScriptError, capture_error=captur