# Import

In [1]:
import sys
sys.path.append('../../')

import os
import sys
import logging
import argparse
from pathlib import Path
from ast import literal_eval
from collections import Counter
from typing import Any, Dict, Optional

In [2]:
from icecream import ic
from tqdm.auto import tqdm

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import sagemaker
from sagemaker import get_execution_role
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import MLFlowLogger


import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

import transformers
from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoModelForSequenceClassification,
    T5ForConditionalGeneration,
    T5Tokenizer,
    AutoTokenizer,
)
from transformers.optimization import (
    Adafactor,
    get_linear_schedule_with_warmup,
)

Project-local constants (data locations, MLflow server, paths, etc.) and helpers: use these instead of hard-coding values.

In [3]:
from deep.constants import *
from deep.utils import *

In [4]:
%load_ext autoreload
%autoreload 2

## Logging and styling

In [5]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [6]:
# Plot styling: seaborn "whitegrid" theme, a custom six-colour palette, and a
# 12x8 default figure size.
HAPPY_COLORS_PALETTE = [
    "#01BEFE",
    "#FFDD00",
    "#FF7D00",
    "#FF006D",
    "#ADFF02",
    "#8F00FF",
]
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
# Override the 'muted' palette chosen above with the custom colours.
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = (12, 8)

In [7]:
# Route icecream (`ic`) debug output through sys.stdout.write and include the
# call context in every printed line.
ic.configureOutput(outputFunction=sys.stdout.write, includeContext=True)

In [8]:
# Logger for this notebook, named after the current module (__name__).
logger = logging.getLogger(__name__)

In [9]:
# Single seed for the whole notebook; pl.seed_everything applies it globally
# (the cell output reports "Global seed set to 2021").
RANDOM_SEED=2021
pl.seed_everything(RANDOM_SEED)

Global seed set to 2021


2021

# Data

In [10]:
# Training split: read the CSV, then parse the "sectors" column with literal_eval.
train_dataset = pd.read_csv(LATEST_DATA_PATH / "data_v0.5_train.csv")
train_dataset["sectors"] = train_dataset["sectors"].apply(literal_eval)

# Validation split: same treatment.
val_dataset = pd.read_csv(LATEST_DATA_PATH / "data_v0.5_val.csv")
val_dataset["sectors"] = val_dataset["sectors"].apply(literal_eval)

# Collect every sector seen in the training split and give each one an integer
# id following sorted order.
sector_set = set().union(*train_dataset["sectors"])
sectorname_to_sectorid = {name: idx for idx, name in enumerate(sorted(sector_set))}

## Sagemaker Prep

### Session

In [11]:
# SageMaker session that uses the project's dev bucket as its default bucket.
sess = sagemaker.Session(default_bucket=DEV_BUCKET.name)
# Role passed to the training estimator further down (`role=role`).
role = SAGEMAKER_ROLE
# Execution-role ARN passed to mlflow.sagemaker.deploy further down.
# NOTE(review): hard-coded ARN that embeds the AWS account id — consider moving
# it next to SAGEMAKER_ROLE in the project constants or reading it from the environment.
role_arn = 'arn:aws:iam::961104659532:role/service-role/AmazonSageMaker-ExecutionRole-20210519T102514'

### Bucket upload

In [12]:
# Set to False to upload the full splits instead of a small smoke-test subset.
sample = True
# Maximum number of rows kept per split when `sample` is enabled.
SAMPLE_SIZE = 100

if sample:
    # Cap the sample at the frame length so a split with fewer than SAMPLE_SIZE
    # rows does not raise ValueError, and pin random_state so the chosen subset
    # does not depend on the global RNG state at the time the cell runs.
    train_dataset = train_dataset.sample(
        n=min(SAMPLE_SIZE, len(train_dataset)), random_state=RANDOM_SEED
    )
    val_dataset = val_dataset.sample(
        n=min(SAMPLE_SIZE, len(val_dataset)), random_state=RANDOM_SEED
    )

# The timestamped job name doubles as the S3 prefix for this run's input data.
job_name = f"pytorch-{formatted_time()}-test"
input_path = DEV_BUCKET / 'training' / 'input_data' / job_name

train_path = str(input_path / 'train.pickle')
val_path = str(input_path / 'val.pickle')

# Pickle protocol 4 is requested explicitly — presumably so the py36 training
# container can unpickle the frames; TODO confirm.
train_dataset.to_pickle(train_path, protocol=4)
val_dataset.to_pickle(val_path, protocol=4)

### Estimator Definition

In [13]:
# Candidate SageMaker instance types.
# NOTE(review): this list is not referenced by the estimator cell, which
# hard-codes 'ml.p3.2xlarge'.
instances = [
    'ml.p2.xlarge',
    'ml.p3.2xlarge'
]

In [14]:
# Display the S3 prefix the pickled train/val frames were written under.
input_path

S3Path('s3://sagemaker-deep-experiments-dev/training/input_data/pytorch-2021-07-06-12-09-54-485-test')

In [15]:
from sagemaker.pytorch import PyTorch

# Hyperparameters handed to the training entry point (train.py).
hyperparameters = {
    'tracking_uri': MLFLOW_SERVER,
    'experiment_name': 'pl_test',
    'max_len': 200,
    'epochs': 1,
    'train_batch_size': 16,
    'eval_batch_size': 16,
    'model_name': 'sentence-transformers/paraphrase-mpnet-base-v2',
    # Passed as a string; presumably parsed back into a list by train.py — TODO confirm.
    'classes': str(SECTORS),
}

# PyTorch estimator running scripts/examples/sector-pl/train.py on one instance.
# The job name is intentionally NOT passed here: it is not an estimator
# constructor argument and is supplied to `estimator.fit(..., job_name=job_name)`.
estimator = PyTorch(
    entry_point='train.py',
    source_dir=str(SCRIPTS_EXAMPLES_PATH / 'sector-pl'),
    output_path=str(DEV_BUCKET / 'models/'),
    code_location=str(input_path),
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    framework_version='1.8',
    py_version='py36',
    hyperparameters=hyperparameters,
)

In [16]:
# Both input channels point at the same S3 prefix, which holds train.pickle and
# val.pickle side by side.
fit_arguments = {channel: str(input_path) for channel in ('train', 'test')}

In [17]:
# Start the SageMaker training job under the explicit job name, feeding it the
# 'train' and 'test' channels defined in fit_arguments.
estimator.fit(fit_arguments, job_name=job_name)

2021-07-06 10:10:02 Starting - Starting the training job...
2021-07-06 10:10:04 Starting - Launching requested ML instancesProfilerReport-1625566199: InProgress
......
2021-07-06 10:11:29 Starting - Preparing the instances for training......
2021-07-06 10:12:39 Downloading - Downloading input data...
2021-07-06 10:13:09 Training - Downloading the training image....................[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-07-06 10:16:42,167 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-07-06 10:16:42,193 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-07-06 10:16:48,437 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-07-06 10:16:48,761 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/opt/conda

In [None]:
import mlflow.sagemaker

# Deploy an MLflow-logged model to a new SageMaker endpoint.
# NOTE(review): the image URI, model URI and region below are hard-coded for
# one specific AWS account / MLflow run — consider moving them to project constants.

# URL of the ECR-hosted Docker image the model should be deployed into
image_uri = '961104659532.dkr.ecr.us-east-1.amazonaws.com/mlflow-pyfunc'
endpoint_name = 'pytorch-trial'
# The location, in URI format, of the MLflow model to deploy to SageMaker.
model_uri = 's3://deep-mlflow-artifact/2/18ce50fe730646b6b80fbafdcd22aeb1/artifacts/sentence-transformers/paraphrase-mpnet-base-v2'

# mode='create' requests a new application named `endpoint_name`, served on a
# single ml.m5.xlarge instance in us-east-1 under the role given by `role_arn`.
mlflow.sagemaker.deploy(
    mode='create',
    app_name=endpoint_name,
    model_uri=model_uri,
    image_url=image_uri,
    execution_role_arn=role_arn,
    instance_type='ml.m5.xlarge',
    instance_count=1,
    region_name='us-east-1'
)

In [None]:
# Debug display of the role assigned from SAGEMAKER_ROLE.
role

In [None]:
from mlflow.pytorch import pickle_module

In [None]:
# Debug display of the pickle module imported from mlflow.pytorch.
pickle_module

In [None]:
import sys
# Put the sector-pl example scripts on sys.path so that `model` can be imported.
# Presumably needed so the model classes are resolvable when the checkpoint is
# unpickled — TODO confirm.
sys.path.append('../../scripts/examples/sector-pl/')
import model

In [None]:
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source.
# The checkpoint is resolved under the current user's home directory rather than
# a hard-coded /Users/<name> path, so the cell is not tied to one machine.
torch.load(Path.home() / 'Downloads' / 'model.pth', pickle_module=pickle_module)

In [19]:
import mlflow
# S3 URI of the model artifact logged by an MLflow run.
logged_model = 's3://deep-mlflow-artifact/2/9f216acf38d54ff6b185441a0f80e8b7/artifacts/model'

# Load the logged model through the MLflow PyTorch flavor
# (mlflow.pytorch.load_model), not through the generic pyfunc loader.
loaded_model = mlflow.pytorch.load_model(logged_model)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/stefano/miniconda3/envs/deep/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-e5d16cbd2572>", line 5, in <module>
    loaded_model = mlflow.pytorch.load_model(logged_model)
  File "/Users/stefano/miniconda3/envs/deep/lib/python3.9/site-packages/mlflow/pytorch/__init__.py", line 659, in load_model
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
  File "/Users/stefano/miniconda3/envs/deep/lib/python3.9/site-packages/mlflow/tracking/artifact_utils.py", line 79, in _download_artifact_from_uri
    return get_artifact_repository(artifact_uri=root_uri).download_artifacts(
  File "/Users/stefano/miniconda3/envs/deep/lib/python3.9/site-packages/mlflow/store/artifact/artifact_repo.py", line 181, in download_artifacts
    return download_artifact_dir(
  File "/Users/stefano/miniconda3/envs/deep/lib/python3

TypeError: object of type 'NoneType' has no len()

In [None]:
# Smoke-test inference on an empty DataFrame with a single "data" column.
# NOTE(review): `loaded_model` is produced by mlflow.pytorch.load_model; confirm
# the returned object actually exposes `.predict` (that method belongs to the
# pyfunc interface) before relying on this cell.
loaded_model.predict(pd.DataFrame({'data': []}))