# Import

In [None]:
!pip install mlflow
!pip install torch==1.8.1
!pip install pytorch-lightning
!pip install transformers
!pip install cloudpathlib

In [1]:
import sys
sys.path.append('../../')

import os
import sys
import logging
import argparse
from pathlib import Path
from ast import literal_eval
from collections import Counter
from typing import Any, Dict, Optional

In [2]:
from tqdm.auto import tqdm

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import mlflow
import mlflow.sagemaker
import sagemaker
from sagemaker import get_execution_role
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import MLFlowLogger


import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

import transformers
from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoModelForSequenceClassification,
    T5ForConditionalGeneration,
    T5Tokenizer,
    AutoTokenizer,
)
from transformers.optimization import (
    Adafactor,
    get_linear_schedule_with_warmup,
)

In [3]:
from deep.constants import *
from deep.utils import *

In [4]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Model artifacts

In [5]:
# S3 URI of an MLflow run's `artifacts/model` folder. Later cells pass it to
# mlflow.sagemaker.run_local(...) and to PyTorchModel(model_data=...).
logged_model = 's3://deep-mlflow-artifact/2/011aa4a20c5d4783bf6ed2fc100813ff/artifacts/model'

In [6]:
# S3 URI of a second MLflow run's `artifacts/model` folder; this is the model
# handed to mlflow.sagemaker.deploy(...) in the SageMaker section.
temp_model = 's3://deep-mlflow-artifact/2/4a9f23dfab7f4d9cbda9803a1754fe19/artifacts/model'

## Sagemaker Prep

In [7]:
# Plain configuration values first. DEV_BUCKET, SAGEMAKER_ROLE and
# SAGEMAKER_ROLE_ARN are not defined in this notebook; presumably they come
# from the star imports of `deep.constants` / `deep.utils` — verify.
prefix = "pl/example"
role = SAGEMAKER_ROLE
role_arn = SAGEMAKER_ROLE_ARN

# SageMaker session whose default bucket is the project's dev bucket.
sess = sagemaker.Session(default_bucket=DEV_BUCKET.name)

In [None]:
# Deploy the MLflow model at `temp_model` as the SageMaker application 'pl-example'.
# NOTE(review): the role is passed as SAGEMAKER_ROLE_ARN directly; the `role_arn`
# variable assigned in the prep cell holds the same value and is otherwise unused.
mlflow.sagemaker.deploy(
    'pl-example',
    temp_model,
    execution_role_arn=SAGEMAKER_ROLE_ARN,
    # Serving container image pulled from ECR.
    image_url='961104659532.dkr.ecr.us-east-1.amazonaws.com/mlflow-pyfunc:latest',
    region_name='us-east-1',
    instance_type='ml.p2.xlarge',
    # 'replace' is used so an existing application with the same name gets updated — confirm against the MLflow docs.
    mode='replace'
)

# Inference

In [34]:
import json
 
def query_endpoint(app_name, input_json, region_name='us-east-1'):
    """Invoke a SageMaker endpoint with a pandas-split JSON payload.

    Args:
        app_name: Name of the SageMaker endpoint to invoke.
        input_json: Request body as a JSON string in pandas "split"
            orientation (e.g. ``DataFrame.to_json(orient="split")``).
        region_name: AWS region of the endpoint. Defaults to 'us-east-1',
            the value that was previously hard-coded.

    Returns:
        The endpoint's response body parsed from JSON.
    """
    # Imported here because no cell of the notebook imports boto3 explicitly;
    # without it the call below fails with NameError on a fresh kernel.
    import boto3

    client = boto3.session.Session().client(
        "sagemaker-runtime", region_name=region_name
    )

    response = client.invoke_endpoint(
        EndpointName=app_name,
        Body=input_json,
        ContentType='application/json; format=pandas-split',
    )
    # JSON payloads are UTF-8; decoding as ASCII would raise on any
    # non-ASCII character in the response.
    preds = json.loads(response['Body'].read().decode("utf-8"))
    print("Received response: {}".format(preds))
    return preds

In [44]:
import pandas as pd
# Two sample excerpts in a single-column frame named "excerpt".
test_data = pd.Series(
    ['Agriculture is an important element of life', 'Young people in school are poor'],
    name="excerpt",
).to_frame()

# Serialise in pandas "split" orientation, matching the
# 'format=pandas-split' content type used when invoking the endpoint.
input_json = test_data.to_json(orient="split")

In [50]:
# Send the sample payload to the endpoint and display the parsed predictions.
# NOTE(review): this targets the endpoint 'prova7', while the deploy cell above
# creates 'pl-example' — confirm which endpoint is intended.
prediction1 = query_endpoint('prova7', input_json)
prediction1

Received response: [{'0': 1}, {'0': 2}]


[{'0': 1}, {'0': 2}]

In [8]:
# Serve `logged_model` locally through MLflow's SageMaker-compatible Docker image.
# NOTE(review): the recorded run of this cell ended with `UnsupportedOperation: fileno`
# right after the container was launched — possibly because it was started from a
# notebook rather than a terminal; confirm before relying on this cell.
mlflow.sagemaker.run_local(logged_model)

2021/07/07 10:20:55 INFO mlflow.sagemaker: launching docker image with path /var/folders/yk/w85rmwmj3bl1l522v_bw9xh80000gn/T/tmpwgtrgc44/model
2021/07/07 10:20:55 INFO mlflow.sagemaker: executing: docker run -v /var/folders/yk/w85rmwmj3bl1l522v_bw9xh80000gn/T/tmpwgtrgc44/model:/opt/ml/model/ -p 5000:8080 -e MLFLOW_DEPLOYMENT_FLAVOR_NAME=python_function --rm mlflow-pyfunc serve


Using the python_function flavor for local serving!


UnsupportedOperation: fileno

## Inference with a SageMaker PyTorchModel

In [None]:
from sagemaker.pytorch import PyTorchModel

# SageMaker PyTorch model definition: the entry script `inference.py` is taken from
# the inference-sector-pl example directory under SCRIPTS_PATH.
# NOTE(review): `model_data` receives the MLflow artifact folder URI in `logged_model`;
# SageMaker usually expects a model.tar.gz archive here — confirm.
# NOTE(review): the name `model` is rebound by `import model` in a later cell.
model = PyTorchModel(
    entry_point="inference.py",
    source_dir=str(SCRIPTS_PATH / 'examples/inference-sector-pl'),
    role=role,
    model_data=logged_model,
    framework_version="1.8.1",
    py_version="py3",
)

In [None]:
# Candidate SageMaker instance types (two GPU families and one CPU type).
# Not referenced by any other visible cell.
instances = ['ml.p2.xlarge', 'ml.p3.2xlarge', 'ml.c4.xlarge']

In [None]:
# Read a local checkpoint file as a pickle.
# NOTE(review): hard-coded absolute path on one user's machine — not reproducible elsewhere.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
# The name `a` is reassigned to a list of predictions in a later cell.
a = pd.read_pickle('/Users/stefano/Downloads/model.ckpt')

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Toggle between a local deployment and a remote SageMaker instance:
# leave local_mode as True to deploy locally, set it to False to deploy
# on a remote ml.p2.xlarge instance.
local_mode = True

instance_type = "local" if local_mode else "ml.p2.xlarge"

# Deploy a single instance; requests and responses are (de)serialised as JSON.
predictor = model.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [None]:
from mlflow.pytorch import pickle_module

In [None]:
# Display the pickle module object that mlflow.pytorch exposes (imported in the previous cell).
pickle_module

In [None]:
import sys
sys.path.append('../../scripts/examples/sector-pl/')
import model

In [None]:
# Load a locally downloaded model file using MLflow's pickle module; the result is
# only displayed, not assigned.
# NOTE(review): hard-coded absolute path on one user's machine — not reproducible elsewhere.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
torch.load('/Users/stefano/Downloads/model.pth', pickle_module=pickle_module)

In [None]:
import mlflow
import torch
# Rebinds `logged_model` to a different run's artifact folder than the one set near the top.
logged_model = 's3://deep-mlflow-artifact/2/9f216acf38d54ff6b185441a0f80e8b7/artifacts/model'

# Load the model with MLflow's native PyTorch flavor (mlflow.pytorch, not the
# generic pyfunc loader), mapping stored tensors onto the CPU.
loaded_model = mlflow.pytorch.load_model(logged_model, map_location=torch.device('cpu'))

In [None]:
from torch.utils.data import Dataset 
import pandas as pd

class SectorsDataset(Dataset):
    """Torch dataset that tokenizes the "excerpt" column of a dataframe.

    Args:
        dataframe: Frame with an "excerpt" column, or None to create an
            instance that is only used through ``encode_example``.
        tokenizer: Callable tokenizer returning "input_ids",
            "attention_mask" and "token_type_ids".
        max_len: Value passed to the tokenizer as ``max_length`` (with
            truncation and ``padding="max_length"`` requested).
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        # None is kept as-is so the object can still encode ad-hoc examples.
        self.excerpt_text = dataframe["excerpt"].tolist() if dataframe is not None else None
        self.max_len = max_len

    def encode_example(self, excerpt_text: str, index=None, as_batch: bool = False):
        """Tokenize one excerpt into long tensors.

        Args:
            excerpt_text: Text to encode.
            index: Unused; kept for interface compatibility.
            as_batch: When True, every tensor gets a leading dimension of
                size 1 so the result can be fed to a model as a batch.

        Returns:
            Dict with keys "ids", "mask" and "token_type_ids".
        """
        inputs = self.tokenizer(
            excerpt_text,
            None,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            return_token_type_ids=True,
        )

        encoded = {
            "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
            "mask": torch.tensor(inputs["attention_mask"], dtype=torch.long),
            "token_type_ids": torch.tensor(inputs["token_type_ids"], dtype=torch.long),
        }
        if as_batch:
            # Unsqueeze each tensor under its own key. The previous version
            # filled "token_type_ids" from the "ids" tensor by mistake.
            return {key: tensor.unsqueeze(0) for key, tensor in encoded.items()}
        return encoded

    def __len__(self):
        """Number of excerpts; 0 when the dataset was built without a dataframe."""
        if self.excerpt_text is None:
            return 0
        return len(self.excerpt_text)

    def __getitem__(self, index):
        """Return the encoded excerpt at ``index`` (coerced to ``str`` first)."""
        excerpt_text = str(self.excerpt_text[index])
        return self.encode_example(excerpt_text, index)

In [None]:
# Tokenizer checkpoint used to encode the excerpts.
model_name = 'sentence-transformers/paraphrase-mpnet-base-v2'
data = pd.DataFrame({
    'excerpt': ['hello, how', 'how, hello']
})
tokenizer = AutoTokenizer.from_pretrained(model_name)
dataset = SectorsDataset(data, tokenizer, 200)

# The inference cell below iterates over `dataloader`, which was never defined
# anywhere in the notebook; build it here so a fresh-kernel run works.
# shuffle=False keeps predictions in the same order as the rows of `data`.
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

# Move the model to CPU and switch it to evaluation mode for inference
# (disables dropout and similar train-time behaviour). The trailing
# semicolon suppresses the large module repr in the cell output.
loaded_model.cpu().eval();


In [None]:
# Inference only: disable gradient tracking, and call the module itself rather
# than `.forward` so that registered hooks run as usual.
# `preds` holds the output of the last batch once the loop finishes.
with torch.no_grad():
    for batch in dataloader:
        preds = loaded_model(batch)

In [None]:
# Duplicate the last batch's predictions to mimic a list of per-batch outputs
# (used by the concatenation experiment in the next cell).
a = [preds, preds]

In [None]:
# Concatenate the per-batch tensors, take the argmax over dimension 1
# (presumably the class dimension — confirm), and convert to a Python list.
list(torch.cat(a).argmax(1).numpy())

In [None]:
# NOTE(review): `p` is not assigned in any visible cell of this notebook, so this
# would raise NameError on a fresh kernel — confirm what was meant to be displayed.
p

In [None]:
# Inspect the shape of the last batch's model output.
preds.shape

In [None]:
# Index of the largest value along dimension 1 for each row of the last batch's output.
preds.argmax(1)

In [None]:
# Call `predict` with an empty frame that has a single 'data' column.
# NOTE(review): `loaded_model` was loaded via mlflow.pytorch.load_model; whether it
# exposes `.predict` depends on the model class, which is not visible here — confirm.
# NOTE(review): the column is 'data', whereas SectorsDataset reads "excerpt".
loaded_model.predict(pd.DataFrame({'data': []}))