In [2]:
!pip3 install -U -q sagemaker boto3 langchain streamlit ipykernel "ai21[AWS]"

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.3.3 requires pyqt5<5.16, which is not installed.
spyder 5.3.3 requires pyqtwebengine<5.16, which is not installed.
awscli 1.29.14 requires botocore==1.31.14, but you have botocore 1.31.20 which is incompatible.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.3.2 which is incompatible.
jupyterlab 3.4.4 requires jupyter-server~=1.16, but you have jupyter-server 2.7.0 which is incompatible.
notebook 6.5.5 requires jupyter-client<8,>=5.3.4, but you have jupyter-client 8.3.0 which is incompatible.
notebook 6.5.5 requires pyzmq<25,>=17, but you have pyzmq 25.1.0 which is incompatible.
panel 0.13.1 requires bokeh<2.5.0,>=2.4.0, but you have bokeh 3.2.1 which is incompatible.
sagemaker-datawrangler 0.4.3 requires sagemaker-data-insights==0.4.0, but you have sagemaker-data-insights 0

In [5]:
import os
import boto3
import json as json

import sagemaker
from sagemaker import ModelPackage
from sagemaker.utils import name_from_base
from sagemaker.model import Model
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.predictor import Predictor
from sagemaker.jumpstart.model import JumpStartModel
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri




In [4]:
# One boto3 session is created and shared, so every client below resolves the same
# credentials and region.
boto3_session = boto3.session.Session()
print(boto3_session.region_name)

# SageMaker SDK session layered on top of the boto3 session.
sess = sagemaker.session.Session(boto_session=boto3_session)

# ARN of the identity whose credentials are in use; it is passed as `role` to the
# model constructors later in the notebook.
aws_role = sess.get_caller_identity_arn()

# Default bucket associated with the SageMaker session.
sagemaker_session_bucket = sess.default_bucket()

# Low-level SageMaker client pinned to the session's region.
sagemaker_client = boto3.client(
    service_name='sagemaker',
    region_name=boto3_session.region_name,
)

us-west-2


### Deploy the encoder model

In [7]:
def deploy_encoder_llm(sess, aws_role, encoder_model_id = "huggingface-textembedding-gpt-j-6b",
                       encoder_model_version = "*",
                       encoder_model_instance_type = "ml.g5.12xlarge"):
    """Deploy a JumpStart text-embedding model to a real-time SageMaker endpoint.

    The endpoint is named ``f"{encoder_model_id}-endpoint"``.

    Args:
        sess: SageMaker session used for the model and the deployment.
        aws_role: IAM role ARN passed to the JumpStart model.
        encoder_model_id: JumpStart model id to deploy.
        encoder_model_version: JumpStart model version; ``"*"`` (the default)
            requests the latest available version.
        encoder_model_instance_type: Instance type hosting the endpoint.

    Returns:
        The predictor returned by ``JumpStartModel.deploy`` for the new endpoint.
    """
    encoder_endpoint_name = f'{encoder_model_id}-endpoint'

    # The version is passed explicitly; previously it was assigned to a local
    # that was never used.
    encoder_model = JumpStartModel(model_id=encoder_model_id,
                                   model_version=encoder_model_version,
                                   role=aws_role,
                                   sagemaker_session=sess)

    # Deploy the embedding model. The generous timeouts give the container time
    # to download the model artifacts and pass its startup health check.
    model_predictor_inference = encoder_model.deploy(
            initial_instance_count=1,
            instance_type=encoder_model_instance_type,
            endpoint_name=encoder_endpoint_name,
            model_data_download_timeout=3600,
            container_startup_health_check_timeout=600,
        )
    print(sess.boto_region_name, encoder_endpoint_name)
    return model_predictor_inference

In [9]:
# Toggle for this cell: leave False to deploy the encoder endpoint; set to True
# (after the model has been deployed) and re-run to delete it.
DEPLOYED=False #change it to True after model has been deployed

# Defined once so the deploy and clean-up paths always target the same endpoint.
encoder_model_id = 'huggingface-textembedding-gpt-j-6b'
encoder_endpoint_name = f"{encoder_model_id}-endpoint"

if not DEPLOYED:
    # if set to false, Deploy the model
    model_predictor_inference = deploy_encoder_llm(sess,
                                               aws_role,
                                               encoder_model_id=encoder_model_id)
else:
    # if set to true, delete the deployed endpoint and its endpoint config.
    # `sess` is used directly, so no JumpStartModel has to be constructed just
    # to reach the session.
    # NOTE(review): assumes the endpoint config shares the endpoint's name - confirm.
    sess.delete_endpoint(encoder_endpoint_name)
    sess.delete_endpoint_config(encoder_endpoint_name)

------------!us-west-2 huggingface-textembedding-gpt-j-6b-endpoint


### Deploy the causal LLM that will provide contextual answers
#### We will use AI21's Contextual Answers model

In [10]:
!pip3 install -U -q "ai21[AWS]"

[0m

In [11]:
import ai21

In [17]:

# AI21 model to deploy and the endpoint name derived from it.
model_name = "contextual-answers"
endpoint_name = model_name + "-endpoint"

# Get the updated model-package ARN for the region of the current boto3 session.
model_package_arn = ai21.SageMaker.get_model_package_arn(
    model_name=model_name,
    region=boto3_session.region_name,
)

# Instance type for the real-time endpoint.
# Original note: "Cheaper and faster - up to 10k characters".
# NOTE(review): ml.g5.48xlarge appears to be the largest G5 size, so "cheaper" may be
# a leftover from a different instance choice - confirm against the model package's
# supported instance types.
real_time_inference_instance_type = "ml.g5.48xlarge"

In [None]:
# Toggle for this cell: leave False to deploy the AI21 endpoint; set to True and
# re-run to delete it.
DEPLOYED=False
if not DEPLOYED:
    # if False, create a deployable model from the model package.
    model = ModelPackage(role=aws_role,
                         model_package_arn=model_package_arn,
                         sagemaker_session=sess)
    # Deploy the model. The generous timeouts give the container time to download
    # the model artifacts and pass its startup health check.
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type=real_time_inference_instance_type,
        endpoint_name=endpoint_name,
        model_data_download_timeout=3600,
        container_startup_health_check_timeout=600,
    )
else:
    # if set to True, delete the endpoint and its endpoint config.
    # `sess` is used directly: deleting by name needs neither a fresh
    # model-package ARN lookup nor a ModelPackage object.
    # NOTE(review): assumes the endpoint config shares the endpoint's name, and the
    # SageMaker model resource itself is not deleted here - confirm this is intended.
    sess.delete_endpoint(endpoint_name)
    sess.delete_endpoint_config(endpoint_name)
    print('Clean up done!')

----------------