In [1]:
%pip install -r code/requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
# %pip install datasets

#### Before we can create our custom sagemaker endpoint we need to tar all the necessary files in an s3-location (For details see: https://huggingface.co/docs/sagemaker/inference)

In [3]:
import os
os.chdir("/home/ec2-user/SageMaker/")

In [22]:
%%sh
# aws s3 cp s3://sagemaker-eu-west-1-211125449279/LLM-Textmarker-mistralai-Mistral-7B-Instruct-v0-2-16-43-42/checkpoints/model_files ./tar_folder --recursive
# cp ./LLM_Highlighter/inference/code ./tar_folder/code --recursive
tar zcvf model.tar.gz -C ./tar_folder/ .

./
./adapter_model.safetensors
./code/
./code/inference.py
./code/.ipynb_checkpoints/
./code/.ipynb_checkpoints/inference-checkpoint.py
./code/.ipynb_checkpoints/requirements-checkpoint.txt
./code/requirements.txt
./adapter_config.json


In [23]:
!aws s3 cp model.tar.gz s3://sagemaker-eu-west-1-211125449279/LLM-Textmarker-mistralai-Mistral-7B-Instruct-v0-2-16-43-42/checkpoints/custom_inference/

upload: ./model.tar.gz to s3://sagemaker-eu-west-1-211125449279/LLM-Textmarker-mistralai-Mistral-7B-Instruct-v0-2-16-43-42/checkpoints/custom_inference/model.tar.gz


In [24]:
s3_location = "s3://sagemaker-eu-west-1-211125449279/LLM-Textmarker-mistralai-Mistral-7B-Instruct-v0-2-16-43-42/checkpoints/custom_inference/model.tar.gz"

In [25]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::211125449279:role/service-role/AmazonSageMaker-ExecutionRole-20240307T175168
sagemaker bucket: sagemaker-eu-west-1-211125449279
sagemaker session region: eu-west-1


#### Create the actual Sagemaker endpoint (only reachable from within AWS)

In [26]:
from sagemaker.huggingface.model import HuggingFaceModel
from dotenv import load_dotenv

load_dotenv("LLM_Highlighter/env.txt")
hf_token = os.getenv("HF_TOKEN")
env_dict = {
    "HF_TOKEN": hf_token
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=s3_location,       # path to your model and script
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.37",  # transformers version used
   pytorch_version="2.1",        # pytorch version used
   env=env_dict, # env variables
   py_version='py310',            # python version used
)

# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge"
    )

----------!

In [27]:
from datasets import load_dataset

# dataset used
data_path = 'MichaelAI23/English_CVs'

data = load_dataset(data_path)

train_val = data["train"].train_test_split(
    test_size=100, shuffle=True, seed=42
)

train_data = train_val["train"]
val_data = train_val["test"]

In [28]:
val_data[0]["overall"]

'As a Business Analyst at TechSolutions Inc., I was responsible for analyzing market trends and providing strategic recommendations to drive business growth. I started my role in January 2014 after graduating in 2012 and continued until December 2018, where I successfully implemented data-driven solutions that improved operational efficiency.\n- Published research paper on "The Impact of Data Analytics in Business Decision Making" in Journal of Business Intelligence, 2012\n- Co-authored a study on "Predictive Modeling for Customer Churn Analysis" in International Conference on Business Analytics, 2013\n- Presented findings on "Optimizing Supply Chain Management through Big Data Analysis" at the Annual Meeting of Operations Research Society, 2014\n- Contributed to a book chapter titled "Emerging Trends in Business Forecasting Techniques" published by Springer, 2015\n- Received Best Paper Award for research on "Machine Learning Applications in Marketing Strategy Development" at the Acade

In [29]:
val_data[32]["overall"]

"Experienced Data Engineer with a strong foundation in designing and implementing scalable data pipelines for complex analytics projects. Proficient in utilizing cutting-edge technologies such as Apache Spark, Hadoop, and Kafka to optimize data processing and storage. Skilled in collaborating with cross-functional teams to deliver innovative solutions that drive business growth and enhance decision-making capabilities.\n- I enjoy skiing in the winter and hitting the slopes whenever I get the chance.\n- Photography is a passion of mine, capturing moments and scenes that inspire me.\n- Cooking is a relaxing hobby for me, experimenting with new recipes and flavors in the kitchen.\n- Traveling to new destinations and immersing myself in different cultures is something I love to do in my free time.\n- Playing the guitar helps me unwind and express myself creatively outside of work.\n- Mail: ajamirez@peninsula.com\n- Address: 108 E Superior St, Chicago, IL 60611, USA\n- Phone: +1 (312) 337-2

In [36]:
import time
start_time = time.time()
data = {
    "inputs": val_data[32]["overall"],
    "parameters": {"max_new_tokens": 128, "temperature": 0}
}

res = predictor.predict(data=data)
print("--- %s seconds ---" % (time.time() - start_time))

print(res)

--- 3.286539077758789 seconds ---
[{'generated_text': " {'pe': {'s': '- Mail', 'e': '/alessandrojamirez'}, 'ed': {'s': 'A bachelor', 'e': ' completed.'}, 'wo': {'s': 'Data Engineer, 2014', 'e': ' datasets.'}, 'sk': {'s': 'Experienced', 'e': 'capabilities.'}}"}]


In [37]:
# delete endpoint
predictor.delete_endpoint()