In [None]:
#run this code if you are setting up your environment here for the first time
pip install sagemaker
pip install transformers

In [None]:
import sagemaker
import boto3
# Bootstrap session: used below to look up the default bucket.
sess = sagemaker.Session()

# Put a bucket name here to override; None means "use the session's default bucket".
sagemaker_session_bucket = None
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role ARN. If get_execution_role() raises ValueError,
# look up the role named 'sagemaker_execution_role' through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Rebuild the session, this time bound to the bucket chosen above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")



In [None]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Look up the URI of the "huggingface" LLM container image, version 2.3.1.
llm_image = get_huggingface_llm_image_uri("huggingface", version="2.3.1")

# Show the ECR image URI that will be used for the model.
print(f"llm image uri: {llm_image}")



In [None]:
import getpass
import json
import os

from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.g6.48xlarge"
number_of_gpu = 8
health_check_timeout = 900  # seconds (15 minutes)

# Never paste the access token into the notebook: read it from the environment,
# and prompt for it (without echoing) only when it is not set.
hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if not hf_token:
    hf_token = getpass.getpass("Hugging Face access token: ")
    # Remember it for the rest of this kernel session so other cells do not prompt again.
    os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token

# Environment variables handed to the model container.
config = {
  'HF_MODEL_ID': "meta-llama/Llama-3.3-70B-Instruct",
  'SM_NUM_GPUS': json.dumps(number_of_gpu), # Number of GPU used per replica
  'HUGGING_FACE_HUB_TOKEN': hf_token
}

# create HuggingFaceModel with the image uri
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=config
)

In [None]:
# Create the endpoint; the object returned by deploy() is kept in `llm`
# and used by the later cells to send requests and to clean up.
llm = llm_model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    # Allow up to 15 minutes for the model to load; after that the
    # container startup health check times out.
    container_startup_health_check_timeout=health_check_timeout,
)

In [None]:
import getpass
import os

from transformers import AutoTokenizer

# Same model id as the one configured for the endpoint, so the chat template
# applied locally belongs to the model that is being served.
model_id = 'meta-llama/Llama-3.3-70B-Instruct'

# Never paste the access token into the notebook: read it from the environment,
# and prompt for it (without echoing) only when it is not set.
hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if not hf_token:
    hf_token = getpass.getpass("Hugging Face access token: ")
    # Remember it for the rest of this kernel session so other cells do not prompt again.
    os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)


In [None]:
import pandas as pd
# Load the raw reviews from the CSV file that sits next to this notebook.
reviews_df = pd.read_csv(filepath_or_buffer='reviews_list.csv')


In [None]:
def clean_text(text):
    """Return `text` with every non-ASCII character replaced by a single space.

    Characters whose code point is below 128 are kept unchanged; every other
    character becomes ' ', so the result has the same length as the input.
    """
    cleaned_chars = []
    for ch in text:
        cleaned_chars.append(' ' if ord(ch) >= 128 else ch)
    return ''.join(cleaned_chars)

# Cast every comment to str first (clean_text iterates over characters),
# then run clean_text on each value of the 'Comment' column.
comments_as_text = reviews_df['Comment'].astype(str)
reviews_df['Comment'] = comments_as_text.apply(clean_text)


In [None]:
import numpy as np
# Drop rows whose comment is missing, empty, or whitespace-only.
# The literal string 'nan' is mapped back to a real NaN first so that
# dropna() can remove those rows.
comments_with_nan = reviews_df['Comment'].replace('nan', np.nan)
reviews_df['Comment'] = comments_with_nan
reviews_df = reviews_df.dropna(subset=['Comment'])

has_text = reviews_df['Comment'].str.strip() != ''
reviews_df = reviews_df[has_text]
reviews_df

In [None]:
# Keep only the detractor reviews: rows whose Score is 6 or lower.
is_detractor = reviews_df['Score'] <= 6
detractors = reviews_df[is_detractor]

In [None]:
# Renumber the rows from 0 (the labelling loop addresses rows by position)
# and add an empty column that will receive the model's labels.
detractors = detractors.reset_index(drop=True).assign(llama_system_label='')

In [None]:
# System instructions sent with every review. The text is the same for all
# rows, so it is built once here instead of on every loop iteration.
system_prompt = """You are a helpful AI model that categorizes customer reviews based on predefined labels related to a retail shoe company. 
    The company sells shoes through both e-commerce and physical stores, employs remote customer service agents and in-store retail workers, and provides an AI chatbot on its website.
    
    Your job is to analyze a given review and output the appropriate label(s) that best describe the customer's issue. Follow these guidelines:
    - Only output the label(s) as specified, comma-separated, and in double quotes.
    - If multiple labels apply, separate them with a comma.
    - If no label applies, output "other".
    
    Possible labels:
    "Delayed shipping" : shipping is delayed.
    "Item lost": item was lost during delivery.
    "Expedited shipping not on time": customer chose expedited shipping, but the item did not arrive on time.
    "No international": customer is upset that there is no international shipping.
    "Delay from order to start of ship": customer is upset that it takes too long for the product to start shipping after the order has been completed.
    "Shipping Carrier (USPS, UPS, FedEx, DHL)": customer is experiencing issues with one or more of these shipping carriers. Note that if this label applies, output the entire label name "Shipping Carrier (USPS, UPS, FedEx, DHL)".
    "Customer service can't resolve shipping": customer service is unable to resolve a shipping issue.
    "Negative store experience": customer had a negative in-store experience.
    "Unable to update address": customer is unable to update address.
    "Unable to honor discount codes in an exchange": customer had bought a discounted product, then later tried making an exchange to a different product, but the original discount wasn't rolled over to the new product.
    "Remote customer service agent issues, rude, unhelpful": a remote customer service agent was rude, unhelpful, or problematic. Note this does not apply to the in-store retail workers!
    "Chat bot issue": issue with the site AI chatbot.
    "Issues using $20 discount": customer has issues using the $20 discount.
    "Unable to cancel": customer is unable to cancel their order.
    "Item went out of stock": item is out of stock.
    "Return and exchange policies": customer has an issue with the return or exchange policies.
    "Final sale": customer has an issue with the special discount known as 'final sale'.
    "Sizing different from recommendation": the sizing of a product is different from what was suggested.
    "Size inconsistency between styles": the sizing is inconsistent between different styles.
    "Size inconsistency within style": the sizing is inconsistent between products of the same style.
    "Size inconsistency with other brands": the sizing is different than what other brands have.
    "Uncomfortable": the product is uncomfortable.
    "No support": the shoe has insufficient support for the foot.
    "No wide option": customer is upset that there is no wide option for the shoe.
    "Gift card hard to find": customer can't locate the gift card.
    "Can't send gift card to recipient": customer can't send a gift card to someone else.
    "Can't print gift card": customer can't print gift card.
    "Shoes arrive worn": the shoes arrive worn.
    "Incorrect order arrives": the wrong product arrives.
    "Damaged box on arrival": the delivery box is damaged upon arrival.
    "Shoes falling apart": the shoes get worn out too quickly.
    "Retains odor": the product retains too much odor.
    "Quality declined": the quality of the company's products has declined over time.
    "Fabric too thin": the fabric of the product is too thin or fragile.
    "Survey sent before order arrives": the survey is sent before the order arrives.
    "Shrinks in wash": the product shrinks excessively in the wash.
    "Stains don't come out": the stains don't come out.
    "Website not user friendly": the website is not user friendly.
    "Can't log in": customer is unable to log in to the website.
    "Difficult to verify teacher / medical professional discount": customer can't use their SheerID / teacher / medical professional discount.
    "Unsubscribed still receiving emails": customer has tried to unsubscribe, but is still receiving emails.
    "Too many emails": customer receives too many emails.
    "Overpriced": customer believes the products are overpriced.
    "Unattractive, matronly": customer believes the products are not attractive or are too matronly.
    "Limited color choices" : customer does not like the limited color choices.
    "Not environmentally friendly": customer is upset that the products are not environmentally friendly. 

    Notes:
    The "Remote customer service agent issues, rude, unhelpful" applies only to remote customer service agents, not the people who work in the store!

    Some examples:
    review: "The shoes were too tight even though the sizing chart recommended my size. I would have expected better given the price"
    answer: "Sizing different from recommendation", "Overpriced"
    
    review: "Have not received yet..... 2 weeks.... what's up with that???"
    answer: "Delayed shipping"
    
    review: "I placed an order 3 weeks ago, but the shoe never started shipping! What's going on??"
    answer: "Delay from order to start of ship"
    
    review: "The shoe gave me terrible blisters after a bit of wear. Unfortunately, I don't think I can return them anymore now."
    answer: "Uncomfortable", "Return and exchange policies"

    review: "The shoe has a weird looking design. Disappointing"
    answer: "Unattractive, matronly"
    """

# Loop-invariant tokenizer setting, so it is applied once up front.
tokenizer.use_default_system_prompt = False

# Label every detractor review. Rows are addressed by position 0..len-1, which
# assumes `detractors` has a fresh RangeIndex (i.e. reset_index was applied).
for i in range(len(detractors)):
    # Lightweight progress indicator: print every 10th row number.
    if i % 10 == 0:
        print(i)

    review = detractors.loc[i, 'Comment']

    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f'Review: "{review}"\n    Assigned Labels:'},
    ]

    # Render the chat as one prompt string, ending with the generation prompt
    # so the model continues with its answer.
    formatted_prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    parameters = {
        "inputs": formatted_prompt,
        "parameters": {
            "max_new_tokens": 200,
            # Ask the endpoint to return only the completion, not prompt + completion.
            "return_full_text": False,
        },
    }
    response = llm.predict(parameters)

    # Strip the prompt only when the endpoint actually echoed it. Slicing by
    # len(formatted_prompt) unconditionally would cut off the answer whenever
    # the endpoint returns just the completion.
    generated_text = response[0]['generated_text']
    if generated_text.startswith(formatted_prompt):
        generated_text = generated_text[len(formatted_prompt):]
    detractors.loc[i, 'llama_system_label'] = generated_text
    

In [None]:
# Write the labelled detractor reviews to disk; the row index is not saved.
detractors.to_csv(path_or_buf="assigned_labels.csv", index=False)

In [None]:
#IMPORTANT: when you are done, run this code to remove your endpoint, so the costs don't accumulate!!!
# The endpoint deletion sits in `finally` so that it is still attempted when
# deleting the model raises; any error from delete_model() is still reported.
try:
    llm.delete_model()
finally:
    llm.delete_endpoint()
#ALSO: make sure to shut down the kernel too. To do that, go to far left hand pane, click on the "Running Terminals and Kernels" tab, and click "Shut Down All" for the Kernels section