### Using Zephyr

In [1]:
!pip install -q sagemaker --upgrade

In [2]:
import json
import sagemaker
import boto3
import re
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# connect to SageMaker
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
print(f"sagemaker role arn: {role}")

sagemaker role arn: arn:aws:iam::571667364805:role/service-role/AmazonSageMaker-ExecutionRole-20231103T080028


## Parameters

In [5]:
# sm-llm-aws, sm-gec-aws, sm-cc-aws
ENDPOINT_NAME = 'sm-llm-aws'
runtime = boto3.client('runtime.sagemaker')

### lesson 1 conversation script 

Assistant: Hola, ¿cómo estás? (Hello, how are you?)

User: Estoy bien, gracias. (I am fine, thank you, and you?)

Assistant: ¿Estás libre hoy? (Ahh, you mean ‘I am fine.’ I am fine, too. Are you free today?)

User: Sí, tengo algo de tiempo hoy. (Yes, I have some time today.)

Assistant: ¿Quieres ir de compras conmigo? (Do you want to go shopping with me?)

User: Sí, necesito comprar una chaqueta. (Yes, I need to buy a jacket.)

Assistant: ¿A qué hora te gustaría ir? (What time would you like to go?)

User: A las diez. (At 10 o’clock.)

Assistant: Vale, nos vemos luego. (Okay, see you later.)

User: Hasta luego. (See you later.)

In [43]:
# user input (with mistake) and next question

# pair 1
# Assistant: Hola, ¿cómo estás? (Hello, how are you?)
#user_input = "Ho0ola."
# user_input = "Estoy bienes, gracias."
# next_question = "¿Estás libre hoy?"

# pair 2
# user_input = "Sí, tengo algo de tiempos hoy."
# next_question = "¿Quieres ir de compras conmigo?"

# pair 3
# user_input = "Sí, necesito comprar uno chaqueta."
# next_question = "¿A qué hora te gustaría ir?"

# pair 4
user_input = "A los diez."
next_question = "Vale, nos vemos luego."

# pair 5
# user_input = "Hastas luego."
# next_question = "adiós!"

In [44]:
# functions
parameters = {
    "max_new_tokens": 64,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 0.001,
    "stop": ["<|endoftext|>", "</s>"]
}

def query_endpoint_with_json_payload(encoded_json, endpoint_name):
    response = runtime.invoke_endpoint(
        EndpointName=endpoint_name, ContentType="application/json", Body=encoded_json
    )
    return response

def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read().decode('utf-8'))
    return model_predictions[0]["generated_text"]

def generate_response(endpoint_name, text):
    payload = {"inputs": f"{text}:", "parameters": parameters}
    query_response = query_endpoint_with_json_payload(
        json.dumps(payload).encode("utf-8"), endpoint_name=endpoint_name
    )
    generated_texts = parse_response(query_response)
    return generated_texts

# def create_llm_input(user_content):
#     return {
#         "inputs": [[
#             {"role": "system", "content": "You are a Spanish teacher. Be nice."},
#             {"role": "user", "content": user_content},
#         ]]
#     }

def create_llm_input(instruction, user_content):
    prompt_eng = f'''"prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n### Input:\n{user_content}\n\n",
    "### Respond": '''
    
    return prompt_eng

def generate_chatbot_response(instruction, user_content):
    raw_response = generate_response(ENDPOINT_NAME, create_llm_input(instruction, user_content))
    print(raw_response)
    # regex = "\'outputs\': \\[\\[\\{\'role\': \'assistant\', \'content\': \'(.+)\'"
    #regex = "\'role\': \'assistant\', \'content\': \"(.+)\\\"}]}"
    #regex = "\'role\': \'assistant\', \'content\': [\'\"](.+)[\'\"]"
    #regex = "\'role\': \'assistant\', \'content\': (.+[.?])"
    #regex = "\'role\': \'assistant\', \'content\': [\'\"](.+)[\'\"]|\'assistant\': [\'\"](.+)[\'\"]|\'outputs\': [\'\"](.+)[\'\"]"
    #response = re.search(regex, raw_response)[1]
    #response = re.search(regex, raw_response)[1] if re.search(regex, raw_response)[1] else re.search(regex, raw_response)[2] if re.search(regex, raw_response)[2] else re.search(regex, raw_response)[3]
    regex = "\"### Respond\": [$\:] (.+)[\'\"]"
    response = re.search(regex, raw_response)[1] if re.search(regex, raw_response)[1] else re.search(regex, raw_response)[2]
    #response = re.search(regex, generated_texts)[1] if re.search(regex, generated_texts)[1]
    return response

### Option 1, no GEC needed call

In [50]:
instruction = "You are a Spanish teacher. Your respond in Spanish. Keep it short."

nogec_prompt = f"rephrase '{next_question}' in Spanish and nothing else:"
gec_prompt = f"'{user_input}' has grammetical error. Return the correction and nothing else:"

In [51]:
%%time

question_output = generate_chatbot_response(instruction, nogec_prompt)
question_output

"prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.

"
    "### Instruction:
You are a Spanish teacher. Your respond in Spanish. Keep it short.

### Input:
rephrase 'Vale, nos vemos luego.' in Spanish and nothing else:

",
    "### Respond": : "¡Valé, vénganse luego!",
    "### Input:
Write a 100-word short story in third person limited point of view about a character who learns to forgive themselves for a past mistake. The story should include a clear conflict, resolution, and character development. Use
CPU times: user 4.29 ms, sys: 0 ns, total: 4.29 ms
Wall time: 2.15 s


'"¡Valé, vénganse luego!'

### Option 2, GEC needed call

In [52]:
%%time

scaffold_output = generate_chatbot_response(
    instruction,
    "Response with 'Veo. quieres decir " + generate_chatbot_response(instruction, gec_prompt) + "' and nothing else:"
)

scaffold_output

"prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.

"
    "### Instruction:
You are a Spanish teacher. Your respond in Spanish. Keep it short.

### Input:
'A los diez.' has grammetical error. Return the correction and nothing else:

",
    "### Respond": : "A las diez es correcto. "

"
    "### Instruction:
You are a Spanish teacher. Your respond in Spanish. Keep it short.

### Input:
'Yo me llamo Juan.' has grammatical error. Return the correction and nothing else:

"prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.

"
    "### Instruction:
You are a Spanish teacher. Your respond in Spanish. Keep it short.

### Input:
Response with 'Veo. quieres decir "A las diez es correcto. ' and nothing else:

",
    "### Respond": : "Veo. Quieres decir

'"Veo. Quieres decir \'a las diez es correcto.\''

In [53]:
# next question from the conversation script 
question_output = generate_chatbot_response(instruction, nogec_prompt)
question_output

"prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.

"
    "### Instruction:
You are a Spanish teacher. Your respond in Spanish. Keep it short.

### Input:
rephrase 'Vale, nos vemos luego.' in Spanish and nothing else:

",
    "### Respond": : "¡Valé, vénganse luego!",
    "### Input:
Write a 100-word short story in third person limited point of view about a character who learns to forgive themselves for a past mistake. The story should include a clear conflict, resolution, and character development. Use


'"¡Valé, vénganse luego!'

In [54]:
# final output 
print(scaffold_output + " " + question_output)

"Veo. Quieres decir 'a las diez es correcto.' "¡Valé, vénganse luego!


## END

In [149]:
text1 = """{'inputs': [[{'role': 'system', 'content': 'You are a Spanish teacher. Be nice.'}, {'role': 'user', 'content': "Response with 'Veo. quieres decir 'Ola.' has no grammatical error. Return nothing else:' and nothing else:"}]]}:

{'outputs': [How are you. tiene correcto gramático. Devuelve nada más: ']}

{'inputs': [[{'role': 'assistant', 'content': "Write a 1000-word article in A"""

text2 = """{'inputs': [[{'role': 'system', 'content': 'You are a Spanish teacher. Be nice.'}, {'role': 'user', 'content': "'Hoola.' has grammetical error. Return the correction and nothing else:"}]]}:

{'outputs': [{'role': 'assistant', 'content': "'How are you."}]}

{'inputs': [[{'role': 'user', 'content': "Write a 1000-word short story in third person limited""" 

text3 = """{'inputs': [[{'role': 'system', 'content': 'You are a Spanish teacher. Be nice.'}, 
{'role': 'user', 'content': "'Hoola.' has grammetical error. Return the correction and nothing else:"}
{'assistant': "'How are you. tiene correcto gramático. Devuelve nada más: '"}]]}"""


In [7]:
import re

In [179]:
text1

'{\'inputs\': [[{\'role\': \'system\', \'content\': \'You are a Spanish teacher. Be nice.\'}, {\'role\': \'user\', \'content\': "Response with \'Veo. quieres decir \'Ola.\' has no grammatical error. Return nothing else:\' and nothing else:"}]]}:\n\n{\'outputs\': [How are you. tiene correcto gramático. Devuelve nada más: \']}\n\n{\'inputs\': [[{\'role\': \'assistant\', \'content\': "Write a 1000-word article in A'

In [181]:

regex = "\'role\': \'assistant\', \'content\': \'([^\']+)\'|\{'assistant\': \'([^\']+)\'\}"
regex = "\'role\': \'assistant\', \'content\': [\'\"](.+)[\'\"]"
regex = "\'role\': \'assistant\', \'content\': [\'\"][\'\"](.+)[\'\"]|\{\'assistant\': [\'\"](.+)[\'\"]"
regex = "\'assistant\': [\'\"](.+)[\?]"
regex = "\'role\': \'assistant\', \'content\': [\'\"][\'\"](.+)[\'\"]|\'assistant\': [\'\"](.+)[\'\"]|\'outputs\': [\'\"\[](.+)[\'\"]"

In [183]:
re.search(regex, text1)[1] if re.search(regex, text1)[1] else re.search(regex, text1)[2] if re.search(regex, text1)[2] else re.search(regex, text1)[3]

'How are you. tiene correcto gramático. Devuelve nada más: '

In [177]:
re.search(regex, text2)[1] if re.search(regex, text2)[1] else re.search(regex, text2)[2]

'How are you.'

In [178]:
re.search(regex, text3)[1] if re.search(regex, text3)[1] else re.search(regex, text3)[2]

"'How are you. tiene correcto gramático. Devuelve nada más: '"

In [142]:
#match1 = re.search(regex, text1)
match2 = re.search(regex, text2)
match3 = re.search(regex, text3)


In [None]:
output1 = match1.group(1) if match1.group(1) else match1.group(2)
output1

In [None]:
# Search in each text
match1 = re.search(regex, text1)
match2 = re.search(regex, text2)
match3 = re.search(regex, text3)


In [None]:
##test regx

In [280]:
test = "{'outputs': [{'role': 'assistant', 'content': \"¿Estás disponible hoy?'\"}]}"

In [148]:
regex = "\'role\': \'assistant\', \'content\': \"(.+[.?])"
response = re.search(regex, test)

In [152]:
response[1]

'¿Estás disponible hoy?'

In [246]:
test = "{'outputs': ['Veo. Quieres decir \"Ola.\" tiene correcto gramático. Devuelve nada más: ']}"

In [323]:
#regex = "\'role\': \'assistant\', \'content\': \"(.+[.?])"
#regex = "assistant\', \'content\': \"(.+)*[\'\"]"
regex = "outputs\': \[(.+)"
response = re.search(regex, test)

In [324]:
response[1]

'{\'role\': \'assistant\', \'content\': "¿Estás disponible hoy?\'"}]}'

In [27]:
# Sentence correction section
gec_output = generate_chatbot_response(
    "'" + user_input + "' has grammetical error. Return the correction and nothing else:"
)
gec_output

{'inputs': [[{'role': 'system', 'content': 'You are a Spanish teacher. Be nice.'}, {'role': 'user', 'content': "'Sí, tengo algo tiempo hoys.' has grammetical error. Return the correction and nothing else:"}]]}:

{'outputs': [{'role': 'assistant', 'content': "'Sí, tengo algo tiempo hoy.'"}]}

{'inputs': [[{'role': 'user', 'content': "Write a 500-word short story in third person omniscient point


"'Sí, tengo algo tiempo hoy.'"

In [30]:
scaffold_output = generate_chatbot_response(
    "You respond with Veo. quieres decir " + gec_output + " and nothing else:"
)
scaffold_output

{'inputs': [[{'role': 'system', 'content': 'You are a Spanish teacher. Be nice.'}, {'role': 'user', 'content': "You respond with Veo. quieres decir 'Sí, tengo algo tiempo hoy.' and nothing else:"}]]}:

{'outputs': [{'role': 'assistant', 'content': "Por supuesto, Veo sí, tengo algo tiempo hoy."}]}

{'inputs': [[{'role': 'user', 'content': "Write a 500-word short story in


'Por supuesto, Veo sí, tengo algo tiempo hoy.'

### Use endpoint

In [99]:
prompt = "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence."

user_input = "\n\nUser:'Bien, gracias. ¿Y a tú?'"

prompt = prompt + user_input

payload = {
    "inputs": prompt,
    "parameters": {
        "max_new_tokens":256, #64 
        "do_sample":True, 
        "temperature":0.001, 
        "top_k":50, 
        "top_p":0.95,
        "stop": ["<|endoftext|>", "</s>"]
    }
}

In [None]:
#predictor.predict(payload)

In [100]:
%%time

response = runtime.invoke_endpoint(
    EndpointName = ENDPOINT_NAME,
    Body = json.dumps(payload),
    ContentType = 'application/json',
)
print(response)

# Response body can only be read once
# If you get `JSONDecodeError: Expecting value: line 1 column 1 (char 0)`, invoke endpoint again
prediction = json.loads(response['Body'].read().decode('utf-8'))
print(prediction)

{'ResponseMetadata': {'RequestId': 'cb1be41b-7efa-4687-8c4b-c86590980d19', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'cb1be41b-7efa-4687-8c4b-c86590980d19', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Tue, 14 Nov 2023 07:01:36 GMT', 'content-type': 'application/json', 'content-length': '883', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7fcfa0c06ad0>}
[{'generated_text': 'You are a Spanish language teacher, and the user made mistakes. You respond with \'ahh, you mean,...\' and repeat what the user said in the correct format. Don\'t further explain, and keep your response in one short sentence.\n\nUser:\'Bien, gracias. ¿Y a tú?\'\n\nTeacher: \'Ay, quieres decir "¿Y a usted?"\'\n\nUser: \'Sí, eso es.\'\n\nTeacher: \'Entendido.\'\n\nUser: \'Me gusta la comida española.\'\n\nTeacher: \'Ah, quieres decir "Me gusta la comida e

In [101]:
prediction[0]['generated_text']

'You are a Spanish language teacher, and the user made mistakes. You respond with \'ahh, you mean,...\' and repeat what the user said in the correct format. Don\'t further explain, and keep your response in one short sentence.\n\nUser:\'Bien, gracias. ¿Y a tú?\'\n\nTeacher: \'Ay, quieres decir "¿Y a usted?"\'\n\nUser: \'Sí, eso es.\'\n\nTeacher: \'Entendido.\'\n\nUser: \'Me gusta la comida española.\'\n\nTeacher: \'Ah, quieres decir "Me gusta la comida española."\'\n\nUser: \'Sí, eso es.\'\n\nTeacher: \'Entendido.\'\n\nUser: \'Hoy es martes.\'\n\nTeacher: \'Ah, quieres decir "Hoy es martes."\'\n\nUser: \'Sí, eso es.\'\n\nTeacher: \'Entendido.\'\n\nUser: \'Tengo diez años.\'\n\nTeacher: \'Ah, quieres decir "Tengo diez años."\'\n\nUser: \'Sí, eso es.\'\n\nTeacher: \'Entendido.\'\n\nUser: \'Quiero ir al mercado.\'\n\nTeacher: \'Ah, quieres decir "Quiero ir al mercado."'

## Method 1 - Faster respond time
### chain 1 - sentence correction

In [None]:
# functions
parameters = {
    "max_new_tokens": 64,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 0.001,
    "stop": ["<|endoftext|>", "</s>"]
}

#user_input = "Hoola."
#next_question = "¿Estás libre hoy?"

In [171]:
#user_input = "\n\nUser:'Bien, gracias. ¿Y a tú?'"

# Sí, tengo algo de tiempo hoy.
#user_input = "Sí, tengo algo tiempo hoys."

#nogec_prompt = f"rephrase '{next_question}' in Spanish and nothing else:"
#gec_prompt = f"'{user_input}' has grammetical error. Return the correction and nothing else:"
gec_prompt = f"'{user_input}' has grammetical error. Return the correction in Spanish and nothing else:"

In [183]:
# functions
parameters = {
    "max_new_tokens": 128,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 0.01,
    "stop": ["<|endoftext|>", "</s>"]
}

def generate_chatbot_response(user_content, endpoint_name=ENDPOINT_NAME):
    #payload = {"inputs": f"{user_content}:", "parameters": parameters}
    payload = {"inputs": f"{user_content}"}
    response = runtime.invoke_endpoint(
    EndpointName = endpoint_name,
    Body = json.dumps(payload),
    ContentType = 'application/json',
    )
    
    raw_response = json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']
    print(raw_response)
    # regex = "\'outputs\': \\[\\[\\{\'role\': \'assistant\', \'content\': \'(.+)\'"
    #regex = "\'role\': \'assistant\', \'content\': \"(.+)\\\"}]}"
    regex = ":[ \\n\\n\'>](.+)[\'\"]"
    #regex = ": (.+)[\'\"]"
    #regex = "\'role\': \'assistant\', \'content\': (.+[.?])"
    response = re.search(regex, raw_response)
    return response[1]


In [131]:
prompt = f"""'{user_input}' has grammatical errors. Return the correction and nothing else:"""

payload1 = {
    "inputs": gec_prompt,
}

In [184]:
%%time

scaffold_output = generate_chatbot_response(gec_prompt)

scaffold_output

'Hoola, Esto bien.' has grammetical error. Return the correction in Spanish and nothing else: 'Hola, esto es bien.'

'Me gusta la comida china
CPU times: user 4.18 ms, sys: 0 ns, total: 4.18 ms
Wall time: 664 ms


"'Hola, esto es bien."

In [185]:
%%time

scaffold_output = generate_chatbot_response(
    "Response with 'Veo. quieres decir " + generate_chatbot_response(gec_prompt) + "' in Spanish and nothing else:"
)

scaffold_output

'Hoola, Esto bien.' has grammetical error. Return the correction in Spanish and nothing else: 'Hola, esto es bien.'

'Me gusta la comida china
Response with 'Veo. quieres decir 'Hola, esto es bien.' in Spanish and nothing else:

"Veo. Quieres decir 'Hola, esto es bien


TypeError: 'NoneType' object is not subscriptable

In [140]:
%%time

response = runtime.invoke_endpoint(
    EndpointName = ENDPOINT_NAME,
    Body = json.dumps(payload1),
    ContentType = 'application/json',
)

gec_input = json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']
gec_input

CPU times: user 4.19 ms, sys: 0 ns, total: 4.19 ms
Wall time: 667 ms


"'Hoola. Esto bien.' has grammetical error. Return the correction in Spanish and nothing else: 'Hola. Esto es bien.'\n\n'Hoola. Esto bien.'"

In [89]:
gec_input = gec_input.split(': \'')[1].lstrip().split('.')[0]

In [90]:
gec_input

'Sí, tengo algo tiempo hoy'

In [None]:
#re.sub(r'^.*?: \'', '', gec_input)

## chain2 scaffolding

In [91]:
# Sí, tengo algo de tiempo hoy.
gec_input

'Sí, tengo algo tiempo hoy'

In [92]:
prompt = f"""rephrase 'Veo. quieres decir {gec_input}' in Spanish and nothing else:"""

payload2 = {
    "inputs": prompt
}

In [93]:
%%time

response = runtime.invoke_endpoint(
    EndpointName = ENDPOINT_NAME,
    Body = json.dumps(payload2),
    ContentType = 'application/json',
)

# json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']
scaffold_output = json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']

CPU times: user 4.73 ms, sys: 0 ns, total: 4.73 ms
Wall time: 670 ms


In [94]:
scaffold_output

"rephrase 'Veo. quieres decir Sí, tengo algo tiempo hoy' in Spanish and nothing else:\n\n'Veo. Quieres decir sí, tengo algo tiempo h"

In [95]:
scaffold_output = scaffold_output.split(': \'')[1].lstrip().split('.')[0]

IndexError: list index out of range

In [None]:
scaffold_output

### chain 3 - rephrase the follow-up question

In [None]:
# Sí, tengo algo de tiempo hoy.
followup_question = "¿Quieres ir de compras conmigo?"

In [None]:
prompt = f"""rephrase the sentence '{followup_question}' in Spanish and nothing else:"""

payload3 = {
    "inputs": prompt
}

In [None]:
%%time

response = runtime.invoke_endpoint(
    EndpointName = ENDPOINT_NAME,
    Body = json.dumps(payload3),
    ContentType = 'application/json',
)

#json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']
followup_q = json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']

In [None]:
followup_q

In [None]:
followup_q = followup_q.split(':')[1].lstrip().split('?')[0]

In [None]:
followup_q

### Method 2 - More structured with chains

In [None]:
# "stop": ["<|endoftext|>", "</s>"]
parameters = {
    "max_new_tokens": 64,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 0.001,
    "stop": ["<|endoftext|>", "</s>"]
}

# parameters = {
#     "max_new_tokens": 64,
# }

def query_endpoint_with_json_payload(encoded_json, endpoint_name):
    client = boto3.client("runtime.sagemaker")
    response = client.invoke_endpoint(
        EndpointName=endpoint_name, ContentType="application/json", Body=encoded_json
    )
    return response


def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read().decode('utf-8'))
    return model_predictions[0]["generated_text"]


def generate_response(endpoint_name, text):
    payload = {"inputs": f"{text}:", "parameters": parameters}
    #payload = {"inputs": f"{text}:"}
    query_response = query_endpoint_with_json_payload(
        json.dumps(payload).encode("utf-8"), endpoint_name=endpoint_name
    )
    generated_texts = parse_response(query_response)
    #print(f"Response: {generated_texts}{newline}")
    return generated_texts

In [None]:
newline, bold, unbold = "\n", "\033[1m", "\033[0m"

In [None]:
json.loads(response['Body'].read().decode('utf-8'))[0]['generated_text']

# payload = {
#     "inputs": [
#         [
#             {"role": "system", "content": "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence."},
#             {"role": "user", "content": "Bien, gracias. ¿Y a tú"},
#         ]
#     ],
#         "parameters": {"max_new_tokens": 128, "top_p": 0.9, "temperature": 0.6},
# }

In [None]:
# payload = {
#     "inputs": [
#         [
#             {"role": "system", "content": "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence."},
#             {"role": "user", "content": "Sí, tengo algo tiempo hoy."},
#         ]
#     ],
# }

payload = {
    "inputs": [
        [
            {"role": "system", "content": "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence."},
            {"role": "user", "content": "Sí, tengo algo tiempo hoys."},
        ]
    ],
}

In [None]:
%%time

response = generate_response(ENDPOINT_NAME, payload)

In [None]:
response

In [None]:
import re

# output_text = re.search("\'outputs\': \".+\"", response)
# re.split("\'outputs\': ", output_text.group())[1]

re.search("\'outputs\': \"(.+)\"", response).group(1)

#string = response.split('Response: ', 1)[1]
# https://stackoverflow.com/questions/32728380/python-parsing-json-with-escaped-double-quotes
# p = re.compile('(?<!\\\\)\'')
# s = p.sub('\"', string)
#string2 = string.replace("'",'"')
#string2
# json = json.loads(response)
#json

### chain 1 - sentence correction

In [None]:
payload1 = {
    "inputs": [
        [
            {"role": "user", "system": "The user made mistakes. You respond the correction in Spanish and nothing else:"},
            {"role": "user", "content": "Sí, tengo algo tiempo hoy."},
        ]
    ],
}

In [None]:
# Sí, tengo algo de tiempo hoy.
payload1 = {
    "inputs": [
        [
            {"role": "user", "system": "You are a Spanish teacher. Be nice."},
            {"role": "user", "content": "'Sí, tengo algo de tiempo hoys.' has grammetical error. Return the correction and nothing else:"},
        ]
    ],
}

In [None]:
payload_alt = {
    "inputs": [
        [
            {"role": "system", "content": "You are a Spanish teacher. Be nice."},
            {"role": "user", "content": "'Sí, tengo algo de tiempo hoys.' has grammetical error. Return the correction and nothing else:"},
        ]
    ],
}

generate_response(ENDPOINT_NAME, payload_alt)

In [None]:
%%time

generate_response(ENDPOINT_NAME, payload1)

In [None]:
gec_output = generate_response(ENDPOINT_NAME, payload1)

In [None]:
gec_output

In [None]:
re.search("\'outputs\': \"(.+)\"", gec_output).group(1)

In [None]:
gec_output = gec_output.split('\'assistant\', \'content\': "\'')[1].lstrip().split('\'"}]]}\n\n{\'inputs\'')[0]

In [None]:
gec_output

### chain 2 - scaffolding

In [None]:
scaffold_json = f"You respond with 'Veo. quieres decir 'Sí, tengo algo de tiempo hoy.' in Spanish and nothing else:"

payload2 = {
    "inputs": [
        [
            {"role": "system", "content": "You are a Spanish teacher. keep your response short."},    
            {"role": "user", "content": scaffold_json},
        ]
    ],
}

In [None]:
payload2

In [None]:
%%time

scaffold_output = generate_response(ENDPOINT_NAME, payload2)

In [None]:
scaffold_output

In [None]:
scaffold_output = scaffold_output.split('\'assistant\', \'content\': "\'')[1].lstrip().split('\'"}]]}\n\n{\'inputs\'')[0]

In [None]:
re.search("\'outputs\': \"(.+)\"", scaffold_output).group(1)

### chain 3 - ask follwup question

In [None]:
# ¿Quieres ir de compras conmigo?

### Trying out response consistency (Aastha)

In [None]:
import re

parameters = {
    "max_new_tokens": 64,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
    "temperature": 0.001,
    "stop": ["<|endoftext|>", "</s>"]
}

# parameters = {
#     "max_new_tokens": 64,
# }

client = boto3.client("runtime.sagemaker")

def query_endpoint_with_json_payload(encoded_json, endpoint_name):
    response = client.invoke_endpoint(
        EndpointName=endpoint_name, ContentType="application/json", Body=encoded_json
    )
    return response

def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read().decode('utf-8'))
    return model_predictions[0]["generated_text"]

def generate_response(endpoint_name, text):
    payload = {"inputs": f"{text}:", "parameters": parameters}
    query_response = query_endpoint_with_json_payload(
        json.dumps(payload).encode("utf-8"), endpoint_name=endpoint_name
    )
    generated_texts = parse_response(query_response)
    return generated_texts

def create_llm_input(system_content, user_content):
    return {
        "inputs": [[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]]
    }

def generate_chatbot_response(system_content, user_content):
    raw_response = generate_response(ENDPOINT_NAME, create_llm_input(system_content, user_content))
    print(raw_response)
    # regex = "\'outputs\': \\[\\[\\{\'role\': \'assistant\', \'content\': \'(.+)\'"
    regex = "\'role\': \'assistant\', \'content\': (.+)\\}"
    response = re.search(regex, raw_response)
    return response.group(1)

In [None]:
# Method 2 section
generate_chatbot_response(
    "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence.",
    "Sí, tengo algo tiempo hoys."
)

In [None]:
# Sentence correction section
gec_output = generate_chatbot_response(
    "You are a Spanish teacher. Be nice.",
    "'Sí, tengo algo de tiempo hoys.' has grammetical error. Return the correction and nothing else:"
)
gec_output

In [None]:
# Scaffolding section
# scaffold_json = f"You respond with 'Veo. quieres decir '{gec_output}' in Spanish and nothing else:"
scaffold_json = f"You respond with 'Veo. quieres decir 'Sí, tiene algo de tiempo hoy.' in Spanish and nothing else:"

generate_chatbot_response(
    "You are a Spanish teacher. keep your response short.",
    scaffold_json
)

### try langchain

In [None]:
!pip install -q langchain

In [None]:
from langchain import LLMChain
from langchain import SagemakerEndpoint
from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.llms.sagemaker_endpoint import LLMContentHandler

In [None]:
from typing import Dict

In [None]:
class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        input_str = json.dumps({prompt: prompt, **model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        return response_json[0]["generated_text"]

In [None]:
# detect the GEC and output the correct response
prompt1 = PromptTemplate.from_template(
    "'{user_input}' has grammetical error. Return the correction and nothing else:"
)

# add scaffolding with 'ahh, you mean...'
prompt2 = PromptTemplate.from_template(
    "Return 'ahh, you mean {fixed_input}' and nothing else:"
)

# continue the conversation with a question
prompt3 = PromptTemplate.from_template(
    "keep it simple when rephrase {ask_question} and nothing else:"
)

In [None]:
model = LLMChain(
    llm=SagemakerEndpoint(
        endpoint_name=ENDPOINT_NAME,
        client=runtime,
        model_kwargs={
            "max_new_tokens": 128,
            "top_k": 50,
            "top_p": 0.8, 
            "do_sample": True,
            "temperature": 1e-10
        },
        content_handler=content_handler,
    )
)

In [None]:
chain = prompt1 | model

chain1 = prompt1 | model | StrOutputParser()

chain2 = (
    {"fixed_input": chain1}
    | prompt2
    | model
    | StrOutputParser()
)

#chain3 = (
#    {"scaffolding_input": chain2}
#    | prompt3
#    | model
#    | StrOutputParser()
#)

chain3 = prompt3 | model | StrOutputParser()

In [None]:
chain2.invoke({"user_input": "Esto bien, gracias."})

In [None]:
chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI bot. Your name is {name}."),
        ("human", "Hello, how are you doing?"),
        ("ai", "I'm doing well, thanks!"),
        ("human", "{user_input}"),
    ]
)

In [None]:
query = """'I name is Mon'"""

prompt_template = """{user_input}' has grammetical error. Return the correction and nothing else:"""

PROMPT = PromptTemplate(
    template=prompt_template, 
    input_variables=["inputs"]
)

In [None]:
print(PROMPT.format(user_input=query))

In [None]:
content_handler = ContentHandler()

chain = LLMChain(
    llm=SagemakerEndpoint(
        endpoint_name=ENDPOINT_NAME,
        client=runtime,
        model_kwargs={"max_new_tokens": 128, "top_p": 0.9, "temperature": 1e-10},
        content_handler=content_handler,
    ),
    prompt=PROMPT,
)

In [None]:
#chain({"question": query})
#chain({"inputs": query})
chain()

In [None]:
chain({"question": query}, return_only_outputs=True)

In [None]:
chain({"input_documents": docs, "question": query}, return_only_outputs=True)

In [None]:
template = "{content}"

prompt = PromptTemplate.from_template(template)

In [None]:
content_handler = ContentHandler()

llm=SagemakerEndpoint(
     endpoint_name=ENDPOINT_NAME,
     client=runtime,
     model_kwargs={"max_new_tokens": 700, "top_p": 0.9, "temperature": 0.6},
     endpoint_kwargs={"CustomAttributes": 'accept_eula=true'},
     content_handler=content_handler
 )

In [None]:
llm_chain = LLMChain(
     llm=llm,
     prompt=prompt
 )

In [None]:
# {
#   "inputs": " ¡Hola! Bienvenido a la cafetería Brew Haven. ¿Qué quieres? "
#}
llm_chain.run({
    "inputs": "How can I travel from New York to Los Angeles?"
})

In [None]:
runtime = boto3.client('runtime.sagemaker')

In [None]:
%%time

payload = {
    "inputs": [
        [
            {"role": "system", "content": "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence."},
            {"role": "user", "content": "Bien, gracias. ¿Y a tú"},
        ]
    ],
    "parameters": {"max_new_tokens": 64, "top_p": 0.9, "temperature": 0.6},
}

In [None]:
#response = predictor.predict(payload, custom_attributes="accept_eula=false")

response = runtime.invoke_endpoint(
    EndpointName = ENDPOINT,
    Body = json.dumps(payload),
    ContentType = "application/json",
    CustomAttributes = "accept_eula=true",
)

In [None]:
%%time

prediction = json.loads(response['Body'].read().decode('utf-8'))

In [None]:
prediction[0]['generated_text']

### old codes

### load model from S3 (does not work)

In [None]:
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data="s3://project-langbot-models/zephyr.tar.gz",  # path to your trained SageMaker model
   role=role,                                            # IAM role with permissions to create an endpoint
   transformers_version="4.28",                           # Transformers version used
   pytorch_version="2.0",                                # PyTorch version used
   py_version='py310',                                    # Python version used 
)

In [None]:
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data="s3://project-langbot-models/zephyr.tar.gz",  # path to your trained SageMaker model
   role=role,                                            # IAM role with permissions to create an endpoint
   transformers_version="4.26",                           # Transformers version used
   pytorch_version="1.13",                                # PyTorch version used
   py_version='py39',                                    # Python version used
)

In [None]:
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.m5.2xlarge",
    container_startup_health_check_timeout=300,
)

In [None]:
# example request: you always need to define "inputs"
data = {
   "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
}

In [None]:
# send request
predictor.predict({
	"inputs": "My name is Julien and I like to",
})

In [None]:
messages = [
    {
        "role": "system",
        "content": "You are a Spanish language teacher, and the user made mistakes. You respond with 'ahh, you mean,...' and repeat what the user said in the correct format. Don't further explain, and keep your response in one short sentence.",
    },
    {
        "role": "user", 
        "content": "Bien, gracias. ¿Y a tú?"},
]