In [25]:
import requests
import pandas as pd
import time
import re

In [2]:
from dotenv import load_dotenv
import os
token = os.getenv('HUGGINGFACE_TOKEN')

In [3]:
df = pd.read_csv('golden answers.csv', encoding='latin-1')
prompt_list = df['Golden Question'].to_list()
df.head()

Unnamed: 0,Index,Notes,Golden Question,Golden Answer
0,1,"Reddit, ChatGPT, human reviewed",What are your favorite luxury camp meals for a...,It sounds like a fantastic trip you have plann...
1,2,"Reddit, ChatGPT, human reviewed",I'm taking a solo trip with my dog around Than...,It sounds like a fantastic trip you have plann...
2,3,"Reddit, ChatGPT, human reviewed",I'm planning a 3-4 week family road trip from ...,Embarking on a cross-Canada road trip with you...
3,4,"Reddit, ChatGPT, human reviewed",What are the main differences between visiting...,If you're choosing between Seoul and Tokyo for...
4,5,"Reddit, ChatGPT, human reviewed",Seeking destination and activity suggestions f...,Planning a short solo trip for your birthday i...


In [4]:
df.shape

(37, 4)

# Inference Endpoint Method Deployed on AWS

In [5]:
headers = {
    'Accept': 'application/json',
    'Authorization': f'Bearer hf_{token}',
    'Content-Type': 'application/json'
}

In [6]:
model_dict = {'pretrained Llama':"https://elmtyqbmlx704v13.us-east-1.aws.endpoints.huggingface.cloud", #https://huggingface.co/meta-llama/Llama-2-7b-hf
              'pretrained Mistral':"https://b15auwvx0xu3uymo.us-east-1.aws.endpoints.huggingface.cloud", #https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
              'Llama QLORA':'https://hoeh5xdq1cy2pg01.us-east-1.aws.endpoints.huggingface.cloud', #https://huggingface.co/beraht/llama-2-7b_qlora_falcon_417
              'Llama RAFT':'https://qazfnvfi7y7n1vok.us-east-1.aws.endpoints.huggingface.cloud', #https://huggingface.co/beraht/Llama2_Falcon_RAFT_50e_10s/tree/main
              'Mistral QLORA':"https://i32y3wwlqdt9257k.us-east-1.aws.endpoints.huggingface.cloud", #https://huggingface.co/sherrys/mistral-2-7b_qlora_falcon_426/tree/main
              'Mistral RAFT':'https://ddhgi892zzbiynte.us-east-1.aws.endpoints.huggingface.cloud', #https://huggingface.co/sherrys/426_mistral_RAFT_50e_10s
              'RLHF Test':'https://reyil3szz2jf1p19.us-east-1.aws.endpoints.huggingface.cloud' #https://huggingface.co/chriztopherton/Wanderchat_Mistral_RAFT_RLHF
              }

In [7]:
# current_model = 'Llama QLORA'
# current_model = 'pretrained Llama'
# current_model = 'pretrained Mistral'
# current_model = 'Llama RAFT'
# current_model = 'Mistral QLORA'
# current_model = 'Mistral RAFT'
current_model = 'RLHF Test'

In [8]:
API_URL = model_dict[current_model]

def query(payload):
  response = requests.post(API_URL, headers=headers, json=payload)
  return response.json()

In [9]:
custom_prompt = '''Answer the question as if you are a travel agent and your goal is to provide excellent customer service and to provide
        personalized travel recommendations with reasonings based on their question. Do not repeat yourself or include any links or HTML.'''

In [65]:
answers = []
answer_time = []

for question in prompt_list:
    prompt = f"""{custom_prompt}
        Question: {question}"""
    input_len = len(prompt.split())
    max_token_len = 1500-input_len-100 #100 buffer

    start_time = time.time()
    # answer = query({'inputs': f"<s>[INST] {prompt} [/INST]",
    #                 'parameters': {"max_new_tokens": max_token_len}})
    while True: #while loop for token
        answer = query({'inputs': f"<s>[INST] {prompt} [/INST]",
                    'parameters': {"max_new_tokens": max_token_len}})
        if 'error' not in answer:
            break  #exit the while loop if there is no error
        max_token_len -= 100 #reduce by 100 in while loop
        print(f"Failed to process prompt with token length: {max_token_len}")
        if max_token_len <= 0:
            break
    end_time = time.time()
    duration = end_time - start_time
    
    answer = answer[0]['generated_text'].replace(f"<s>[INST] {prompt} [/INST]","")
    answer = answer.replace(" . ",". ").strip()
    answer = re.sub(r'<ANSWER>.*$', '', answer, flags=re.DOTALL) #RAFT specific
    answer = re.sub(r'Final answer: .*$', '', answer, flags=re.DOTALL) #RAFT specific
    answers.append(answer)
    answer_time.append(duration)
    print(question)  
    print('\n')
    print(duration)
    print(answer)
    print('\n-----------------------------\n')

Failed to process prompt with token length: 1232
Failed to process prompt with token length: 1132
Failed to process prompt with token length: 1032
Failed to process prompt with token length: 932
Failed to process prompt with token length: 832
Failed to process prompt with token length: 732
Failed to process prompt with token length: 632
Failed to process prompt with token length: 532
Failed to process prompt with token length: 432
Failed to process prompt with token length: 332
Failed to process prompt with token length: 232
Failed to process prompt with token length: 132
Failed to process prompt with token length: 32
Failed to process prompt with token length: -68


KeyError: 0

In [95]:
df[f'{current_model} Output'] = answers
df[f'{current_model} Output Time'] = answer_time
df.to_csv(f'log/{current_model} golden answers output.csv')

In [96]:
df.columns

Index(['Index', 'Notes', 'Golden Question', 'Golden Answer',
       'Llama QLORA Output', 'Llama QLORA Output Time',
       'pretrained Mistral Output', 'pretrained Mistral Output Time',
       'Mistral QLORA Output', 'Mistral QLORA Output Time',
       'Llama RAFT Output', 'Llama RAFT Output Time', 'Mistral RAFT Output',
       'Mistral RAFT Output Time', 'pretrained Llama Output',
       'pretrained Llama Output Time'],
      dtype='object')

In [99]:
import re

def remove_repeating_substrings(s):
    try:
        # This pattern matches any sequence of characters that is followed by itself
        pattern = re.compile(r"(?P<repeat>.+)(?P=repeat)")

        # Keep replacing until no more replacements can be made
        previous_s = None
        while previous_s != s:
            previous_s = s
            s = re.sub(pattern, r"\1", s)

        return s
    except TypeError:  # Handles the case when 's' is not a string
        return s

In [100]:
df = df.applymap(remove_repeating_substrings)

  df = df.applymap(remove_repeating_substrings)


In [104]:
def remove_unfinished_sentences(text):
    try:
        if isinstance(text, str):
            # Find the last occurrence of a period
            last_period_index = text.rfind('.')
        # Return the substring from the beginning to the last period (if a period was found)
            return text[:last_period_index + 1] if last_period_index != -1 else text
        else:
            return text
    except TypeError:
        return text

df = df.applymap(remove_unfinished_sentences)

  df = df.applymap(remove_unfinished_sentences)


In [105]:
df.to_csv(f'log/all cleaned golden answers output.csv')

# Israel Example

In [10]:
from pinecone import Pinecone
# connect to index
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY") )
index_name = 'wanderchat-israel-rag'
index = pc.Index(index_name)

In [11]:
from langchain.embeddings.openai import OpenAIEmbeddings
OAI_embed_model = OpenAIEmbeddings(model="text-embedding-ada-002")

  warn_deprecated(


In [12]:
from langchain.vectorstores import Pinecone

text_field = "text"  # the metadata field that contains our text
# initialize the vector store object
vectorstore = Pinecone(
    index, OAI_embed_model.embed_query, text_field
)

  warn_deprecated(


In [13]:
custom_prompt = '''Answer the question as if you are a travel agent and your goal is to provide excellent customer service and to provide
        personalized travel recommendations with reasonings based on their question. Do not repeat yourself or include any links or HTML.'''

In [14]:
question=prompt_list[20]
question

'My college roommate is getting married in Israel next month, and I would like to attend her wedding, but Is it safe to visit Israel right now?'

In [15]:
results = vectorstore.similarity_search(question, k=3)
    # get the text from the results
source_knowledge = "\n".join([x.page_content for x in results])
source_knowledge



In [16]:
prompt = f"""{custom_prompt}
    Question: {question}
    Context: {source_knowledge}"""
prompt



In [22]:
input_len = len(prompt.split())
max_token_len = 1500-input_len-200 #100 buffer
max_token_len

933

In [26]:
answer = query({'inputs': f"<s>[INST] {prompt} [/INST]",
                    'parameters': {"max_new_tokens": max_token_len}})
answer = answer[0]['generated_text'].replace(f"<s>[INST] {prompt} [/INST]","")
answer = answer.replace(" . ",". ").strip()
answer = re.sub(r'<ANSWER>.*$', '', answer, flags=re.DOTALL) #RAFT specific
answer = re.sub(r'Final answer: .*$', '', answer, flags=re.DOTALL) #RAFT specific
answer

"Based on the context provided, I would recommend that you consider the following factors when deciding whether to attend your college roommate's wedding in Israel:\n\n1. Travel advisories: The U.S. Department of State has issued a travel advisory for Israel, the West Bank, and Gaza, advising U.S. citizens to exercise increased caution due to the potential for terrorism and civil unrest. This advisory also restricts personal travel to certain areas near borders with Lebanon, Syria, and Gaza.\n2. Safety concerns: The context mentions that there is an increased risk in some areas of Israel, the West Bank, and Gaza, and that travelers should maintain a high degree of situational awareness and exercise caution at all times. This includes avoiding demonstrations and crowds, following instructions from security and emergency response officials, and reporting suspicious activities to local police.\n3. Travel restrictions: The context also mentions that some areas are currently restricted to U

In [28]:
print(answer)

Based on the context provided, I would recommend that you consider the following factors when deciding whether to attend your college roommate's wedding in Israel:

1. Travel advisories: The U.S. Department of State has issued a travel advisory for Israel, the West Bank, and Gaza, advising U.S. citizens to exercise increased caution due to the potential for terrorism and civil unrest. This advisory also restricts personal travel to certain areas near borders with Lebanon, Syria, and Gaza.
2. Safety concerns: The context mentions that there is an increased risk in some areas of Israel, the West Bank, and Gaza, and that travelers should maintain a high degree of situational awareness and exercise caution at all times. This includes avoiding demonstrations and crowds, following instructions from security and emergency response officials, and reporting suspicious activities to local police.
3. Travel restrictions: The context also mentions that some areas are currently restricted to U.S. g