### Exploration of the LangChain Chatbot Output
This notebook compares the output on various queries to make sure the langchain model is functioning correctly and incorporating API context, not just returning information directly from openai.</br></br>
get_context() returns the direct response from a query's API call</br>
nps_chain.invoke() returns the lanchain openai based chatbot response with the response from a query's API as context</br>
chat_model.invoke() returns the openai chatbot based response with no context</br>

In [1]:
#IMPORTS
import sys
sys.path.insert(0,'../')
from environment import env
config = env.env()

OPENAI_API_KEY = config['gpt_api_key']
import os
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

from langchain_openai import ChatOpenAI
from langchain.schema.messages import HumanMessage, SystemMessage
from langchain.memory.buffer import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
)

from model_functions.tfidf_model import *
from model_functions.get_context import *
from lang_chatbot import nps_chain

import nest_asyncio
import openai
openai.api_key = OPENAI_API_KEY
nest_asyncio.apply()

from langchain.chains import RetrievalQA
from ragas.metrics import answer_similarity
from ragas import evaluate
#from ragas.langchain.evalchain import RagasEvaluatorChain
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from datasets import Dataset


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/lauralyns/Documents/MADS/SIADS 699 -
[nltk_data]     Capstone/Capstone
[nltk_data]     VS/MADS_Capstone/.venv/lib/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/lauralyns/Documents/MADS/SIADS 699 -
[nltk_data]     Capstone/Capstone
[nltk_data]     VS/MADS_Capstone/.venv/lib/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/lauralyns/Documents/MADS/SIADS 699 -
[nltk_data]     Capstone/Capstone
[nltk_data]     VS/MADS_Capstone/.venv/lib/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


In [16]:
print(.11*30145/117,
      ((1.09-.11)/1000)*30145,
      ((1.90-.11)/2000)*30145,
      ((1.90-1.09)/1000)*30145,
      ((2.73-1.90)/1000)*30145,
      ((3.55-2.73)/1000)*30145,
      ((3.55-1.09)/3000)*30145,
)

28.34145299145299 29.542100000000005 26.979774999999997 24.417449999999995 25.020350000000004 24.718899999999998 24.718899999999998


In [5]:
times = pd.read_pickle('../04_nps_evaluation/times.pkl')
print(len(times))
times.head()

21


Unnamed: 0,gpt,spacy,nltk,tfidf
1000,1.901767,0.009635,0.026306,0.014053
2000,1.904685,0.011351,0.025409,0.014472
3000,1.865997,0.010164,0.025863,0.013879
4000,1.856267,0.013058,0.025485,0.014104
5000,1.886181,0.007882,0.023694,0.012772


In [3]:
test_data = pd.read_pickle('../04_nps_evaluation/test_data.pkl')
print(len(test_data))
test_data.head()

22000


Unnamed: 0,query,intent,endpoint,parkcode,gpt_endpoint,gpt_parkcode,gpt_intent,spacy_endpoint,spacy_parkcode,spacy_intent,nltk_endpoint,nltk_parkcode,nltk_intent,tfidf_endpoint,tfidf_parkcode,tfidf_intent
27641,What are the fees for visiting Virgin Islands ...,feespass,feespasses,vicr,fees,vicr,feespass,parks,vicr,other,parks,vicr,other,feespasses,vicr,feespass
15777,What is available at Denali National Park & Pr...,amenities,amenities,dena,amenities,dena,amenities,parks,dena,other,parks,dena,other,amenities,dena,amenities
24454,What events are scheduled at Lower Delaware?,events,events,lode,events,lode,events,parks,none,other,parks,none,other,events,lode,events
6328,What is the address of Saint Paul's Church,address,parks,sapa,parks,sapa,address,parks,none,address,parks,none,address,parks,sapa,address
28771,What are the fees for visiting Grant-Kohrs Ranch,feespass,feespasses,grko,fees,grko,feespass,parks,none,other,parks,none,other,feespasses,grko,feespass


In [21]:
test_data['gpt_intent'].value_counts()

gpt_intent
alerts         802
amenities      764
events         654
feespass       589
description    371
address        351
fullname       252
state          216
fe               1
Name: count, dtype: int64

In [2]:
#VARIABLES
model = tfidf_model
chat_model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0,api_key  = OPENAI_API_KEY)

In [34]:
question = "Describe Yosemite"
context = get_context(question, model)

def flatten(xs): 
    # Initialize list for this layer 
    flat_list = []
    for x in xs: 
        # If it's a list, recurse down and return the interior list
        if isinstance(x.values(), dict): 
            flat_list += flatten(x)
        # Otherwise, add to this layer's list
        else: 
            flat_list.append(x.values()) 
    return flat_list

flat_context = flatten(context)
flat_context
#context = 'Yosemite park is very nice'
description = context[0]['description']


In [6]:
question = "Describe Yosemite"
answer = nps_chain.invoke({"context": get_context(question, model), "question": question})
answer

'Yosemite National Park is not just a great valley, but a shrine to human foresight, the strength of granite, the power of glaciers, the persistence of life, and the tranquility of the High Sierra. First protected in 1864, Yosemite National Park is best known for its waterfalls, but within its nearly 1,200 square miles, you can find deep valleys, grand meadows, ancient giant sequoias, a vast wilderness area, and much more. It covers nearly 1,200 square miles in the Sierra Nevada, with elevations ranging from about 2,000 feet to 13,000 feet.'

In [16]:
messages = [
    SystemMessage(content="""You're an assistant knowledgeable about national parks. Only answer national park related questions."""),
    HumanMessage(content='What events are at Yosemite?'),]
alt_answer = chat_model.invoke(messages).content
alt_answer


'Yosemite National Park offers a variety of events throughout the year, including guided hikes, photography workshops, stargazing programs, art classes, and ranger-led talks. You can check the official Yosemite National Park website or contact the park directly for the most up-to-date information on events and activities.'

In [41]:
d1 = {
    "question": [question],
    "answer": [answer],
    "ground_truth": [description],
    #"contexts": [description]
}

d2 = {
    "question": [question],
    "answer": [alt_answer],
    "ground_truth": [description],
    #"contexts": [description]
}

In [42]:
dataset1 = Dataset.from_dict(d1)
dataset2 = Dataset.from_dict(d2)

#score = evaluate(dataset, metrics=[faithfulness, answer_relevancy, context_precision, context_recall, answer_similarity])
score1 = evaluate(dataset1, metrics=[answer_similarity])#, faithfulness])
score2 = evaluate(dataset2, metrics=[answer_similarity])#, faithfulness])

#score_df = score.to_pandas()

score1, score2
#score_df[['faithfulness','answer_relevancy', 'context_precision', 'context_recall',
#score_df[['answer_similarity']].mean(axis=0)
#score_df

Evaluating: 100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
Evaluating: 100%|██████████| 1/1 [00:00<00:00,  2.31it/s]


({'answer_similarity': 0.9848}, {'answer_similarity': 0.8453})

In [6]:
faithfulness_chain = RagasEvaluatorChain(metric=faithfulness)
answer_rel_chain = RagasEvaluatorChain(metric=answer_relevancy)
context_rel_chain = RagasEvaluatorChain(metric=context_precision)
context_recall_chain = RagasEvaluatorChain(metric=context_recall)

NameError: name 'RagasEvaluatorChain' is not defined

In [8]:
question = "Where is Yosemite"
get_context(question, model)

[{'id': '4324B2B4-D1A3-497F-8E6B-27171FAE4DB2',
  'url': 'https://www.nps.gov/yose/index.htm',
  'fullName': 'Yosemite National Park',
  'parkCode': 'yose',
  'description': 'Not just a great valley, but a shrine to human foresight, the strength of granite, the power of glaciers, the persistence of life, and the tranquility of the High Sierra. First protected in 1864, Yosemite National Park is best known for its waterfalls, but within its nearly 1,200 square miles, you can find deep valleys, grand meadows, ancient giant sequoias, a vast wilderness area, and much more.',
  'latitude': '37.84883288',
  'longitude': '-119.5571873',
  'latLong': 'lat:37.84883288, long:-119.5571873',
  'activities': [{'id': '09DF0950-D319-4557-A57E-04CD2F63FF42',
    'name': 'Arts and Culture'},
   {'id': '5F723BAD-7359-48FC-98FA-631592256E35', 'name': 'Auto and ATV'},
   {'id': '0B4A5320-216D-451A-9990-626E1D5ACE28', 'name': 'Scenic Driving'},
   {'id': '13A57703-BB1A-41A2-94B8-53B692EB7238', 'name': 'Astr

In [9]:
question = "Where is Yosemite"
nps_chain.invoke({"context": get_context(question, model), "question": question})

2024-07-23 13:30:16 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'Yosemite National Park is located in California.'

In [10]:
messages = [
    SystemMessage(content="""You're an assistant knowledgeable about national parks. Only answer national park related questions."""),
    HumanMessage(content="Where is Yosemite"),]
chat_model.invoke(messages)

2024-07-23 13:30:53 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='Yosemite National Park is located in the Sierra Nevada mountains of California, USA.')

In [11]:
question = "What events are at Yosemite"
get_context(question, model)

[{'location': '',
  'updateuser': '',
  'contactname': '',
  'contacttelephonenumber': '',
  'recurrencedateend': '2024-08-20',
  'longitude': '',
  'datestart': '2024-07-23',
  'isrecurring': 'true',
  'datetimeupdated': '',
  'portalname': '',
  'types': ['Hike', 'Partner Program'],
  'createuser': '',
  'isfree': 'false',
  'contactemailaddress': '',
  'regresurl': 'https://www.travelyosemite.com/things-to-do/hiking/guided-group-hikes/',
  'description': '<p>Join the Yosemite Mountaineering School for a guided group hike in Yosemite Valley!</p>\n<p>Half-day and full-day group hikes are offered from May through October for hikers of all experience levels. Bring a small backpack with at least 2 liters of water (some situations require more), snacks, lunch, hat, sunglasses, sunscreen and extra layers.</p>\n<p>Check the website for details and to book your spot!</p>\n<p><em>All hikers must be at least 12 years old.</em></p>',
  'images': [{'path': '/common/uploads/event_calendar/C2AA717

In [12]:
question = "What events are at Yosemite"
nps_chain.invoke({"context": get_context(question, model), "question": question})

2024-07-23 13:31:19 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"Here are some events happening at Yosemite National Park:\n\n1. Adventure Hike: El Capitan Loop (Yosemite Valley)\n2. Adventure Hike: Nevada Fall (Yosemite Valley)\n3. Ansel Adams' Legacy and Your Digital Camera (Yosemite Valley)\n4. Art Class (Yosemite Valley)\n5. Campfire Program (Tuolumne Meadows)\n6. Curry Village Historic Tour (Yosemite Valley)\n7. Discovery Walk (Yosemite Valley)\n8. Explore Yosemite's Night Sky (Yosemite Valley)\n9. Forest Art Walk | Yosemite Conservancy\n10. Glacier Point Starry Skies\n\nThese events include hikes, art classes, campfire programs, guided tours, and stargazing experiences."

In [None]:
messages = [
    SystemMessage(content="""You're an assistant knowledgeable about national parks. Only answer national park related questions."""),
    HumanMessage(content="What events are at Yosemite"),]
chat_model.invoke(messages)

In [14]:
question = "What is address of everglades"
get_context(question, model)

[{'id': '5EA02193-276A-4037-B7DB-5765A56935FD',
  'url': 'https://www.nps.gov/ever/index.htm',
  'fullName': 'Everglades National Park',
  'parkCode': 'ever',
  'description': 'Everglades National Park protects an unparalleled landscape that provides important habitat for numerous rare and endangered species like the manatee, American crocodile, and the elusive Florida panther. An international treasure as well - a World Heritage Site, International Biosphere Reserve, a Wetland of International Importance, and a specially protected area under the Cartagena Treaty.',
  'latitude': '25.37294225',
  'longitude': '-80.88200301',
  'latLong': 'lat:25.37294225, long:-80.88200301',
  'activities': [{'id': '5F723BAD-7359-48FC-98FA-631592256E35',
    'name': 'Auto and ATV'},
   {'id': '0B4A5320-216D-451A-9990-626E1D5ACE28', 'name': 'Scenic Driving'},
   {'id': '13A57703-BB1A-41A2-94B8-53B692EB7238', 'name': 'Astronomy'},
   {'id': 'D37A0003-8317-4F04-8FB0-4CF0A272E195', 'name': 'Stargazing'},
 

In [16]:
question = "What is address of everglades"
nps_chain.invoke({"context": get_context(question, model), "question": question})

2024-07-23 13:34:46 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'The address of Everglades National Park is 40001 State Road 9336, Homestead, FL 33034.'

In [15]:
messages = [
    SystemMessage(content="""You're an assistant knowledgeable about national parks. Only answer national park related questions."""),
    HumanMessage(content="What is address of everglades"),]
chat_model.invoke(messages)

2024-07-23 13:34:30 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='The Everglades National Park is located in Florida, and the main entrance address is 40001 State Road 9336, Homestead, FL 33034.')

In [19]:
question ='What alerts are at bryce?'
get_context(question, model)

[{'id': '9840308A-02FB-4A2F-90A3-E5597EB3881B',
  'url': 'https://www.nps.gov/brca/planyourvisit/conditions.htm',
  'title': 'Main Road Status',
  'parkCode': 'brca',
  'description': 'The main park road is fully open to Rainbow Point (Mile 18 of 18). All park roads are currently open for the season. During snowstorms the road may temporarily close at Mile 3 for snowplow operations.',
  'category': 'Information',
  'relatedRoadEvents': [],
  'lastIndexedDate': '2024-04-02 13:59:18.0'},
 {'id': '8833A24A-3F71-4E6E-9254-FF7743304115',
  'url': 'https://www.nps.gov/brca/planyourvisit/basicinfo.htm',
  'title': 'Bryce Canyon is Open - No Reservations Required to Enter',
  'parkCode': 'brca',
  'description': 'No reservations are required to enter Bryce Canyon National Park at any time of year. Simply pay your park entrance fee or present your America the Beautiful pass upon arrival.',
  'category': 'Information',
  'relatedRoadEvents': [],
  'lastIndexedDate': '2024-02-16 15:58:11.0'}]

In [20]:
question = 'What alerts are at bryce?'
nps_chain.invoke({"context": get_context(question, model), "question": question})

2024-07-23 13:44:28 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'At Bryce Canyon National Park, there are currently no alerts or road closures. The main park road is fully open to Rainbow Point (Mile 18 of 18), and all park roads are currently open for the season. However, during snowstorms, the road may temporarily close at Mile 3 for snowplow operations. You can check the latest road conditions at [Main Road Status at Bryce Canyon National Park](https://www.nps.gov/brca/planyourvisit/conditions.htm).'

In [21]:
messages = [
    SystemMessage(content="""You're an assistant knowledgeable about national parks. Only answer national park related questions."""),
    HumanMessage(content='What alerts are at bryce?'),]
chat_model.invoke(messages)

2024-07-23 13:44:43 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content="At Bryce Canyon National Park, visitors should be aware of alerts related to weather conditions, trail closures, wildlife encounters, and park regulations. It's always a good idea to check the park's official website or contact the visitor center for the most up-to-date information on any alerts or advisories in the park.")

In [16]:
nps_template_str = """You're an assistant knowledgeable about national parks. Only answer national park related questions. Use the following context to anser these questions.

{context}

{question}
"""

In [22]:
nps_template = ChatPromptTemplate.from_template(nps_template_str)
question = 'Describe Yosemite'
context = get_context(question, model)

In [23]:
nps_template.format(context = context, question = question)

'Human: You\'re an assistant knowledgeable about national parks. Only answer national park related questions. Use the following context to anser these questions.\n\n[{\'id\': \'4324B2B4-D1A3-497F-8E6B-27171FAE4DB2\', \'url\': \'https://www.nps.gov/yose/index.htm\', \'fullName\': \'Yosemite National Park\', \'parkCode\': \'yose\', \'description\': \'Not just a great valley, but a shrine to human foresight, the strength of granite, the power of glaciers, the persistence of life, and the tranquility of the High Sierra. First protected in 1864, Yosemite National Park is best known for its waterfalls, but within its nearly 1,200 square miles, you can find deep valleys, grand meadows, ancient giant sequoias, a vast wilderness area, and much more.\', \'latitude\': \'37.84883288\', \'longitude\': \'-119.5571873\', \'latLong\': \'lat:37.84883288, long:-119.5571873\', \'activities\': [{\'id\': \'09DF0950-D319-4557-A57E-04CD2F63FF42\', \'name\': \'Arts and Culture\'}, {\'id\': \'5F723BAD-7359-48F