-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
144 additions
and
1 deletion.
There are no files selected for viewing
133 changes: 133 additions & 0 deletions
133
... Making LLMs Interact with the World Using Tools/3 Recreating the Bing Chatbot/chatbot.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

# temperature=0 keeps completions deterministic so runs are reproducible.
llm = OpenAI(temperature=0)

# Single-variable prompt that forwards the user's question verbatim.
template = """You are an assistant that answers the following question correctly and honestly: {question}\n\n"""
prompt_template = PromptTemplate(input_variables=["question"], template=template)

# verbose=True echoes the fully rendered prompt, making the chain's input visible.
question_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=True)

# Baseline answer from the model's training data alone (no web search yet).
response = question_chain.run("what is the latest fast and furious movie?")
print(response)

print("----- GETTING SEARCH RESULTS -----")
|
||
|
||
|
||
|
||
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

# NOTE(review): the wrapper presumably reads GOOGLE_API_KEY / GOOGLE_CSE_ID
# from the environment — confirm against deployment config.
search = GoogleSearchAPIWrapper()
TOP_N_RESULTS = 10


def top_n_results(query):
    """Return the first TOP_N_RESULTS Google hits for *query* as result dicts."""
    return search.results(query, TOP_N_RESULTS)


tool = Tool(
    name="Google Search",
    description="Search Google for recent results.",
    func=top_n_results,
)
|
||
query = "What is the latest fast and furious movie?"

results = tool.run(query)

# Dump title / link / snippet for every hit, separated by a horizontal rule.
rule = "-" * 50
for hit in results:
    print(hit["title"])
    print(hit["link"])
    print(hit["snippet"])
    print(rule)
|
||
|
||
print("------- DOWNLOAD AND PARSE CONTENTS -----")


import newspaper

# Download each search hit and keep only pages that yield non-empty article
# text. Each entry: {"url": <result link>, "text": <extracted article body>}.
pages_content = []

for result in results:
    try:
        article = newspaper.Article(result["link"])
        article.download()
        article.parse()
        if len(article.text) > 0:
            pages_content.append({"url": result["link"], "text": article.text})
    except Exception:
        # Best-effort crawl: skip pages that fail to download or parse rather
        # than aborting. Narrowed from a bare `except:`, which would also
        # swallow KeyboardInterrupt and SystemExit and make the loop
        # un-interruptible.
        continue
|
||
|
||
print("----- PROCESS SEARCH RESULTS -----")


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Chunk each page into <=4000-char pieces (100-char overlap) and tag every
# chunk with its source URL so the final answer can cite where it came from.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)

docs = [
    Document(page_content=chunk, metadata={"source": page["url"]})
    for page in pages_content
    for chunk in text_splitter.split_text(page["text"])
]
|
||
|
||
|
||
|
||
|
||
|
||
from langchain.embeddings import OpenAIEmbeddings

# Embed every chunk and the query with the same model so the cosine
# comparison below is meaningful.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

chunk_texts = [document.page_content for document in docs]
docs_embeddings = embeddings.embed_documents(chunk_texts)
query_embedding = embeddings.embed_query(query)
|
||
|
||
|
||
import numpy as np | ||
from sklearn.metrics.pairwise import cosine_similarity | ||
|
||
def get_top_k_indices(list_of_doc_vectors, query_vector, top_k):
    """Return the indices of the ``top_k`` document vectors most similar to the query.

    Similarity is cosine similarity; the result is a numpy array of indices
    into ``list_of_doc_vectors``, ordered most-similar first.

    Computed directly with numpy instead of sklearn's ``cosine_similarity`` —
    same result, one fewer heavyweight dependency for this function.
    """
    # Convert the lists of vectors to numpy arrays.
    doc_matrix = np.asarray(list_of_doc_vectors, dtype=float)
    query_arr = np.asarray(query_vector, dtype=float)

    # Cosine similarity: sim_i = (d_i . q) / (||d_i|| * ||q||).
    # Zero norms are replaced by 1 so all-zero vectors score 0 instead of
    # producing NaN (mirrors sklearn's normalize() handling of zero rows).
    doc_norms = np.linalg.norm(doc_matrix, axis=1)
    doc_norms[doc_norms == 0.0] = 1.0
    query_norm = np.linalg.norm(query_arr) or 1.0
    similarities = (doc_matrix @ query_arr) / (doc_norms * query_norm)

    # Sort descending by similarity and keep the top K indices.
    sorted_indices = np.argsort(similarities)[::-1]
    return sorted_indices[:top_k]
|
||
# Keep the two chunks closest to the query; filtering in enumeration order
# preserves the chunks' original document order.
top_k = 2
best_indexes = get_top_k_indices(docs_embeddings, query_embedding, top_k)
chosen = {int(index) for index in best_indexes}
best_k_documents = [doc for position, doc in enumerate(docs) if position in chosen]
|
||
|
||
print("---- CHAIN WITH SOURCE ---")

from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# "stuff" packs all selected chunks into one prompt; the chain's prompt asks
# the model to append a "SOURCES:" section listing the cited URLs.
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")

response = chain({"input_documents": best_k_documents, "question": query}, return_only_outputs=True)

# The model does not always emit exactly one "SOURCES:" marker. str.partition
# always yields three parts, so this cannot raise — unlike the previous
# `split("SOURCES:")` two-way unpack, which raised ValueError when the marker
# was missing or appeared more than once. With no marker, sources come back empty.
response_text, _, response_sources = response["output_text"].partition("SOURCES:")
response_text = response_text.strip()
response_sources = response_sources.strip()

print(f"Answer: {response_text}")
print(f"Sources: {response_sources}")
|
||
|
File renamed without changes.
11 changes: 11 additions & 0 deletions
11
... with the World Using Tools/4 Integrating Multiple Tools for Web Based Question Answering
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI

# Baseline: ask the LLM directly, without tools, so its training-data answer
# can be compared against a search-augmented version.
llm = OpenAI(temperature=0)

# Single-variable prompt that forwards the question verbatim.
prompt_template = PromptTemplate(
    input_variables=["question"],
    template="""You are an assistant that answers the following question correctly and honestly: {question}\n\n""",
)

question_chain = LLMChain(llm=llm, prompt=prompt_template)

question_chain.run("what is the latest fast and furious movie?")
1 change: 0 additions & 1 deletion
1
... with the World Using Tools/4-Integrating Multiple Tools for Web Based Question Answering
This file was deleted.
Oops, something went wrong.