Skip to content

Commit

Permalink
bing chatbot
Browse files Browse the repository at this point in the history
  • Loading branch information
manufy committed Jun 8, 2024
1 parent 9ec7823 commit 6c5f2a1
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)

template = """You are an assistant that answers the following question correctly and honestly: {question}\n\n"""
prompt_template = PromptTemplate(input_variables=["question"], template=template)

question_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=True)

response = question_chain.run("what is the latest fast and furious movie?")
print(response)

print ("----- GETTING SEARCH RESULTS -----")




from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

search = GoogleSearchAPIWrapper()
TOP_N_RESULTS = 10

def top_n_results(query):
return search.results(query, TOP_N_RESULTS)

tool = Tool(
name = "Google Search",
description="Search Google for recent results.",
func=top_n_results
)

query = "What is the latest fast and furious movie?"

results = tool.run(query)

for result in results:
print(result["title"])
print(result["link"])
print(result["snippet"])
print("-"*50)


print ("------- DOWNLOAD AND PARSE CONTENTS -----")


import newspaper

pages_content = []

for result in results:
try:
article = newspaper.Article(result["link"])
article.download()
article.parse()
if len(article.text) > 0:
pages_content.append({ "url": result["link"], "text": article.text })
except:
continue


print ("----- PROCESS SEARCH RESULTS -----")


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=100)

docs = []
for d in pages_content:
chunks = text_splitter.split_text(d["text"])
for chunk in chunks:
new_doc = Document(page_content=chunk, metadata={ "source": d["url"] })
docs.append(new_doc)






from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

docs_embeddings = embeddings.embed_documents([doc.page_content for doc in docs])
query_embedding = embeddings.embed_query(query)



import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_top_k_indices(list_of_doc_vectors, query_vector, top_k):
# convert the lists of vectors to numpy arrays
list_of_doc_vectors = np.array(list_of_doc_vectors)
query_vector = np.array(query_vector)

# compute cosine similarities
similarities = cosine_similarity(query_vector.reshape(1, -1), list_of_doc_vectors).flatten()

# sort the vectors based on cosine similarity
sorted_indices = np.argsort(similarities)[::-1]

# retrieve the top K indices from the sorted list
top_k_indices = sorted_indices[:top_k]

return top_k_indices

top_k = 2
best_indexes = get_top_k_indices(docs_embeddings, query_embedding, top_k)
best_k_documents = [doc for i, doc in enumerate(docs) if i in best_indexes]


print("---- CHAIN WITH SOURCE ---")

from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")

response = chain({"input_documents": best_k_documents, "question": query}, return_only_outputs=True)

response_text, response_sources = response["output_text"].split("SOURCES:")
response_text = response_text.strip()
response_sources = response_sources.strip()

print(f"Answer: {response_text}")
print(f"Sources: {response_sources}")


Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)

template = """You are an assistant that answers the following question correctly and honestly: {question}\n\n"""
prompt_template = PromptTemplate(input_variables=["question"], template=template)

question_chain = LLMChain(llm=llm, prompt=prompt_template)

question_chain.run("what is the latest fast and furious movie?")

This file was deleted.

0 comments on commit 6c5f2a1

Please sign in to comment.