In [1]:
# Using Langchain to upload documentation to a Pinecone index:

# Initial Imports
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
import openai
import pinecone
from langchain.vectorstores import Pinecone


# Load the .env file
load_dotenv()

openai.api_key = os.getenv("OPENAI_KEY2")
openai.organization = os.getenv("OPENAI_ORG2")

pinecone_key = os.getenv("PINECONE_KEY")
pinecone_env = os.getenv("PINECONE_ENV")

  from tqdm.autonotebook import tqdm


In [55]:
from anthropic import Anthropic
anthropic = Anthropic(api_key=os.getenv("ANTHROPIC_KEY"))

In [2]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("first_rule3.pdf")
pages = loader.load_and_split()

In [7]:
from pprint import pprint

pprint(pages[1].page_content)

('First Rule\n'
 '1\n'
 'CONFIDENTIAL - DO NOT DISSEMINATE. This business plan contains confidential, '
 'trade-secret \n'
 'information and is shared only with the understanding that you will not '
 'share its contents or ideas with \n'
 'third parties without the express written consent of the plan '
 'author.Executive Summary\n'
 'Opportunity\n'
 'Problem\n'
 'Vocal technology is advancing rapidly, but protections for artists have '
 'lagged \n'
 'behind. There are currently few regulations preventing unauthorized vocal \n'
 "replication or ensuring fair compensation when an artist's voiceprint is "
 'used. First \n'
 'Rule seeks to empower singers by giving them full control over if, when, and '
 'how \n'
 'their voice is used. More specifically, control over consent, credit, '
 'compensation, \n'
 'creation, and collaboration.\n'
 'Our platform solves three key problems facing artists today:\n'
 '1.Deepfake Risks: Artificial intelligence can now create synthetic media, \n'
 'inclu

In [3]:
import tiktoken

tokenizer = tiktoken.get_encoding('cl100k_base')

# create the length function
def tiktoken_len(text):
    tokens = tokenizer.encode(
        text,
        disallowed_special=()
    )
    return len(tokens)

tiktoken_len("hello I am a chunk of text and using the tiktoken_len function "
             "we can find the length of this chunk of text in tokens")

26

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=tiktoken_len,
    separators=["\n\n", "\n", " ", ""]
)

NameError: name 'tiktoken_len' is not defined

In [74]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("first_rule.pdf")
pages = loader.load_and_split(text_splitter=text_splitter)

In [3]:
for page in pages:
    print(page)

page_content='CONFIDENTIAL\nRULE 1\nYour Voice, Your Signature, Your Legacy\nBusiness Plan\nPrepared October 2023\nContact Information\nJoel Kaiser\nvocalockr@gmail.com\n5714656108\nwww.vocalockr.com' metadata={'source': 'first_rule3.pdf', 'page': 0}
page_content="First Rule\n1\nCONFIDENTIAL - DO NOT DISSEMINATE. This business plan contains confidential, trade-secret \ninformation and is shared only with the understanding that you will not share its contents or ideas with \nthird parties without the express written consent of the plan author.Executive Summary\nOpportunity\nProblem\nVocal technology is advancing rapidly, but protections for artists have lagged \nbehind. There are currently few regulations preventing unauthorized vocal \nreplication or ensuring fair compensation when an artist's voiceprint is used. First \nRule seeks to empower singers by giving them full control over if, when, and how \ntheir voice is used. More specifically, control over consent, credit, compensation, 

In [4]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key = openai.api_key
)

In [5]:
docs=pages

In [9]:
index = Pinecone.from_documents(docs, embeddings, index_name="bplan")

In [8]:
pinecone.init(api_key = pinecone_key, environment=pinecone_env) # Initialize pinecone
index = pinecone.Index("bplan") # Get the index

In [309]:
from git import Repo

repo = Repo.clone_from(
    "https://github.com/tiangolo/fastapi", to_path="./example_data/test_repo1"
)
branch = repo.head.reference

In [262]:
from langchain.document_loaders import TextLoader
loader = TextLoader("openai.yaml")

In [221]:
from langchain.document_loaders import PythonLoader

In [326]:
from langchain.document_loaders import DirectoryLoader
# Initialize the document loader.  Use the glob
loader = DirectoryLoader('../../bakespace_fastapi/', loader_cls=PythonLoader, glob='**/*.py')

In [6]:
for doc in docs:
    print(doc.page_content)

 
 
BakeSpace Partner API Version 1.40  
 
This document will outline the query structure for the BakeSpace partner API. The purpose is 
so that partners can retrieve bakespace recipe information for their display purposes.  
 
The basic structure of a query is:  
 
http://bakespace.com/api/query.php?pcode=<partner_code>&type=<type>&limit=<number>  
 
where partner_code  is an alphanumeric string the partner is assigned by BakeSpace, type is 
the query type, and limit is how many results are to be returned.  
 
Reply content type is XML and a typical replay structure is as follows:  
 
<results>  
   <count></count>  
   <item> 
      <recipeid></recipeid>  
      <name></name>  
      <author></author>  
      <chosendate></chosendate>  
      <foodimg></foodimg>  
      <thumbnailimg></thumbn ailimg> 
      <fullimg></fullimg>  
      <desc></desc>  
      <preptime></preptime>  
      <cooktime></cooktime>  
      <servings></servings>  
      <directions></directions>  
      <ingr

In [12]:
pinecone.init(api_key = pinecone_key, environment=pinecone_env) # Initialize pinecone

In [8]:
from langchain.embeddings.openai import OpenAIEmbeddings

embed = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key = openai.api_key,
    disallowed_special=()
)


In [28]:
# Create a chroma db from the documents
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(docs, embed)

NameError: name 'embed' is not defined

In [25]:
# For each doc in docs, create a list of tuples, where each tuple is doc.page_content, doc.metadata['source']
docs_list = [(doc.page_content, doc.metadata['source']) for doc in docs]

In [26]:
# Determine the token length of each doc
docs_token_length = [tiktoken_len(doc[0]) for doc in docs_list]

# Sum the token lengths
sum(docs_token_length)

18601

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: Internal server error {
    "error": {
        "message": "Internal server error",
        "type": "auth_subrequest_error",
        "param": null,
        "code": "internal_error"
    }
}
 500 {'error': {'message': 'Internal server error', 'type': 'auth_subrequest_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Fri, 20 Oct 2023 04:09:28 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '166', 'Connection': 'keep-alive', 'vary': 'Origin', 'x-request-id': '3ca05cda57b9b205c0e2bce41fdc7ce0', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'CF-Cache-Status': 'DYNAMIC', 'Server': 'cloudflare', 'CF-RAY': '818e67aeda6af7f4-BNA', 'alt-svc': 'h3=":443"; ma=86400'}.
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: Internal server error {
    "error": {
 

KeyboardInterrupt: 



In [90]:
print(compressed_docs)

[Document(page_content='We expect profitability to correlate with platform adoption, backing our brand \nmessaging of being the new industry standard, and development of native and \ndownstream marketing opportunities. Conservatively, we forecast running at a loss \nfor 18–24 months, but with an aim to generate revenue as early as December of \n2023.', metadata={'page': 24.0, 'source': 'first_rule.pdf'}), Document(page_content='$16,783,471', metadata={'page': 30.0, 'source': 'first_rule.pdf'}), Document(page_content='Total Revenue $1,750 $9,000 $11,000 $17,350 $26,278 $44,222 $112,907 $144,351 $190,077 $299,727 $386,568 $723,746', metadata={'page': 40.0, 'source': 'first_rule.pdf'}), Document(page_content='$16,783,471', metadata={'page': 43.0, 'source': 'first_rule.pdf'})]


In [59]:
for doc in compressed_docs:
    print(doc.page_content)

Revenue $1,966,976 $16,783,471 $45,155,399 $122,734,165
The relevant part of the context is:

2023 2024 2025 2026 2027
Total Revenue $1,966,976 $16,783,471 $45,155,399 $122,734,165

This table contains the company's revenue targets for 2024 and other years.


In [50]:
query = "What are the company's revenue targets in 2024?"

# Similarity search
results = index.similarity_search(query, k=3)

In [65]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter

embeddings = OpenAIEmbeddings()
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents("What are the company's revenue targets in 2024?")


Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: Internal server error {
    "error": {
        "message": "Internal server error",
        "type": "auth_subrequest_error",
        "param": null,
        "code": "internal_error"
    }
}
 500 {'error': {'message': 'Internal server error', 'type': 'auth_subrequest_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Fri, 20 Oct 2023 04:12:37 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '166', 'Connection': 'keep-alive', 'vary': 'Origin', 'x-request-id': '228060595a56312e92070715d2777763', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'CF-Cache-Status': 'DYNAMIC', 'Server': 'cloudflare', 'CF-RAY': '818e6c4dad9ef7d0-BNA', 'alt-svc': 'h3=":443"; ma=86400'}.


KeyboardInterrupt: 

In [53]:
for result in results:
    print(result.page_content)


First Rule
24
CONFIDENTIAL - DO NOT DISSEMINATE. This business plan contains confidential, trade-secret 
information and is shared only with the understanding that you will not share its contents or ideas with 
third parties without the express written consent of the plan author.• Staffing: Engineers, support, executives, and marketers will make up a 
substantial part of operating expenses.
• Acquisitions: The purchases of Controlla.XYZ, Emvoice, and Suno.AI should 
be viewed as key strategic investments.
Profit Expectations
We expect profitability to correlate with platform adoption, backing our brand 
messaging of being the new industry standard, and development of native and 
downstream marketing opportunities. Conservatively, we forecast running at a loss 
for 18–24 months, but with an aim to generate revenue as early as December of 
2023.
In summary, First Rule's projections indicate an innovative, opportunity-rich 
venture poised for strategic growth. Aligning with industry trend

In [68]:
from langchain.chat_models import ChatAnthropic
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

llm = ChatAnthropic(temperature=0, model="claude-2", anthropic_api_key=os.getenv("ANTHROPIC_KEY"))

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)

compressed_docs = compression_retriever.get_relevant_documents("What are the company's revenue targets in 2024?")





In [70]:
index.similarity_search("What are the company's revenue targets in 2024?", k=3)


[Document(page_content="First Rule\n24\nCONFIDENTIAL - DO NOT DISSEMINATE. This business plan contains confidential, trade-secret \ninformation and is shared only with the understanding that you will not share its contents or ideas with \nthird parties without the express written consent of the plan author.• Staffing: Engineers, support, executives, and marketers will make up a \nsubstantial part of operating expenses.\n• Acquisitions: The purchases of Controlla.XYZ, Emvoice, and Suno.AI should \nbe viewed as key strategic investments.\nProfit Expectations\nWe expect profitability to correlate with platform adoption, backing our brand \nmessaging of being the new industry standard, and development of native and \ndownstream marketing opportunities. Conservatively, we forecast running at a loss \nfor 18–24 months, but with an aim to generate revenue as early as December of \n2023.\nIn summary, First Rule's projections indicate an innovative, opportunity-rich \nventure poised for strateg

In [None]:
for doc 

In [67]:
for doc in compressed_docs:
    print(doc.page_content)

The given context does not contain any information relevant to answering the question about what the president said regarding Ketanji Brown Jackson.
No relevant information is found in the given context to answer the question about what the president said regarding Ketanji Brown Jackson.


In [17]:
for text in context:
    print(text.page_content)

text_list = [text.page_content for text in context]
for text in text_list:
    print(tiktoken_len(text))

 
 
BakeSpace Partner API Version 1.40  
 
This document will outline the query structure for the BakeSpace partner API. The purpose is 
so that partners can retrieve bakespace recipe information for their display purposes.  
 
The basic structure of a query is:  
 
http://bakespace.com/api/query.php?pcode=<partner_code>&type=<type>&limit=<number>  
 
where partner_code  is an alphanumeric string the partner is assigned by BakeSpace, type is 
the query type, and limit is how many results are to be returned.  
 
Reply content type is XML and a typical replay structure is as follows:  
 
<results>  
   <count></count>  
   <item> 
      <recipeid></recipeid>  
      <name></name>  
      <author></author>  
      <chosendate></chosendate>  
      <foodimg></foodimg>  
      <thumbnailimg></thumbn ailimg> 
      <fullimg></fullimg>  
      <desc></desc>  
      <preptime></preptime>  
      <cooktime></cooktime>  
      <servings></servings>  
      <directions></directions>  
      <ingr

In [19]:
import openai
from dotenv import load_dotenv
import os

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")
openai.organization = os.getenv("OPENAI_ORG")

def get_response(query):
    context = vectorstore.similarity_search(query, k=4)
    text_list = [text.page_content for text in context]
    messages = [
        {"role" : "system", "content" : f"""You are a master programmer helping the user
         craft API calls using the python requests library to the Bakespace API.  Bakespace
         is a website for sharing recipes.  Reference the following context {text_list}
         from the Bakespace API documentation to help the user craft their API call."""},
        {"role" : "user", "content" : query}
    ]
    models = ["gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]
    for model in models:
        try:
            response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=0.9,
            max_tokens=350,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0.6,
            )
            answer = response.choices[0].message.content

            return answer
        except:
            continue


    

In [13]:
import requests

url = "https://www.bakespace.com/feeds/api_v2/latestRecipes"
params = {
    "limit": 10,  # Optional parameter to limit the number of recipes returned
    "startrow": 0  # Optional parameter to specify the starting row
}

response = requests.get(url, params=params)

if response.status_code == 200:
    recipes = response.text
    # Do something with the recipes
else:
    print("Error:", response.status_code)

In [15]:
print(recipes)


<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="description" content="Explore 70,000+ recipes & indie cookbooks created by our community of home cooks. Join to share recipes & make your own cookbook.">
    <meta name="author" content="">
    <meta name="google-site-verification" content="qwPQWlO-5B13In_kLFS7e7YB8yaUmwzqqTRr-pKmU_U" />
    <link rel="icon" href="/favicon.ico">

    <title>BakeSpace - Food community, recipes, cookbooks & cooking contests</title>

    <!-- extra CSS -->
    <link href="/bower_components/bootstrap/dist/css/bootstrap.min.css" rel="stylesheet">
    <link href="/bower_components/font-awesome/css/font-awesome.min.css" rel="stylesheet">
    <link href="/bower_components/ekko-lightbox/dist/ekko-lightbox.min.css" rel="stylesheet">
    <link href="/bower_components/featherlight/relea

In [33]:
answer = get_response("I want to create a javascript and html frontend that will\
    display a Bakespace user's recipe on the page and then pass the recipe information to an\
    API that will store then give the recipe to an LLM as context to answer any\
    questions the user may have about the recipe via an expandable chat bubble\
        in the bottom right hand corner of the web page.  Can you help me code snippets\
            assuming the LLM takes in the recipep text as a string and a user\
                question as a string?  A unique session_id should be created and\
                    passed to the api endpoint as well when making the POST after\
                        the user submits their question about the recipe in the\
                            expandable chat bubble.")

In [35]:
# Save the answer to a new text file
with open("answer.txt", "w") as f:
    f.write(answer)

In [23]:
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI

# Replace <your_api_key> in openai_api_key="<your_api_key>" with your actual OpenAI key.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

python_executor = create_python_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    tool=PythonREPLTool(),
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    agent_executor_kwargs={"handle_parsing_errors": True},
)

def get_context(query):
    context = vectorstore.similarity_search(query, k=4)
    return [context.page_content for context in context]

# Define a list of tools offered by the agent
tools = [
    Tool(
        name="Python REPL Tool",
        func=python_executor.run,
        description="Useful for running Python code in a REPL.",
    ),
    Tool(
        name="Query Bakespace API Docs",
        func=get_context,
        description="Useful for querying the Bakespace API docs.",
    )
]

In [26]:
mrkl = initialize_agent(
    tools, llm, agent=AgentType.OPENAI_MULTI_FUNCTIONS, verbose=True
)
code = mrkl.run("How can I load a specific user's information from the Bakespace API?")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Query Bakespace API Docs` with `{'tool_input': 'load user information'}`


[0m[33;1m[1;3m["Reply:  \n<results>  \n <item> \n  <author>Bob Jones</author>  \n  <author_member_id>12</author_member_id>  \n  <author_facebook>http://facebook.com/myusername</author _facebook>  \n  <author_twitter>http://twitter.com/myusername</author_twitter>  \n  <about>Bob is from a small town and likes cats. He likes all kind of \ncats.</about>  \n </item> \n</results>  \n \n \n \ntype:  getCookbookProfile  \n \nDescription:  \nReturns details of a specified c ookbook.  \n \nParameters:  \ncookbookid : required  \n \nReply:  \n<results>  \n <item> \n  <cookbook_id>1</cookbook_id>  \n  <title>Bob's Cookbook</title>  \n  <description>Bob made this cookbook one rainy sunday \nafternoon.</description>  \n  <cost>7.95</cost>  \n  <overall_rating />  \n  <author_name>Bob Jones</author_name>  \n  <author_member_id>45783</author_member_id>

InvalidRequestError: 'Query Bakespace API Docs' does not match '^[a-zA-Z0-9_-]{1,64}$' - 'messages.3.name'

In [6]:
from langchain.embeddings.openai import OpenAIEmbeddings

embed = OpenAIEmbeddings(
    openai_api_key=openai.api_key,
    openai_organization=openai_org,
)


vectorstore = Pinecone(
    index, embed.embed_query, text_field, namespace="documentation"
)



In [7]:
query = "How can I create a multi-tool agent using the langchain library?"

answers = vectorstore.similarity_search(
    namespace="documentation",
    query=query,
    k=3
)

In [9]:
for answer in answers:
    print(answer.page_content)

"""Agent toolkits."""

from langchain.agents.agent_toolkits.azure_cognitive_services.toolkit import (
    AzureCognitiveServicesToolkit,
)
from langchain.agents.agent_toolkits.csv.base import create_csv_agent
from langchain.agents.agent_toolkits.file_management.toolkit import (
    FileManagementToolkit,
)
from langchain.agents.agent_toolkits.gmail.toolkit import GmailToolkit
from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
from langchain.agents.agent_toolkits.json.base import create_json_agent
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.office365.toolkit import O365Toolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.agents.agent_too

In [11]:
user_question = "What are the highlights of next year's revenue projections?"
context = vectorstore.similarity_search(
    query=user_question,
    k=3
)

In [14]:
# Create a list of tuples from the context
context_list = [(doc.page_content, doc.metadata['source'], doc.metadata['page']) for doc in context]

In [19]:
# Create a list of the page content from the context
context_page_content = [doc[0] for doc in context_list]

def get_bplan_response(question: str, context: list):
    messages = [
        {
            "role": "system", "content": f"""You are a master busines advisor
            and start-up strategist answering a question {question} about 
            an early stage company's business plan.  The relevant information
            from the business plan is {context}."""
        },
        {
            "role": "user", "content": f"""Please answer my {question} about the 
            business plan."""
        },
    ]
    models = ["gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-0613, gpt-3.5-turbo"] # Set list of models to iterate through
    for model in models:
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages = messages,
                max_tokens=500,
                frequency_penalty=0.5,
                presence_penalty=0.5,
                temperature=1,
                n=1
            )
            answer = response.choices[0].message.content

            return answer
        except Exception as e:
            print(e)
            continue

In [20]:
answer = get_bplan_response(user_question, context_page_content)

print(f"{answer}, Sources: {[doc[1] for doc in context_list]}")

Based on the information provided, the revenue projections for next year are as follows:

- January: $406,380
- February: $488,037
- March: $566,877
- April: $730,579
- May: $834,314
- June: $935,740
- July: $1,030,004
- August: $1,227,245
- September: $1,357,593
- October: $1,438,669
- November: $1,555,585
- December: $1,850,950

These projections show a steady increase in revenue throughout the year. It is important to note that these figures are estimates and may be subject to change based on market conditions and other factors., Sources: ['vlocker.pdf', 'vlocker.pdf', 'vlocker.pdf']


In [14]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# completion llm
llm = ChatOpenAI(
    openai_api_key=openai.api_key,
    model_name='gpt-3.5-turbo-16k-0613',
    temperature=0.0
)

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [15]:
from langchain.chains import RetrievalQAWithSourcesChain

qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [23]:
def create_prompt():
    """Create the prompt for the chatbot"""
    template = """
    You are a master business advisor, advising a client based on
    the provided context about their business plan.  If you cannot
    find any relevant data in the context, you may offer general 
    advice, but note that you did not find any relevant context.
    {context}
    Question: {question}
    Helpful Answer:"""
    qa_chain_prompt = PromptTemplate.from_template(template)

    return qa_chain_prompt


In [24]:
qa_chain_prompt = create_prompt()

In [33]:
from langchain.chains import RetrievalQA

question = "How much do you think we should be asking for in a pre-seed round of financing."
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5)
qa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever(),
chain_type_kwargs={"prompt": qa_chain_prompt}, verbose=True)
qa_chain({"query": question})




[1m> Entering new RetrievalQA chain...[0m


InvalidRequestError: The model `gpt-4` does not exist or you do not have access to it. Learn more: https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4.

In [42]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List, Optional
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

class Answer(BaseModel):
    answer: str = Field(description = "The corrected answer text")

output_parser = PydanticOutputParser(pydantic_object=Answer)
template = "Look over this response {response} from a large language model\
      and correct any errors before returning the result as a string.  If there are not\
    specific code examples provided, please add them before returning the result.\n{format_instructions}\n{response}\n"
prompt = PromptTemplate(
    template=template, 
    input_variables=["response"], 
    partial_variables = {"format_instructions": output_parser.get_format_instructions()}
)

gpt_4 = ChatOpenAI(model_name = 'gpt-4-0613', verbose = True, max_retries = 4, max_tokens = 1000)
llm_chain = LLMChain(prompt=prompt, llm=gpt_4, verbose=True)

In [43]:
from langchain.python import PythonREPL
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI

In [45]:
#llm = ChatOpenAI(temperature=0, model=gpt_4)

python = PythonREPL(llm=gpt_4, verbose=True)

tools = [
    Tool(
        name="python_repl",
        func=python.run,
        description="Useful for when you need to write or test python code."
    ),
    Tool(
        name="documentation",
        func=qa.run,
        description="Useful for when you need to search for documentation, api references, etc."
    ),
    Tool(
        name="format_final_response",
        func=llm_chain.predict_and_parse,
        description="Useful for when you need to check the final answer for errors and format before\
              returning to the user."
    )    
]

In [46]:
mrkl = initialize_agent(tools, gpt_4, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)

In [47]:
base_query = f'How can I use the langchain library to create a multi-tool agent\
    that will help farmers find the legal forms they need to fill out in order to start and maintain a farm?\
    I want to use multi-tool agents that can also utilize openai\'s function capabilities and any other tools\
    that the agent might need to accomplish this task?  I want to use Streamlit to create the interface for the app.'

'''additional_query = "How can I adjust the pairings functions to be able to generate multiple pairings per recipe,\
    use redis to manage the pairings and the associated recipes in state, and then use sqalchemy to store the pairings in the bakespace database?\
    as well as be able to initiate a new chat session with the pairings as context?"'''

#query = base_query + additional_query

'additional_query = "How can I adjust the pairings functions to be able to generate multiple pairings per recipe,    use redis to manage the pairings and the associated recipes in state, and then use sqalchemy to store the pairings in the bakespace database?    as well as be able to initiate a new chat session with the pairings as context?"'

In [48]:
response = mrkl.run(base_query)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)).




[1m> Entering new  chain...[0m
[32;1m[1;3mBuilding a multi-tool agent to help farmers find the legal forms they need to start and maintain a farm using the langchain library, openai's function capabilities, and Streamlit for the interface involves several steps. Here's a general guide:

1. **Understand the Legal Requirements**: First, you need to understand the legal requirements for starting and maintaining a farm. This could vary by location, so you might need to gather data from various legal databases or use APIs that can provide this information.

2. **Use Langchain Library**: Langchain is a library that allows you to parse, transform, and analyze legal texts. You can use it to process the legal requirements you gathered in the first step. It can help you identify key phrases and sections that refer to forms that need to be filled out.

3. **Leverage openai's Function Capabilities**: Openai's function capabilities can be used to automate the process of searching for and retr

In [49]:
print(response)

Building a multi-tool agent to help farmers find the legal forms they need to start and maintain a farm using the langchain library, openai's function capabilities, and Streamlit for the interface involves several steps. Here's a general guide:

1. **Understand the Legal Requirements**: First, you need to understand the legal requirements for starting and maintaining a farm. This could vary by location, so you might need to gather data from various legal databases or use APIs that can provide this information.

2. **Use Langchain Library**: Langchain is a library that allows you to parse, transform, and analyze legal texts. You can use it to process the legal requirements you gathered in the first step. It can help you identify key phrases and sections that refer to forms that need to be filled out.

3. **Leverage openai's Function Capabilities**: Openai's function capabilities can be used to automate the process of searching for and retrieving the necessary forms based on the legal re

In [425]:
import pandas as pd
session_log_df = pd.read_csv("session_log.csv")

In [426]:
# Add the response and the query to the session log df
session_log_df = session_log_df.append({"query": additional_query, "response": response}, ignore_index=True)

  session_log_df = session_log_df.append({"query": additional_query, "response": response}, ignore_index=True)


In [427]:
# Export the session log df to a csv
session_log_df.to_csv("session_log.csv", index=False)

In [318]:
# Loop through the docs and create a vector for each
# In the format needed for Pinecone
# Create a pandas dataframe with the vectors and metadata

# Create the dataframe with the columns "id", "values", "metadata", "text"
# The "id" column should be a unique identifier for each vector
# The "values" column should be a list of floats
# The "metadata" column should be a dictionary with keys for "type", "url", and "access"
# The "text" column should be the text of the document
import pandas as pd

vectors_df = pd.DataFrame(columns=["id", "values", "metadata", "text"])
texts = []
for i, doc in enumerate(docs):
    text = doc.page_content
    source = doc.metadata["source"]
    texts.append(text)

    # Create the vectors
    vectors = [
        {'id': f'FastAPI_docs{i}',
        'values': [],
        'metadata': {'type': 'Github repo docs', 'url': 'https://github.com/tiangolo/fastapi', 'access': 'public', 'text': text, 'source': source},
        }
    ]
    # Add the vectors to the dataframe
    vectors_df = vectors_df.append(vectors, ignore_index=True)

  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_df.append(vectors, ignore_index=True)
  vectors_df = vectors_d

In [319]:
# Add the text column to the dataframe
vectors_df["text"] = texts

# Embed the texts for the values column
vectors_df["values"] = embed.embed_documents(vectors_df["text"])

In [23]:
from langchain.vectorstores import Pinecone
index = Pinecone.from_documents(docs, embed, index_name="vocalockr-bplan")

TypeError: expected string or bytes-like object

In [321]:
# Drop the text column
vectors_df = vectors_df.drop(columns=["text"])


In [322]:
# Convert the dataframe to a list of dictionaries
vectors = vectors_df.to_dict(orient="records")



In [323]:
for i, vector in enumerate(vectors):
    if len(vector["values"]) != 1536:
        print(i, len(vector["values"]))

In [324]:
import pinecone
import os
from dotenv import load_dotenv

load_dotenv()

pinecone_key = os.getenv("PINECONE_KEY2")
pinecone_env = os.getenv("PINECONE_ENV2")
pinecone.init(api_key = pinecone_key, environment=pinecone_env) # Initialize pinecone
index = pinecone.Index(index_name="coding-assist")

In [325]:
# Upsert the vectors into the vector store
index.upsert(vectors=vectors, batch_size=25, namespace="documentation")

Upserted vectors:   0%|          | 0/403 [00:00<?, ?it/s]

{'upserted_count': 403}

In [15]:
# Query the vector store
from langchain.vectorstores import Pinecone

text_field = "text"

index = pinecone.Index('coding-assist')

vectorstore = Pinecone(
    index, embed.embed_query, text_field
)


In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    model_name = 'gpt-3.5-turbo-16k',
    temperature = 0.5,
)

qa = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = vectorstore.as_retriever(),
)


In [17]:
query = "How can I use the langchain library to code an app in Streamlit that\
    can be used by farmers to find the forms that they need to fill out to open and\
    run their business based on their location?  I want to use multi-tool agents\
    to accomplish this task."

response = qa.run(query)

In [18]:
print(response)


I'm sorry, but I don't have any information about the langchain library or its capabilities. It's possible that the langchain library is a custom library or a library that is not widely known. I recommend referring to the documentation or resources specific to the langchain library for guidance on how to use it for your specific task.


In [None]:
# Create functions to be able to load YouTube transcripts and create a vector store from them
from langchain.document_loaders import YouTubeTranscriptLoader

# Create a YouTubeTranscriptLoader object
yt_loader = YouTubeTranscriptLoader(
    video_id="dQw4w9WgXcQ",
    language="en",
)