In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [2]:
import os

from dotenv import load_dotenv

# Populate the process environment from the local .env file.
load_dotenv()

# OpenAI credential read from the environment; None if OPENAI_KEY is unset.
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model used by both the map and the reduce chains; temperature is
# pinned to 0 and the key comes from the environment-loading cell.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [4]:
# map prompt
def map_prompt():
    """Build the prompt used for the map step.

    The template exposes a single ``{docs}`` placeholder followed by the
    instructions asking for the itinerary fields and a summary.

    Returns:
        The object produced by ``PromptTemplate.from_template`` for that
        template text.
    """
    # Single literal: the document placeholder first, then the instructions.
    # The 4-space indents inside the string are part of the prompt text.
    template_text = """The following is a set of documents
    {docs}
    Based on this list of docs, please identify the itinerary name, web address, tour length, cost, departure date, travel style, physical grading and a summary of the itinerary.
    If one or more of the interests is not mentioned include this in the answer.
    Helpful Answer:"""

    return PromptTemplate.from_template(template_text)

# first reduce prompt
def first_reduce_prompt():
    """Build the prompt used for the reduce step.

    The template exposes a single ``{doc_summaries}`` placeholder and asks
    for one consolidated paragraph covering the itinerary fields, the cost
    range and the departure dates.

    Returns:
        The object produced by ``PromptTemplate.from_template`` for that
        template text.
    """
    # The 4-space indents inside the string are part of the prompt text.
    template_text = """The following is set of summaries:
    {doc_summaries}
    Take these and distill it into a final, consolidated summary of the the itinerary name, web address, tour length, travel style, physical grading and a summary of the itinerary. After the name also include the range of costs and the departure dates.
    Return a single paragraph rather than bullet points
    Helpful Answer:"""
    return PromptTemplate.from_template(template_text)

In [5]:
# Map step: LLM chain driven by the map prompt.
map_chain = LLMChain(prompt=map_prompt(), llm=llm)

# Reduce step: LLM chain driven by the reduce prompt.
reduce_chain = LLMChain(prompt=first_reduce_prompt(), llm=llm)

# Joins a list of documents into one string and feeds it to `reduce_chain`
# through the prompt's `doc_summaries` variable.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)

# Combines and iteratively reduces the mapped documents.
reduce_documents_chain = ReduceDocumentsChain(
    # Upper bound on the number of tokens per group of documents.
    token_max=4000,
    # Used when the documents exceed the context of `StuffDocumentsChain`.
    collapse_documents_chain=combine_documents_chain,
    # Final chain that is called.
    combine_documents_chain=combine_documents_chain,
)

# Full pipeline: map a chain over the documents, then combine the results.
map_reduce_chain = MapReduceDocumentsChain(
    # Do not return the outputs of the map steps alongside the final result.
    return_intermediate_steps=False,
    # Variable of the map prompt that receives the documents.
    document_variable_name="docs",
    # Reduce stage.
    reduce_documents_chain=reduce_documents_chain,
    # Map stage.
    llm_chain=map_chain,
)



In [6]:
from langchain.document_loaders import BigQueryLoader

# BigQuery coordinates of the table that holds the itinerary rows.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_day"

# Select every column of every row belonging to the tour being summarised.
BASE_QUERY = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE tour_name = 'Highlights of Jordan'
    """

loader = BigQueryLoader(BASE_QUERY)

# Run the query and collect the resulting documents.
docs = loader.load()

# Fail fast on an empty result: otherwise the map-reduce chain would be run
# with nothing to summarise and the LLM would be asked to consolidate an
# empty set of summaries.
if not docs:
    raise ValueError(
        f"BigQuery returned no rows for query:{BASE_QUERY}"
    )

In [None]:
# Run the map-reduce chain over the loaded documents and keep its result.
itinerary_summary = map_reduce_chain.run(docs)

In [None]:
# Show the summary returned by the map-reduce chain.
print(itinerary_summary)

In [None]:
# print(docs[0].page_content)