In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [2]:
from dotenv import load_dotenv
import os

# Read the variables declared in the .env file.
load_dotenv()

# OpenAI credential taken from the environment (None when OPENAI_KEY is unset).
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model used by both the map chain and the reduce chain.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [4]:
# map prompt
def map_prompt():
    """Build the prompt used for the map step.

    The template has a single input variable, ``docs``, followed by the
    instruction listing the fields to extract from those documents.

    Returns:
        The prompt object produced by ``PromptTemplate.from_template`` for
        the combined template text.
    """
    # Not currently part of the prompt:
    #    If one or more of the interests is not mentioned include this in the answer.

    # Plain string on purpose: nothing is interpolated here, and an f-string
    # would interpret any literal braces added to the instruction later.
    instructions = """Based on this list of docs, please identify the tour operator, itinerary name, web address, tour length, cost, departure date, travel style, physical grading and a summary of the itinerary.
    Helpful Answer:"""

    map_template = """The following is a set of documents
    {docs}
    """
    map_template += instructions

    # Local name differs from the function name to avoid shadowing it.
    prompt = PromptTemplate.from_template(map_template)
    return prompt

# first reduce prompt
def first_reduce_prompt():
    """Build the prompt used for the reduce step.

    The template has a single input variable, ``doc_summaries``, and asks for
    one consolidated paragraph covering the itinerary details, with the cost
    range and departure dates placed after the name.

    Returns:
        The prompt object produced by ``PromptTemplate.from_template`` for
        the reduce template text.
    """
    # Wording fixes only: "is a set of" (matches the map prompt) and a
    # doubled "the the" removed; the rest of the text is unchanged.
    reduce_template = """The following is a set of summaries:
    {doc_summaries}
    Take these and distill it into a final, consolidated summary of the itinerary name, web address, tour length, travel style, physical grading and a summary of the itinerary. After the name also include the range of costs and the departure dates.
    Return a single paragraph rather than bullet points. Note the currency is in GBP.
    Helpful Answer:"""
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    return reduce_prompt

In [5]:
# Map step: the LLM chain that is mapped over the input documents.
map_chain = LLMChain(prompt=map_prompt(), llm=llm)

# Reduce step: the LLM chain that consolidates the mapped summaries.
# (Nothing is executed here; the chains are only being assembled.)
reduce_chain = LLMChain(prompt=first_reduce_prompt(), llm=llm)

# Joins a list of documents into a single string, binds it to the
# `doc_summaries` prompt variable and hands it to the reduce LLMChain.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)

# Combines and iteratively reduces the mapped documents.
reduce_documents_chain = ReduceDocumentsChain(
    # Maximum number of tokens to group documents into.
    # NOTE(review): confirm this leaves room for the prompt text and the
    # completion within the model's context window.
    token_max=4000,
    # Used when the documents exceed the context for `StuffDocumentsChain`.
    collapse_documents_chain=combine_documents_chain,
    # Final chain that is called.
    combine_documents_chain=combine_documents_chain,
)

# Full pipeline: map a chain over the documents, then combine the results.
map_reduce_chain = MapReduceDocumentsChain(
    # Name of the variable in `llm_chain`'s prompt that receives the documents.
    document_variable_name="docs",
    # Map chain.
    llm_chain=map_chain,
    # Reduce chain.
    reduce_documents_chain=reduce_documents_chain,
    # Do not include the results of the map steps in the output.
    return_intermediate_steps=False,
)



In [6]:
from langchain.document_loaders import BigQueryLoader

# Location of the source table.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_month"

# Every row of the table for the "Highlights of Jordan" tour.
BASE_QUERY = """
    SELECT *
    FROM {project}.{dataset}.{table}
    WHERE tour_name = 'Highlights of Jordan'
    """.format(project=PROJECT, dataset=DATASET, table=TABLE)



In [7]:
%%time
# Load the results of BASE_QUERY as documents, then summarise them with the
# map-reduce chain.
loader = BigQueryLoader(query=BASE_QUERY)
docs = loader.load()
itinerary_summary = map_reduce_chain.run(input_documents=docs)

CPU times: user 665 ms, sys: 106 ms, total: 772 ms
Wall time: 3min 42s


In [8]:
# Show the summary returned by the map-reduce chain for the row-level query.
print(itinerary_summary)

G Adventures offers an 8-day tour called "Highlights of Jordan" with a web address of https://www.gadventures.com/trips/highlights-of-jordan/DWHJ/. The tour is classified as Classic, providing all the highlights, culture, access, and unforgettable moments at a great price. The physical grading is average, including light hiking, biking, rafting, or kayaking in addition to walking. The itinerary includes visits to the ancient Roman town of Jerash, the Dead Sea, Madaba mosaics, Mt Nebo, Karak crusader castle, Wadi Musa, iconic Petra, Little Petra, Wadi Rum, and the Red Sea. The tour departs on multiple dates in April 2024, with a cost of £1259.0.


In [9]:
from langchain.document_loaders import BigQueryLoader

# Location of the source table.
PROJECT = "wagon-bootcamp-377120"
DATASET = "g_adventures_dataset"
TABLE = "one_month"

# Aggregated variant of the query: one row per (tour_name, itinerary_name).
# Scalar columns are collapsed with MAX; the distinct adult prices and the
# ordered start dates are gathered into arrays.
BASE_QUERY = """
    SELECT
      MAX(tour_operator) AS tour_operator,
      tour_name,
      itinerary_name,
      MAX(visited_countries) AS visited_countries,
      MAX(currency) AS currency,
      ARRAY_AGG(DISTINCT Standard___Adult) AS Costs,
      MAX(duration) AS duration,
      ARRAY_AGG(CAST(start_date AS STRING) ORDER BY start_date) AS start_dates,
      MAX(Travel_Style) AS Travel_Style,
      MAX(Service_Level) AS Service_Level,
      MAX(Physical_Grading) AS Physical_Grading,
      MAX(Merchandising) AS Merchandising,
      MAX(Trip_Type) AS Trip_Type,
      MAX(itinerary) AS itinerary,
      MAX(url) AS url
    FROM
      {project}.{dataset}.{table}
    WHERE 
      tour_name = 'Highlights of Jordan'
    GROUP BY
      tour_name, itinerary_name;
    """.format(project=PROJECT, dataset=DATASET, table=TABLE)

# Alias bound to the same string object as BASE_QUERY.
query = BASE_QUERY


In [10]:
%%time
# Load the results of the aggregated BASE_QUERY as documents, then summarise
# them with the same map-reduce chain.
loader = BigQueryLoader(query=BASE_QUERY)
docs = loader.load()
itinerary_summary = map_reduce_chain.run(input_documents=docs)

CPU times: user 101 ms, sys: 4.1 ms, total: 105 ms
Wall time: 36.7 s


In [11]:
# Show the summary returned by the map-reduce chain for the aggregated query.
print(itinerary_summary)

The tour operator G Adventures offers an 8-day trip to Jordan called "Highlights of Jordan." The tour, which costs £1259.0, departs on April 7, 2024. It falls under the Classic travel style, providing a comprehensive experience of the country's highlights, culture, and unique moments at an affordable price. With a physical grading of 3 (average), the itinerary includes light hiking, biking, rafting, or kayaking in addition to walking. The trip begins with a visit to the ancient Roman town of Jerash and the Dead Sea. Other highlights include exploring the intricate Madaba mosaics, visiting the iconic Petra, experiencing a 4x4 excursion in Wadi Rum, and relaxing on the beach at the Red Sea. The tour concludes with a stop at a G Adventures-supported community cafe before returning to Amman. For more information and booking, visit https://www.gadventures.com/trips/highlights-of-jordan/DWHJ/.
