In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [2]:
from dotenv import load_dotenv
import os

# Populate the process environment from the project's .env file.
load_dotenv()

# OpenAI credential, read from the OPENAI_KEY variable (None when it is unset).
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the chains below; temperature 0 is used for every call.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [4]:
# map prompt
def map_prompt():
    """Build the prompt applied to the documents in the map step.

    The template exposes a single input variable, ``docs``, followed by an
    instruction asking for the tour operator, itinerary name, web address,
    tour length, cost, departure date, travel style, physical grading and a
    summary of the itinerary.

    Returns:
        The object returned by ``PromptTemplate.from_template`` for the
        assembled template text.
    """
    # Idea kept from the original draft (not part of the prompt):
    #    If one or more of the interests is not mentioned include this in the answer.

    # Plain string rather than an f-string: nothing is interpolated here, and an
    # f-prefix would make Python consume any ``{placeholder}`` that is later
    # added for PromptTemplate.
    instructions = """Based on this list of docs, please identify the tour operator, itinerary name, web address, tour length, cost, departure date, travel style, physical grading and a summary of the itinerary.
    Helpful Answer:"""

    map_template = """The following is a set of documents
    {docs}
    """
    map_template += instructions

    # Local name differs from the function name so the function is not shadowed.
    prompt = PromptTemplate.from_template(map_template)
    return prompt

# first reduce prompt
def first_reduce_prompt():
    """Build the prompt used by the reduce step to merge the mapped summaries.

    The template exposes a single input variable, ``doc_summaries``, and asks
    for one consolidated paragraph covering the itinerary name, web address,
    tour length, travel style, physical grading and itinerary summary, with
    the range of costs and the departure dates placed after the name.

    Returns:
        The object returned by ``PromptTemplate.from_template`` for the
        reduce template text.
    """
    # Reduce
    reduce_template = """The following is a set of summaries:
    {doc_summaries}
    Take these and distill it into a final, consolidated summary of the itinerary name, web address, tour length, travel style, physical grading and a summary of the itinerary. After the name also include the range of costs and the departure dates.
    Return a single paragraph rather than bullet points. 
    Helpful Answer:"""
    return PromptTemplate.from_template(reduce_template)

In [5]:
# Map step: LLM chain driven by the extraction prompt.
map_chain = LLMChain(prompt=map_prompt(), llm=llm)

# Reduce step: LLM chain driven by the consolidation prompt
# (constructed here, executed later through `map_reduce_chain`).
reduce_chain = LLMChain(prompt=first_reduce_prompt(), llm=llm)

# Joins a list of documents into one string, placed in the reduce prompt's
# `doc_summaries` variable, and hands it to `reduce_chain`.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="doc_summaries",
    llm_chain=reduce_chain,
)

# Combines and iteratively reduces the mapped documents.
reduce_documents_chain = ReduceDocumentsChain(
    # Maximum number of tokens to group documents into.
    token_max=4000,
    # Used when the documents exceed the context for `StuffDocumentsChain`.
    collapse_documents_chain=combine_documents_chain,
    # Final chain that is called.
    combine_documents_chain=combine_documents_chain,
)

# Full pipeline: map a chain over the documents, then combine the results.
map_reduce_chain = MapReduceDocumentsChain(
    # Do not return the results of the map steps in the output.
    return_intermediate_steps=False,
    # Variable of the map prompt that receives the documents.
    document_variable_name="docs",
    # Reduce chain.
    reduce_documents_chain=reduce_documents_chain,
    # Map chain.
    llm_chain=map_chain,
)



In [6]:
from langchain.document_loaders import BigQueryLoader

# BigQuery location of the tour table, referenced as PROJECT.DATASET.TABLE.
TABLE = "one_month"
DATASET = "g_adventures_dataset"
PROJECT = "wagon-bootcamp-377120"

# Every column of every row whose tour_name is 'Highlights of Jordan'.
# Note: BASE_QUERY is reassigned by a later cell, and the cells that would
# have run this version are commented out.
BASE_QUERY = f"""
    SELECT *
    FROM {PROJECT}.{DATASET}.{TABLE}
    WHERE tour_name = 'Highlights of Jordan'
    """



In [7]:
# %%time
# loader = BigQueryLoader(BASE_QUERY)
# docs = loader.load()
# itinerary_summary = map_reduce_chain.run(docs)

In [8]:
# print(itinerary_summary)

In [15]:
from langchain.document_loaders import BigQueryLoader

# BigQuery location of the tour table, referenced as PROJECT.DATASET.TABLE.
TABLE = "one_month"
DATASET = "g_adventures_dataset"
PROJECT = "wagon-bootcamp-377120"

# One row per (tour_name, itinerary_name) for the 'Peru Panorama' tour:
# distinct adult prices and the ordered start dates are gathered into arrays,
# every other column is reduced with MAX.
query = f"""
    SELECT
      MAX(tour_operator) AS tour_operator,
      tour_name,
      itinerary_name,
      MAX(visited_countries) AS visited_countries,
      MAX(currency) AS currency,
      ARRAY_AGG(DISTINCT Standard___Adult) AS Costs,
      MAX(duration) AS duration,
      ARRAY_AGG(CAST(start_date AS STRING) ORDER BY start_date) AS start_dates,
      MAX(Travel_Style) AS Travel_Style,
      MAX(Service_Level) AS Service_Level,
      MAX(Physical_Grading) AS Physical_Grading,
      MAX(Merchandising) AS Merchandising,
      MAX(Trip_Type) AS Trip_Type,
      MAX(itinerary) AS itinerary,
      MAX(url) AS url
    FROM
      {PROJECT}.{DATASET}.{TABLE}
    WHERE 
      tour_name = 'Peru Panorama'
    GROUP BY
      tour_name, itinerary_name;
    """

# BASE_QUERY is the name the loader cells read; `query` stays bound to the same string.
BASE_QUERY = query


In [10]:
%%time
# Load the rows selected by BASE_QUERY from BigQuery as documents, then
# summarise them with the map-reduce chain. The saved timing below shows a
# wall time of roughly 50 s for this cell.
loader = BigQueryLoader(BASE_QUERY)
docs = loader.load()
itinerary_summary = map_reduce_chain.run(docs)

CPU times: user 490 ms, sys: 43.7 ms, total: 533 ms
Wall time: 50.5 s


In [11]:
# NOTE(review): the output saved under this cell describes "Coastal Morocco",
# while BASE_QUERY filters on 'Peru Panorama' and the cell counters are out of
# order (In [15] precedes In [10]). The output looks stale; restart the kernel
# and run all cells to refresh it.
print(itinerary_summary)

The tour operator G Adventures offers a 5-day tour of Coastal Morocco, with a cost of £469.0. The tour is designed for young, budget-minded travelers and falls under the travel style of 18-to-Thirtysomethings. The physical grading is light, suitable for most fitness levels. The itinerary includes arriving in Marrakech and enjoying drinks at a rooftop bar, traveling through the High Atlas Mountains to Taroudant and Taghazout, having a surf lesson and free time for various activities in Taghazout, exploring Essaouira with optional Traditional Hammam Experience, and returning to Marrakech where the trip ends. The departure dates for this tour are 2024-04-12 and 2024-04-19. For more information and booking, visit the website at https://www.gadventures.com/trips/tour-coastal-morocco/DCKE/.


In [16]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

# Prompt for the single-pass ("stuff") summary; the documents are inserted at {text}.
# NOTE(review): the interest "surfing" is hard-coded in the prompt while
# BASE_QUERY selects the 'Peru Panorama' tour; confirm it is still wanted.
prompt_template = """Write a summary and only include the itinerary name, tour length, a summary of departure dates, summary of costs, travel style, physical grading, a summary of the itinerary and present the url to the user.
Include a mention of any potential interests: surfing.
"{text}"
Produce a summary paragraph not a list or bullet points.
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# LLM chain. The key loaded from the .env file (`api_key`, read from OPENAI_KEY)
# is passed explicitly, matching how the notebook's first `llm` is built;
# without it this client would depend on a separately configured default key.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=api_key)
llm_chain = LLMChain(llm=llm, prompt=prompt)

# StuffDocumentsChain: feeds the documents to `llm_chain` through the prompt's
# `text` variable.
stuff_chain = StuffDocumentsChain(
    llm_chain=llm_chain, document_variable_name="text"
)


In [17]:
%%time
# Load the rows selected by BASE_QUERY again and summarise them in a single
# pass with the stuff chain; this rebinds `loader`, `docs` and
# `itinerary_summary` from the earlier map-reduce run.
loader = BigQueryLoader(BASE_QUERY)
docs = loader.load()
itinerary_summary = stuff_chain.run(docs)

CPU times: user 117 ms, sys: 4.33 ms, total: 121 ms
Wall time: 43.1 s


In [18]:
# Show the summary produced by the stuff chain; print() renders the paragraph
# breaks instead of the escaped repr a bare expression would display.
print(itinerary_summary)

G Adventures offers the Peru Panorama tour, which has three different itineraries to choose from: Lares Trek, Cusco Stay, and Inca Trail. The tour lasts for 15 days and visits Peru. The departure dates for the tour are April 6, 8, 13, and 15 in 2024. The cost of the tour is £2769. The travel style is Classic, offering all the highlights and cultural experiences at a great price. The physical grading is 4, meaning it includes some high-altitude hikes and strenuous activities but is accessible to most healthy travelers. 

For the Lares Trek itinerary, the tour starts with a visit to Puno and Lake Titicaca, followed by a guided tour and homestay in a small village. The tour then continues to Cusco, where a full-day guided tour of the Sacred Valley is included. The highlight of this itinerary is the Lares Trek, a demanding hike that takes travelers through scenic landscapes and ancient ruins, culminating in a visit to Machu Picchu. The tour ends with a visit to Puerto Maldonado and the G L