In [1]:
import pandas as pd
from langchain.chains import LLMChain, ReduceDocumentsChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import create_tagging_chain_pydantic
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.document_loaders import TextLoader
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd
from langchain.document_loaders import BigQueryLoader
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from google.cloud import bigquery
from pydantic import BaseModel

In [2]:
from dotenv import load_dotenv
import os

# Pull any variables defined in a local .env file into the process environment.
load_dotenv()

# Read the OpenAI key from the environment; the value is None when OPENAI_KEY is unset.
api_key = os.environ.get("OPENAI_KEY")

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the map and reduce chains; temperature 0 is used for the summaries.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=api_key,
)

In [4]:
# BigQuery location of the itinerary table, given as (project, dataset, table).
PROJECT, DATASET, TABLE = (
    "wagon-bootcamp-377120",
    "g_adventures_dataset",
    "one_month",
)

In [5]:
# Structured travel preferences for one user, passed to generate_sql_query().
# Documented with comments rather than a class docstring so the pydantic schema
# generated from this model is left unchanged.
class UserTravelDetails(BaseModel):
    # Destination country. Not referenced by generate_sql_query() in this notebook.
    country: str
    # Upper price bound; generate_sql_query() compares it against the computed `cost` column.
    max_budget: float
    # Lower price bound. Not referenced by generate_sql_query() in this notebook.
    min_budget: float
    # Earliest departure date, compared against `start_date` as a quoted string.
    # The example instance uses the YYYY-MM-DD format.
    departing_after: str
    # Latest departure date, compared against `start_date` as a quoted string.
    departing_before: str
    # Longest acceptable trip length. Not referenced by generate_sql_query() in this notebook.
    # NOTE(review): presumably measured in days -- confirm.
    max_duration: int
    # Shortest acceptable trip length. Not referenced by generate_sql_query() in this notebook.
    min_duration: int

In [6]:
# Sample preferences used to exercise the query builder further down.
user_travel_details = UserTravelDetails(
    **{
        "country": "Morocco",
        "max_budget": 1000,
        "min_budget": 0,
        "departing_after": "2024-04-02",
        "departing_before": "2024-04-10",
        "max_duration": 10,
        "min_duration": 5,
    }
)

In [8]:
def list_all_column_names():
    """Return the column names of the configured BigQuery table.

    The table is identified by the notebook-level PROJECT, DATASET and TABLE
    constants. A BigQuery client is created on every call and the table
    metadata is fetched over the network.

    Returns:
        list[str]: The ``name`` of every field in the table schema, in the
        order the schema lists them.
    """
    client = bigquery.Client(project=PROJECT)

    # Pass a fully-qualified "project.dataset.table" ID instead of building a
    # reference through the deprecated Client.dataset() helper.
    table = client.get_table(f"{PROJECT}.{DATASET}.{TABLE}")

    return [field.name for field in table.schema]

In [9]:
# room_categories = list_column_names()
# all_columns = list_all_column_names()

In [10]:
# Tour names that the SQL query is restricted to.
found_itineraries = [
    "Morocco Kasbahs & Desert",
    "Moroccan Desert Adventure: River Canyons & Camels",
]

In [11]:
# Render the itinerary names as a comma-separated list of single-quoted SQL string literals.
# Backslashes and single quotes are backslash-escaped (BigQuery string-literal syntax) so a
# name containing an apostrophe cannot terminate its literal early.
found_itineraries_string = ", ".join(
    "'" + item.replace("\\", "\\\\").replace("'", "\\'") + "'"
    for item in found_itineraries
)

In [12]:
def _sql_string_literal(value):
    """Return ``value`` as a single-quoted BigQuery string literal.

    Backslashes and single quotes are backslash-escaped so the value cannot
    close the literal early and alter the surrounding query.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def generate_sql_query(user_travel_details, found_itineraries):
    """Build the BigQuery SQL that aggregates departures for the given tours.

    The inner query selects every table column plus a computed ``cost``: the
    smallest of the price columns whose name contains "Adult" (and does not
    contain "Promotion Description"). The outer query groups rows by
    ``tour_name`` and ``itinerary_name``.

    Args:
        user_travel_details: Object exposing ``max_budget``,
            ``departing_after`` and ``departing_before``. Each one adds a
            filter only when it is truthy. Other attributes (country,
            min_budget, min/max duration) are not applied by this function.
        found_itineraries: Iterable of tour names to restrict the result to.
            When it is empty or None the query is built to match no rows.

    Returns:
        str: The complete SQL statement, terminated by a semicolon.
    """
    all_columns = list_all_column_names()
    # Trailing comma because the computed `cost` expression follows the column list.
    all_columns_str = ",\n".join(all_columns) + ","

    room_categories = [
        column
        for column in all_columns
        if "Adult" in column and "Promotion Description" not in column
    ]

    # A price that is not greater than 0 (including NULL) is replaced by a large
    # sentinel so that LEAST() only picks it when no real price exists.
    category_cases_str = ",\n".join(
        f"CASE WHEN {column} > 0 THEN {column} ELSE 1000000 END"
        for column in room_categories
    )

    # The table path is back-quoted because the project ID contains dashes.
    query = f"""
    SELECT
        MAX(tour_operator) AS tour_operator,
        tour_name,
        itinerary_name,
        MAX(visited_countries) AS visited_countries,
        MAX(currency) AS currency,
        ARRAY_AGG(DISTINCT cost) AS Costs,
        MAX(duration) AS duration,
        ARRAY_AGG(CAST(start_date AS STRING) ORDER BY start_date) AS start_dates,
        MAX(Travel_Style) AS Travel_Style,
        MAX(Service_Level) AS Service_Level,
        MAX(Physical_Grading) AS Physical_Grading,
        MAX(Merchandising) AS Merchandising,
        MAX(Trip_Type) AS Trip_Type,
        MAX(itinerary) AS itinerary,
        MAX(url) AS url
    FROM (
        SELECT
            {all_columns_str}
            LEAST(
                {category_cases_str}
            ) AS cost
        FROM `{PROJECT}.{DATASET}.{TABLE}`
    ) AS subquery
        WHERE 1 = 1
        """

    # Optional filters: each is appended only when the corresponding detail is set.
    if user_travel_details.max_budget:
        query += f" AND cost <= {user_travel_details.max_budget}"

    if user_travel_details.departing_after:
        query += f" AND start_date >= {_sql_string_literal(user_travel_details.departing_after)}"

    if user_travel_details.departing_before:
        query += f" AND start_date <= {_sql_string_literal(user_travel_details.departing_before)}"

    # Build the IN list from the argument, not from a notebook-level global,
    # so the function honours what the caller passes in.
    if found_itineraries:
        itinerary_literals = ", ".join(
            _sql_string_literal(name) for name in found_itineraries
        )
        query += f" AND tour_name in ({itinerary_literals})"
    else:
        # `IN ()` is not valid SQL; with no itineraries nothing can match.
        query += " AND FALSE"

    query += """
    GROUP BY
        tour_name, itinerary_name;"""

    return query

In [13]:
# SQL for the example user, restricted to the itineraries found above.
combined_query = generate_sql_query(
    user_travel_details=user_travel_details,
    found_itineraries=found_itineraries,
)

In [14]:
# client = bigquery.Client(project="wagon-bootcamp-377120")
# query_job = client.query(combined_query)
# result = query_job.result()
# df = result.to_dataframe()
# df

In [18]:
from langchain.prompts import PromptTemplate
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain

# User interests steer both the per-itinerary summaries and the final ranking.
raw_interests = ["camels"]
valid_interests = [interest for interest in raw_interests if interest is not None]
interests = ", ".join(valid_interests)

# `{interests}` is kept as a template variable and bound with .partial() rather than
# being f-string-interpolated into the template text. That way an interest containing
# `{` or `}` is inserted verbatim and is never parsed as a prompt variable.
map_template = """Write a summary and only include the itinerary name, tour length, list departure dates, summary of costs, travel style, physical grading, a summary of the itinerary and present the url to the user.
Include a mention of any potential interests: {interests}.
"
{docs}"
Produce a summary paragraph not a list or bullet points.
CONCISE SUMMARY:"""
map_prompt = PromptTemplate.from_template(map_template).partial(interests=interests)

reduce_template = """
You are an AI travel agent speaking to a user. The following is set of summaries that fits the users basic travel needs:
{doc_summaries}
"""
intermediate_template = """Take these and rank them based on the following user interests: {interests}.
Return the ranked itineraries as a summary paragraph and only include the itinerary name, tour length, departure dates, summary of costs, travel style, physical grading, a summary of the itinerary and present the url to the user.
Refer to the user as - you.
Helpful Answer:"""
reduce_template += intermediate_template

reduce_prompt = PromptTemplate.from_template(reduce_template).partial(interests=interests)

# Map step: summarise each document on its own.
map_chain = LLMChain(llm=llm, prompt=map_prompt)

# Reduce step: rank and merge the individual summaries.
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteratively reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is the final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # Used if the documents exceed the context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Do not return the results of the map steps in the output
    return_intermediate_steps=False,
)


# loader = BigQueryLoader(combined_query)
# docs = loader.load()
# itinerary_summary = map_reduce_chain.run(docs)
       

In [19]:
%%time
# Run the generated SQL through BigQueryLoader and collect the results as documents.
# NOTE(review): no project is passed here, unlike bigquery.Client(project=PROJECT) in
# list_all_column_names(); presumably the ambient default project is used -- confirm.
loader = BigQueryLoader(combined_query)
docs = loader.load()
# Summarise each document, then rank and merge the summaries. This calls the LLM and
# dominates the cell's runtime (the recorded run took about a minute of wall time).
itinerary_summary = map_reduce_chain.run(docs)

CPU times: user 190 ms, sys: 7.03 ms, total: 197 ms
Wall time: 1min 3s


In [20]:
# Show the final ranked summary produced by the map-reduce chain as plain text.
print(itinerary_summary)

Based on your interest in camels, I have ranked the itineraries for you:

1. The Morocco Kasbahs & Desert tour by G Adventures is an 8-day trip that offers camel rides in the desert. Departure dates are available on April 2nd, 4th, and 6th, 2024. The tour costs between £649.0 and £699.0. The travel style is Classic, offering a great price for all the cultural highlights and memorable experiences. The physical grading is 2, suitable for most fitness levels with light walking and hiking. The itinerary includes visits to ancient Roman ruins, the Fès medina, the Todra Gorge, and the Aït Ben Haddou Kasbah. The tour concludes with a guided visit to Marrakech. For more information, you can visit [https://www.gadventures.com/trips/morocco-kasbahs-and-desert/DCKD/](https://www.gadventures.com/trips/morocco-kasbahs-and-desert/DCKD/).

2. The Moroccan Desert Adventure: River Canyons & Camels tour by G Adventures is a 7-day trip that includes camel rides. Departing on April 6, 2024, this tour is d