In [1]:
import pandas as pd

In [2]:
from typing import Optional
from pydantic import BaseModel, Field

class TravelDetails(BaseModel):
    introduction: Optional[bool] = Field(
        False,
        description="Has francis introducted himself and asked if the user is interested in a group tour.",
    )
#     qualification: Optional[str] = Field(
#         ...,
#         description="Did the user confirm they are looking for a group tour or answer positivley when asked. If the user asks about a trip assume the answer is yes",
#         enum=["Yes", "No", "Unsure"]
#     )
    country: Optional[str] = Field(
        "",
        description="This is the name of the country the user is wanting to visit. If they name a place within a country always return the country",
        enum=["Cambodia", "Vietnam", "Thailand"]
    )
    departing_after: Optional[str] = Field(
        "",
        description="This is the first date from which the user can depart. If the user gives a month assume this is the first of the month. If not year if given return 2023. In the format '%d/%m/%Y'",
    )
    departing_before: Optional[str] = Field(
        "",
        description="This is the last date from which the user can depart. If the user gives a month assume this is the last day of the month. If not year if given return 2023. In the format '%d/%m/%Y'",
    )
    max_budget: Optional[int] = Field(
        0,
        description="This is the maximun amount of money the user is looking to spend on their trip.",
    )
    max_duration: Optional[int] = Field(
        None,
        description="This is the maximum duration of their trip."
    )
    min_duration: Optional[int] = Field(
        None,
        description="This is the minimum duration of their trip.",
    )

In [91]:
user_travel_details = TravelDetails(introduction=False,
                                # qualification="",
                                country="Morocco",
                                departing_after="2024-03-19",
                                departing_before="2024-04-30",
                                max_budget=None,
                                max_duration=None,
                                min_duration=None)

In [92]:
# Define a custom function to find the first non-null value in columns 9 to the end
def find_first_non_null(row):
    for value in row[11:]:  # Slice from the 9th column to the end
        if not pd.isna(value):
            return value
    return None


In [93]:
def get_filtered_df(df, user_travel_details):
    trip_details_dict = user_travel_details.dict()
    filled_out_dictionary = {k: v for k, v in user_travel_details.dict().items() if v not in [False, None, "",0]}
    # print(filled_out_dictionary)
    # convert dates to datetime format
    df['duration'] = df['duration'].str.replace(' days', '').astype(int)
    df['start_date'] = pd.to_datetime(df['start_date'], format='%Y-%m-%d')
    
    # Apply the custom function to each row to find cost
    df['first_non_null'] = df.apply(find_first_non_null, axis=1)

    # Filtering the DataFrame
    filtered_df = df.copy()  # Make a copy of the original DataFrame to keep it intact
    
    # Iterate through the list of potential inputs
    for input_column in filled_out_dictionary.keys():
        if input_column == 'country':
            filtered_df = filtered_df[filtered_df['visited_countries'] == trip_details_dict["country"]]
        elif input_column == 'max_budget':
            filtered_df = filtered_df[filtered_df['first_non_null'] <= trip_details_dict["max_budget"]]
        elif input_column == 'min_budget':
            filtered_df = filtered_df[filtered_df['first_non_null'] >= trip_details_dict["min_budget"]]
        elif input_column == 'departing_after':
            filtered_df = filtered_df[filtered_df['start_date'] >= trip_details_dict["departing_after"]]
        elif input_column == 'departing_before':
            filtered_df = filtered_df[filtered_df['start_date'] <= trip_details_dict["departing_before"]]
        elif input_column == 'max_duration':
            filtered_df = filtered_df[filtered_df['duration'] <= trip_details_dict["max_duration"]]
        elif input_column == 'min_duration':
            filtered_df = filtered_df[filtered_df['duration'] >= trip_details_dict["min_duration"]]

    return filtered_df

In [94]:
df = pd.read_csv("raw_data/one_day_test.csv")

In [95]:
filtered_df = get_filtered_df(df, user_travel_details)

In [96]:
filtered_df.head()

Unnamed: 0,tour_name,itinerary_name,visited_countries,start_date,duration,url,Travel Style,Service Level,Physical Grading,Merchandising,Trip Type,Standard - Adult,Standard - Double - D - Adult,Standard - Twin - D - Adult,first_non_null
16,Highlights of Morocco,,Morocco,2024-04-19,15,https://www.gadventures.com/trips/highlights-o...,Classic,Standard,2 - Light,Planeterra Project Book Your Bubble,Small Group,1119.0,,,1119.0
17,Coastal Morocco: Waves & Market Stalls,,Morocco,2024-04-19,5,https://www.gadventures.com/trips/tour-coastal...,18-to-Thirtysomethings,Basic,2 - Light,,Small Group,469.0,,,469.0


In [64]:
trip_details_dict = user_travel_details.dict()
filled_out_dictionary = {k: v for k, v in user_travel_details.dict().items() if v not in [False, None, "",0]}

In [65]:
filled_out_dictionary

{'country': 'Jordan',
 'departing_after': '2024-03-19',
 'departing_before': '2024-04-11'}

In [66]:
country_filtered_df = df[df['visited_countries'] == trip_details_dict["country"]]

In [79]:
# country suggestions
alternate_destinations = filtered_df['visited_countries'].unique()

In [80]:
# budget suggestions
min_budget = filtered_df['first_non_null'].min()
mean_budget = filtered_df['first_non_null'].mean()
max_budget = filtered_df['first_non_null'].max()
average_cost_per_day = round(filtered_df['first_non_null'].sum() / filtered_df['duration'].sum(),0)

  average_cost_per_day = round(filtered_df['first_non_null'].sum() / filtered_df['duration'].sum(),0)


In [81]:
# duration suggestions
min_duration = filtered_df['duration'].min()
mean_duration = filtered_df['duration'].mean()
max_duration = filtered_df['duration'].max()

In [82]:
# start_date suggestions
later_start_date = filtered_df[filtered_df['start_date'] > trip_details_dict['departing_before']]
earlier_start_date = filtered_df[filtered_df['start_date'] < trip_details_dict['departing_before']]

In [83]:
trip_details_dict = user_travel_details.dict()
filled_out_dictionary = {k: v for k, v in user_travel_details.dict().items() if v not in [False, None, "",0]}

# Check if the resulting DataFrame is empty
if len(filtered_df) == 0:
    # Provide alternate suggestions based on user criteria
    alternate_suggestions = {}
    
    # For the 'country' filter
    if 'country' not in filled_out_dictionary:
        alternate_suggestions['country'] = df['visited_countries'].unique()
    
    # For the 'budget' filter
    if 'budget' not in filled_out_dictionary:
        alternate_suggestions['budget'] = [min_budget, mean_budget, max_budget]
    
    # For the 'duration' filter
    if 'duration' not in filled_out_dictionary:
        alternate_suggestions['duration'] = [min_duration, mean_duration, max_duration]
    
    # For the 'start_date' filter
    if 'start_date' not in filled_out_dictionary:
        alternate_suggestions['start_date'] = {
            'later_start_date': later_start_date,
            'earlier_start_date': earlier_start_date
        }
    
    print("No results found for the given criteria. Consider the following alternate suggestions:")
    print(alternate_suggestions)
else:
    print("Results based on user criteria:")
    print(suggested_df)


No results found for the given criteria. Consider the following alternate suggestions:
{'budget': [nan, nan, nan], 'duration': [nan, nan, nan], 'start_date': {'later_start_date': Empty DataFrame
Columns: [tour_name, itinerary_name, visited_countries, start_date, duration, url, Travel Style, Service Level, Physical Grading, Merchandising, Trip Type, Standard - Adult, Standard - Double - D - Adult, Standard - Twin - D - Adult, first_non_null]
Index: [], 'earlier_start_date': Empty DataFrame
Columns: [tour_name, itinerary_name, visited_countries, start_date, duration, url, Travel Style, Service Level, Physical Grading, Merchandising, Trip Type, Standard - Adult, Standard - Double - D - Adult, Standard - Twin - D - Adult, first_non_null]
Index: []}}


In [84]:
alternate_suggestions

{'budget': [nan, nan, nan],
 'duration': [nan, nan, nan],
 'start_date': {'later_start_date': Empty DataFrame
  Columns: [tour_name, itinerary_name, visited_countries, start_date, duration, url, Travel Style, Service Level, Physical Grading, Merchandising, Trip Type, Standard - Adult, Standard - Double - D - Adult, Standard - Twin - D - Adult, first_non_null]
  Index: [],
  'earlier_start_date': Empty DataFrame
  Columns: [tour_name, itinerary_name, visited_countries, start_date, duration, url, Travel Style, Service Level, Physical Grading, Merchandising, Trip Type, Standard - Adult, Standard - Double - D - Adult, Standard - Twin - D - Adult, first_non_null]
  Index: []}}