In [1]:
import datasets

# Load the "ashmib/SynthTRIPs" dataset through the `datasets` library.
dataset = datasets.load_dataset("ashmib/SynthTRIPs")

# Printing the returned object lists each split with its features and row count.
print(dataset)




DatasetDict({
    test: Dataset({
        features: ['text'],
        num_rows: 80
    })
})


In [2]:
# Show which split names the loaded object exposes.
dataset.keys()

dict_keys(['test'])

In [3]:
# Peek at the first three rows of the "test" split to see what the records look like.
dataset["test"][:3]


{'text': ['You are given a query and a list of filters. Your task is to identify how many filters are present in the query. Only return the number of matches and your explanation for this in the following format: ',
  '',
  'Matches: (number of matches found)']}

In [4]:
from datasets import get_dataset_config_names

# List the configuration names published for this dataset, to check whether
# anything other than the one loaded by default is available.
configs = get_dataset_config_names("ashmib/SynthTRIPs")
print(configs)


['default']


In [5]:
import pandas as pd

# Hand-written catalogue of candidate destinations, one row per city.
# Row layout: (city, avg_cost, climate, activities, best_months).
cities = pd.DataFrame(
    [
        ("Paris", 1500, "mild", ["culture", "food", "nightlife"], ["May", "Jun", "Sep"]),
        ("Reykjavik", 1500, "cold", ["nature", "hiking", "adventure"], ["Jun", "Jul", "Aug"]),
        ("Rome", 1000, "warm", ["culture", "history", "food"], ["Apr", "May", "Sep"]),
        ("Barcelona", 1100, "warm", ["beach", "nightlife", "culture"], ["Jun", "Jul", "Aug", "Sep"]),
        ("Innsbruck", 1300, "cold", ["skiing", "hiking", "nature"], ["Dec", "Jan", "Feb"]),
    ],
    columns=["city", "avg_cost", "climate", "activities", "best_months"],
)

In [6]:
import random

# Categorical vocabularies that synthetic traveller profiles are sampled from.
age_groups = ["18-25", "26-35", "36-50", "51+"]
travel_styles = ["adventure", "relaxation", "culture", "nightlife"]
group_types = ["solo", "couple", "friends", "family"]
budget_levels = ["low", "medium", "high"]

# Everything generated in this notebook comes from the global `random` generator.
# Seed it once here so that Restart & Run All reproduces the same users, filters
# and scores instead of a fresh dataset on every run.
SEED = 42
random.seed(SEED)

In [10]:
def generate_user():
    """Sample one synthetic traveller profile.

    Each attribute is drawn independently and uniformly from the matching
    module-level list (age_groups, travel_styles, group_types, budget_levels).

    Returns:
        dict with the keys "age_group", "style", "group_type" and "budget".
    """
    profile = {}
    profile["age_group"] = random.choice(age_groups)
    profile["style"] = random.choice(travel_styles)
    profile["group_type"] = random.choice(group_types)
    profile["budget"] = random.choice(budget_levels)
    return profile

# Draw one profile as a quick sanity check; being the cell's last expression, it is displayed.
generate_user()


{'age_group': '18-25',
 'style': 'nightlife',
 'group_type': 'solo',
 'budget': 'low'}

In [11]:
def generate_filters(user):
    """Derive trip-search filters from a traveller profile.

    Args:
        user: dict with at least "budget" ("low", "medium" or "high") and "style".

    Returns:
        dict with:
          - "budget_max": spending ceiling mapped from the budget level,
          - "activities": one-element list holding the user's travel style,
          - "duration_days": randomly chosen from 3, 5, 7 or 10,
          - "preferred_month": randomly chosen three-letter month abbreviation.

    Raises:
        KeyError: if user["budget"] is not one of the known budget levels.
    """
    budget_ceilings = {"low": 800, "medium": 1500, "high": 3000}
    trip_lengths = [3, 5, 7, 10]
    months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    trip_filters = {}
    trip_filters["budget_max"] = budget_ceilings[user["budget"]]
    trip_filters["activities"] = [user["style"]]
    # Duration is drawn before the month, so the RNG is consumed in a fixed order.
    trip_filters["duration_days"] = random.choice(trip_lengths)
    trip_filters["preferred_month"] = random.choice(months)
    return trip_filters

# Sample one profile, derive its filters, and print both (one per line).
user = generate_user()
filters = generate_filters(user)
print(user, filters, sep="\n")


{'age_group': '26-35', 'style': 'nightlife', 'group_type': 'couple', 'budget': 'high'}
{'budget_max': 3000, 'activities': ['nightlife'], 'duration_days': 3, 'preferred_month': 'May'}


In [12]:
def score_city(user, filters, city):
    """Score how well a single city fits a user's trip request.

    The score is a sum of fixed weights for each criterion that matches:
      - 0.4 if the city's average cost is within the budget ceiling,
      - 0.3 if the city offers at least one requested activity,
      - 0.2 if the preferred month is one of the city's best months,
      - 0.1 if the city's climate is the one associated with the user's style.

    Args:
        user: mapping with a "style" key.
        filters: mapping with "budget_max", "activities" and "preferred_month".
        city: mapping-like row (dict or pandas Series) with "avg_cost",
            "activities", "best_months" and "climate".

    Returns:
        The total score rounded to two decimals (0 when nothing matches,
        1.0 when everything does).
    """
    style_to_climate = {
        "adventure": "cold",
        "relaxation": "warm",
        "culture": "mild",
        "nightlife": "warm",
    }

    score = 0

    # Check if the budget is okay
    if city["avg_cost"] <= filters["budget_max"]:
        score += 0.4

    # Activity match
    if any(act in city["activities"] for act in filters["activities"]):
        score += 0.3

    # Season match
    if filters["preferred_month"] in city["best_months"]:
        score += 0.2

    # Climate mapping (optional): a style with no associated climate simply
    # earns no bonus instead of aborting the whole scoring with a KeyError.
    preferred_climate = style_to_climate.get(user["style"])
    if preferred_climate is not None and preferred_climate == city["climate"]:
        score += 0.1

    return round(score, 2)

# Example: print each catalogue city next to its score for the sampled request
for idx, row in cities.iterrows():
    city_name = row["city"]
    city_score = score_city(user, filters, row)
    print(city_name, city_score)


Paris 0.9
Reykjavik 0.4
Rome 0.7
Barcelona 0.8
Innsbruck 0.4


In [13]:
# Build the synthetic table: every simulated request is scored against every city,
# giving one row per (request, city) pair.
NUM_REQUESTS = 5000  # number of simulated (user, filters) requests

# Convert the city catalogue to plain dicts once. score_city only does key
# lookups, so this avoids re-running DataFrame.iterrows() (and building a new
# Series per row) on every one of the NUM_REQUESTS outer iterations.
city_records = cities.to_dict(orient="records")

# NOTE(review): `dataset` rebinds the name used for the Hub dataset loaded at the
# top of the notebook; the name is kept because later cells may rely on it.
dataset = []

for _ in range(NUM_REQUESTS):
    user = generate_user()
    filters = generate_filters(user)

    for city in city_records:
        dataset.append({
            "user_age_group": user["age_group"],
            "user_style": user["style"],
            "user_group_type": user["group_type"],
            "user_budget": user["budget"],
            "city": city["city"],
            "budget_max": filters["budget_max"],
            # Copy so the rows of one request do not all share the same list object.
            "activities": list(filters["activities"]),
            "preferred_month": filters["preferred_month"],
            "duration_days": filters["duration_days"],
            "score": score_city(user, filters, city)
        })

df = pd.DataFrame(dataset)
df.head(10)

Unnamed: 0,user_age_group,user_style,user_group_type,user_budget,city,budget_max,activities,preferred_month,duration_days,score
0,51+,adventure,couple,medium,Paris,1500,[adventure],Feb,5,0.4
1,51+,adventure,couple,medium,Reykjavik,1500,[adventure],Feb,5,0.8
2,51+,adventure,couple,medium,Rome,1500,[adventure],Feb,5,0.4
3,51+,adventure,couple,medium,Barcelona,1500,[adventure],Feb,5,0.4
4,51+,adventure,couple,medium,Innsbruck,1500,[adventure],Feb,5,0.7
5,18-25,nightlife,couple,high,Paris,3000,[nightlife],Mar,7,0.7
6,18-25,nightlife,couple,high,Reykjavik,3000,[nightlife],Mar,7,0.4
7,18-25,nightlife,couple,high,Rome,3000,[nightlife],Mar,7,0.5
8,18-25,nightlife,couple,high,Barcelona,3000,[nightlife],Mar,7,0.8
9,18-25,nightlife,couple,high,Innsbruck,3000,[nightlife],Mar,7,0.4
