In [47]:
import pandas as pd
import random
import numpy as np

# Load data
cities = pd.read_csv("cities_travel_data.csv")

cities['activities'] = cities['activities'].str.split('|')
cities['best_months'] = cities['best_months'].str.split('|')

In [49]:
cities.head(10)

Unnamed: 0,city,country,continent,avg_cost,climate,activities,best_months,avg_temp_range,popularity,safety_rating
0,Paris,France,Europe,1500,mild,"[culture, food, nightlife, museums, shopping]","[Apr, May, Jun, Sep, Oct]",5-25°C,9,8
1,Reykjavik,Iceland,Europe,2000,cold,"[nature, hiking, adventure, northern_lights, h...","[Jun, Jul, Aug]",-2-15°C,7,9
2,Rome,Italy,Europe,1200,warm,"[culture, history, food, museums, architecture]","[Apr, May, Sep, Oct]",8-30°C,9,7
3,Barcelona,Spain,Europe,1100,warm,"[beach, nightlife, culture, food, architecture]","[May, Jun, Jul, Aug, Sep]",10-28°C,9,7
4,Innsbruck,Austria,Europe,1300,cold,"[skiing, hiking, nature, mountains]","[Dec, Jan, Feb, Jul, Aug]",-5-20°C,6,9
5,Amsterdam,Netherlands,Europe,1400,mild,"[culture, museums, nightlife, biking, canals]","[Apr, May, Jun, Sep]",3-22°C,8,8
6,Prague,Czech Republic,Europe,900,mild,"[culture, history, nightlife, architecture, food]","[Apr, May, Jun, Sep]",-2-24°C,8,8
7,Santorini,Greece,Europe,1600,warm,"[beach, relaxation, food, photography, romance]","[May, Jun, Sep, Oct]",12-29°C,8,8
8,Dubrovnik,Croatia,Europe,1000,warm,"[beach, history, culture, sailing]","[May, Jun, Sep]",8-28°C,7,8
9,Edinburgh,Scotland,Europe,1300,cool,"[culture, history, festivals, hiking]","[Jun, Jul, Aug]",4-19°C,7,9


In [40]:
cities.shape

(103, 10)

In [41]:
age_groups = ["18-25", "26-35", "36-50", "51+"]
travel_styles = ["adventure", "relaxation", "culture", "nightlife", "food", "nature", "beach"]
group_types = ["solo", "couple", "friends", "family"]
budget_levels = ["low", "medium", "high", "luxury"]
climate_preferences = ["hot", "warm", "mild", "cool", "cold", "tropical", "any"]

# User profile weights based on demographics
age_preferences = {
    "18-25": {"nightlife": 1.3, "adventure": 1.2, "culture": 0.9, "budget_sensitivity": 1.4},
    "26-35": {"food": 1.2, "culture": 1.1, "nightlife": 1.1, "budget_sensitivity": 1.1},
    "36-50": {"culture": 1.3, "relaxation": 1.2, "food": 1.2, "budget_sensitivity": 0.9},
    "51+": {"culture": 1.4, "relaxation": 1.3, "history": 1.3, "budget_sensitivity": 0.8}
}

group_preferences = {
    "solo": {"culture": 1.2, "adventure": 1.1, "safety_weight": 1.3},
    "couple": {"relaxation": 1.2, "food": 1.2, "romance": 1.4},
    "friends": {"nightlife": 1.3, "adventure": 1.2, "beach": 1.1},
    "family": {"safety_weight": 1.5, "relaxation": 1.2, "nature": 1.1}
}

In [56]:
def generate_user():
    # Add secondary interests (1-2 additional activities)
    style = random.choice(travel_styles)
    secondary_interests = random.sample(
        [s for s in travel_styles if s != style], 
        k=random.randint(1, 2)
    )
    
    return {
        "age_group": random.choice(age_groups),
        "primary_style": style,
        "secondary_interests": secondary_interests,
        "group_type": random.choice(group_types),
        "budget": random.choice(budget_levels),
        "climate_preference": random.choice(climate_preferences),
        "safety_importance": random.uniform(0.5, 1.0),
        "popularity_preference": random.uniform(0.3, 1.0)
    }

def generate_filters(user):
    budget_map = {"low": 1000, "medium": 1500, "high": 2200, "luxury": 5000}
    
    # Combine primary and secondary interests
    all_activities = [user["primary_style"]] + user["secondary_interests"]
    
    return {
        "budget_max": budget_map[user["budget"]],
        "activities": all_activities,
        "duration_days": random.choice([3, 5, 7, 10, 14]),
        "preferred_month": random.choice(["Jan","Feb","Mar","Apr","May","Jun",
                                         "Jul","Aug","Sep","Oct","Nov","Dec"]),
        "climate_preference": user["climate_preference"],
        "continent_preference": random.choice([None, "Europe", "Asia", "North America", 
                                               "South America", "Africa", "Oceania"])
    }


In [57]:
def score_city(user, filters, city, previous_recommendations=None):
    score = 0
    max_score = 0
    
    # Budge score (25% weight)
    budget_weight = 0.25
    max_score += budget_weight
    
    if city["avg_cost"] <= filters["budget_max"]:
        # Bonus for being well under budget
        budget_efficiency = 1 - (city["avg_cost"] / filters["budget_max"])
        score += budget_weight * (0.7 + 0.3 * budget_efficiency)
    else:
        # Penalty for over budget (but not complete elimination)
        over_budget_penalty = min(0.5, (city["avg_cost"] - filters["budget_max"]) / filters["budget_max"])
        score += budget_weight * (0.3 - over_budget_penalty)
    
    # Activity match (30% weight)
    activity_weight = 0.30
    max_score += activity_weight
    
    city_activities = city["activities"]
    
    # Primary interest match
    if user["primary_style"] in city_activities:
        score += activity_weight * 0.6
    
    # Secondary interests match
    secondary_matches = sum(1 for interest in user["secondary_interests"] if interest in city_activities)
    if len(user["secondary_interests"]) > 0:
        score += activity_weight * 0.4 * (secondary_matches / len(user["secondary_interests"]))
    
    # Apply age-based activity preferences
    age_prefs = age_preferences.get(user["age_group"], {})
    for activity in city_activities:
        if activity in age_prefs:
            score += 0.05 * (age_prefs[activity] - 1)
    
    # Season match (15% weight)
    season_weight = 0.15
    max_score += season_weight
    
    if filters["preferred_month"] in city["best_months"]:
        score += season_weight
    elif len(set(city["best_months"]).intersection(get_adjacent_months(filters["preferred_month"]))) > 0:
        score += season_weight * 0.5  # Partial credit for adjacent months
    
    # Climate match (10% weight)
    climate_weight = 0.10
    max_score += climate_weight
    
    if filters["climate_preference"] == "any" or filters["climate_preference"] == city["climate"]:
        score += climate_weight
    
    # Safety (10% weight, scaled by user's safety importance)
    safety_weight = 0.10 * user["safety_importance"]
    max_score += safety_weight
    
    # Apply group-specific safety preferences
    group_prefs = group_preferences.get(user["group_type"], {})
    safety_multiplier = group_prefs.get("safety_weight", 1.0)
    
    score += safety_weight * (city["safety_rating"] / 10) * safety_multiplier
    
    # Popularity (5% weight, inversely scaled by user's popularity preference)
    popularity_weight = 0.05
    max_score += popularity_weight
    
    # Some users prefer less touristy destinations
    if user["popularity_preference"] > 0.7:
        score += popularity_weight * (city["popularity"] / 10)
    else:
        # Prefer hidden gems
        score += popularity_weight * (1 - city["popularity"] / 10)
    
    # Continent preference (5% weight)
    continent_weight = 0.05
    max_score += continent_weight
    
    if filters["continent_preference"] is None or filters["continent_preference"] == city["continent"]:
        score += continent_weight
    
    # Avoid recommending too similar destinations
    if previous_recommendations:
        # Check if this city is from a different continent/country
        different_continent = all(city["continent"] != prev["continent"] for prev in previous_recommendations)
        different_country = all(city["country"] != prev["country"] for prev in previous_recommendations)
        
        if different_continent:
            score += 0.05
        elif different_country:
            score += 0.03
    
    # small amount for diversity between runs, just so the recommendations aren't always the same
    score += random.uniform(-0.05, 0.05)
    
    # Normalize to 0-1 range
    return round(max(0, min(1, score)), 3)

def get_adjacent_months(month):
    months = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
    idx = months.index(month)
    return [months[(idx-1) % 12], months[(idx+1) % 12]]


In [58]:
def get_recommendations(user, filters, cities_df, top_n=5):
    recommendations = []
    
    for idx, row in cities_df.iterrows():
        score = score_city(user, filters, row, recommendations)
        recommendations.append({
            "city": row["city"],
            "country": row["country"],
            "continent": row["continent"],
            "score": score,
            "avg_cost": row["avg_cost"],
            "climate": row["climate"],
            "safety_rating": row["safety_rating"],
            "popularity": row["popularity"]
        })
    
    # Sort by score
    recommendations.sort(key=lambda x: x["score"], reverse=True)
    return recommendations[:top_n]

def print_recommendations(user, filters, recommendations):
    """Pretty print the recommendations"""
    print("="*80)
    print("USER PROFILE")
    print("="*80)
    print(f"Age Group: {user['age_group']}")
    print(f"Travel Style: {user['primary_style']} (also interested in: {', '.join(user['secondary_interests'])})")
    print(f"Group Type: {user['group_type']}")
    print(f"Budget: {user['budget']} (max ${filters['budget_max']})")
    print(f"Preferred Month: {filters['preferred_month']}")
    print(f"Climate Preference: {filters['climate_preference']}")
    print(f"Duration: {filters['duration_days']} days")
    print("\n" + "="*80)
    print("TOP RECOMMENDATIONS")
    print("="*80)
    
    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec['city']}, {rec['country']} ({rec['continent']})")
        print(f"   Match Score: {rec['score']:.1%}")
        print(f"   Cost: ${rec['avg_cost']} | Climate: {rec['climate']} | Safety: {rec['safety_rating']}/10")
    print("\n" + "="*80)

In [60]:
# Test with 3 different users
print("TESTING RECOMMENDATION DIVERSITY\n")

for test_num in range(3):
    user = generate_user()
    filters = generate_filters(user)
    recommendations = get_recommendations(user, filters, cities)
    
    print(f"\n{'#'*80}")
    print(f"TEST RUN #{test_num + 1}")
    print(f"{'#'*80}\n")
    
    print_recommendations(user, filters, recommendations)

TESTING RECOMMENDATION DIVERSITY


################################################################################
TEST RUN #1
################################################################################

USER PROFILE
Age Group: 18-25
Travel Style: relaxation (also interested in: beach, culture)
Group Type: solo
Budget: luxury (max $5000)
Preferred Month: Apr
Climate Preference: cold
Duration: 7 days

TOP RECOMMENDATIONS

1. Cancun, Mexico (North America)
   Match Score: 79.9%
   Cost: $1300 | Climate: tropical | Safety: 6/10

2. Fiji, Fiji (Oceania)
   Match Score: 79.7%
   Cost: $1650 | Climate: tropical | Safety: 8/10

3. Okinawa, Japan (Asia)
   Match Score: 79.3%
   Cost: $1500 | Climate: subtropical | Safety: 9/10

4. Maldives, Maldives (Asia)
   Match Score: 79.1%
   Cost: $2500 | Climate: tropical | Safety: 9/10

5. Perth, Australia (Oceania)
   Match Score: 75.2%
   Cost: $1850 | Climate: warm | Safety: 9/10


##############################################################