Import Libs

In [None]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import joblib
import logging
import os
from dotenv import load_dotenv

# Set up logging with a concise format
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

# Load API keys with validation
load_dotenv()
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")
GOOGLE_MAPS_API_KEY = os.getenv("GOOGLE_MAPS_API_KEY")

if not all([OPENWEATHER_API_KEY, GOOGLE_MAPS_API_KEY]):
    logger.error("One or both API keys are missing from .env file.")
    raise ValueError("API keys are required for OpenWeather and Google Maps.")

Load Dataset

In [1]:
def load_data(file_path='holidify.csv'):
    try:
        df = pd.read_csv(file_path)
        
        # Define weather conditions for simulation (consistent with later mapping)
        weather_conditions = [
            'Clear', 'Clouds', 'Drizzle', 'Rain', 'Thunderstorm', 'Snow', 'Mist',
            'Smoke', 'Haze', 'Dust', 'Fog', 'Sand', 'Ash', 'Squall', 'Tornado'
        ]
        
        # Simulate missing columns with varied values
        df['Temperature'] = np.random.uniform(10, 35, size=len(df))  # Realistic temperature range
        df['Weather Condition'] = np.random.choice(weather_conditions, size=len(df))
        df['Travel Time'] = np.random.uniform(10, 120, size=len(df))  # Minutes
        df['User Ratings Total'] = np.random.randint(50, 501, size=len(df))  # Number of ratings
        
        # Ensure 'Rating' is float and handle potential missing values
        df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce').fillna(3.5)
        
        logger.info(f"Loaded data with {len(df)} cities and simulated features.")
        return df
    except Exception as e:
        logger.error(f"Failed to load data from {file_path}: {e}")
        raise

Feature Engineering for Training

In [2]:
def engineer_features(df):
    try:
        # Weather quality mapping (consistent with dynamic phase)
        weather_map = {
            'Clear': 10, 'Clouds': 7, 'Drizzle': 6, 'Rain': 5, 'Thunderstorm': 4,
            'Snow': 3, 'Mist': 4, 'Smoke': 3, 'Haze': 4, 'Dust': 3,
            'Fog': 4, 'Sand': 3, 'Ash': 3, 'Squall': 4, 'Tornado': 2
        }
        df['Weather Quality'] = df['Weather Condition'].map(weather_map).fillna(5)
        
        # Vectorized feature calculations
        df['Traffic Level'] = (df['Travel Time'] / 10).clip(0, 10)
        df['Temp_Comfort'] = (10 - abs(df['Temperature'] - 24) / 2).clip(0, 10)
        
        # Compute Destination Score as a function of features
        df['Destination Score'] = (
            df['Rating'] * 10 + 
            df['Weather Quality'] + 
            df['Temp_Comfort'] - 
            df['Traffic Level'] + 
            df['User Ratings Total'] / 100
        )
        
        return df
    except Exception as e:
        logger.error(f"Feature engineering failed: {e}")
        raise

Model Training

In [3]:
def train_recommendation_models(df, features):
    try:
        X = df[features]
        y = df['Destination Score']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        model = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
        model.fit(X_train_scaled, y_train)
        
        score = model.score(X_test_scaled, y_test)
        logger.info(f"Model trained with R^2 score: {score:.2f} on test set (size={len(y_test)})")
        
        return model, features, scaler
    except Exception as e:
        logger.error(f"Model training failed: {e}")
        raise

Save Model

In [4]:
def save_enhanced_model(model, features, scaler, model_file='travel_model.pkl', features_file='features.pkl', scaler_file='scaler.pkl'):
    try:
        # Save all components in a single file for simplicity
        joblib.dump({'model': model, 'features': features, 'scaler': scaler}, model_file)
        logger.info(f"Model and components saved to {model_file}")
    except Exception as e:
        logger.error(f"Failed to save model to {model_file}: {e}")
        raise

API Functions

In [5]:
def get_destination_coords(destination):
    try:
        url = f"https://maps.googleapis.com/maps/api/geocode/json?address={destination}&key={GOOGLE_MAPS_API_KEY}"
        response = requests.get(url).json()
        if response.get('results'):
            location = response['results'][0]['geometry']['location']
            return location['lat'], location['lng']  # Corrected 'lon' to 'lng'
        logger.warning(f"No coordinates found for {destination}")
        return None, None
    except Exception as e:
        logger.error(f"Failed to fetch coordinates for {destination}: {e}")
        return None, None

In [6]:
def fetch_weather(lat, lon):
    try:
        url = f"http://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={OPENWEATHER_API_KEY}"
        response = requests.get(url).json()
        if response.get('cod') == 200:
            temp = response['main']['temp'] - 273.15
            condition = response['weather'][0]['main']
            return {'temp': temp, 'condition': condition}
        logger.warning(f"Weather API failed for lat={lat}, lon={lon}: {response.get('message', 'Unknown error')}")
        return {'temp': 25, 'condition': 'Clear'}
    except Exception as e:
        logger.error(f"Weather fetch failed for lat={lat}, lon={lon}: {e}")
        return {'temp': 25, 'condition': 'Clear'}

In [7]:
def fetch_traffic(origin, destination_coords):
    """Deprecated: Use batched traffic fetching in engineer_features_dynamic instead."""
    try:
        url = f"https://maps.googleapis.com/maps/api/distancematrix/json?origins={origin}&destinations={destination_coords}&key={GOOGLE_MAPS_API_KEY}"
        response = requests.get(url).json()
        if response.get('rows') and response['rows'][0]['elements']:
            element = response['rows'][0]['elements'][0]
            if element['status'] == 'OK':
                return element['duration']['value'] / 60
        logger.warning(f"Traffic API failed for origin={origin}, dest={destination_coords}")
        return 30
    except Exception as e:
        logger.error(f"Traffic fetch failed for origin={origin}, dest={destination_coords}: {e}")
        return 30

In [8]:
def fetch_top_places(destination):
    try:
        url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query=top+tourism+places+in+{destination}&key={GOOGLE_MAPS_API_KEY}"
        response = requests.get(url).json()
        places = response.get('results', [])
        if not places:
            logger.warning(f"No tourism places found for {destination}")
        return places[:20]  # Limit to 20 to respect API limits
    except Exception as e:
        logger.error(f"Failed to fetch places for {destination}: {e}")
        return []

Dynamic Feature Engineering

In [9]:
def engineer_features_dynamic(places, traffic_origin, weather):
    try:
        # Batch fetch travel times for all places
        place_coords = [f"{p['geometry']['location']['lat']},{p['geometry']['location']['lng']}" for p in places]
        url = f"https://maps.googleapis.com/maps/api/distancematrix/json?origins={traffic_origin}&destinations={'|'.join(place_coords)}&key={GOOGLE_MAPS_API_KEY}"
        response = requests.get(url).json()
        travel_times = []
        if response.get('rows') and response['rows'][0]['elements']:
            elements = response['rows'][0]['elements']
            travel_times = [e['duration']['value'] / 60 if e['status'] == 'OK' else 30 for e in elements]
        else:
            logger.warning(f"Batch traffic API failed for origin={traffic_origin}")
            travel_times = [30] * len(places)
        
        # Extended weather mapping consistent with training
        weather_map = {
            'Clear': 10, 'Clouds': 7, 'Drizzle': 6, 'Rain': 5, 'Thunderstorm': 4,
            'Snow': 3, 'Mist': 4, 'Smoke': 3, 'Haze': 4, 'Dust': 3,
            'Fog': 4, 'Sand': 3, 'Ash': 3, 'Squall': 4, 'Tornado': 2
        }
        weather_quality = weather_map.get(weather['condition'], 5)
        
        # Build place data efficiently
        place_data = [
            {
                'Weather Quality': weather_quality,
                'Traffic Level': min(10, t / 10),
                'Temp_Comfort': max(0, min(10, 10 - abs(weather['temp'] - 24) / 2)),
                'Rating': p.get('rating'),
                'User Ratings Total': p.get('user_ratings_total'),
                'Travel Time': t
            }
            for p, t in zip(places, travel_times)
        ]
        
        df_places = pd.DataFrame(place_data)
        df_places['Rating'] = df_places['Rating'].fillna(df_places['Rating'].mean() if df_places['Rating'].notna().any() else 3.5)
        df_places['User Ratings Total'] = df_places['User Ratings Total'].fillna(
            df_places['User Ratings Total'].mean() if df_places['User Ratings Total'].notna().any() else 100
        )
        
        return df_places
    except Exception as e:
        logger.error(f"Dynamic feature engineering failed: {e}")
        raise

Dynamic Recommendations

In [10]:
def generate_smart_recommendations_dynamic(destination, user_location, model, features, scaler, num_recommendations=5):
    try:
        lat, lon = get_destination_coords(destination)
        traffic_origin = f"{lat},{lon}" if lat and lon else user_location
        weather = fetch_weather(lat, lon) if lat and lon else {'temp': 25, 'condition': 'Clear'}
        
        places = fetch_top_places(destination)
        if not places:
            return "No places found for this destination."
        
        df_places = engineer_features_dynamic(places, traffic_origin, weather)
        
        # Ensure feature alignment efficiently
        missing_features = [f for f in features if f not in df_places.columns]
        if missing_features:
            df_places = df_places.assign(**{f: 0 for f in missing_features})
        
        X_scaled = scaler.transform(df_places[features])
        predicted_scores = model.predict(X_scaled)
        
        # Add predictions to places efficiently
        for i, place in enumerate(places):
            place.update({
                'Predicted Score': predicted_scores[i],
                'Travel Time': df_places['Travel Time'].iloc[i],
                'Weather': weather['condition'],
                'Temperature': weather['temp']
            })
        
        top_places = sorted(places, key=lambda x: x['Predicted Score'], reverse=True)[:num_recommendations]
        
        return pd.DataFrame([
            {
                'Name': p['name'],
                'Rating': p.get('rating', 'N/A'),
                'Predicted Score': round(p['Predicted Score'], 2),
                'Address': p.get('formatted_address', 'N/A'),
                'Types': ', '.join(p.get('types', [])),
                'Weather': p['Weather'],
                'Temperature': round(p['Temperature'], 1),
                'Travel Time (min)': round(p['Travel Time'], 1)
            }
            for p in top_places
        ])
    except Exception as e:
        logger.error(f"Recommendation generation failed for {destination}: {e}")
        return "Error generating recommendations."

import logging

In [15]:
import logging
import pandas as pd  # ✅ Fix: Import pandas

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

def load_data(file_path="holidify.csv"):
    """Loads travel data from a CSV file."""
    try:
        df = pd.read_csv(file_path)  # ✅ Ensure 'pd' is defined
        logger.info("Data loaded successfully from %s", file_path)
        return df
    except Exception as e:
        logger.error(f"Failed to load data from {file_path}: {e}", exc_info=True)
        raise  # Re-raise exception to stop execution

def main():
    """Main function to run the travel recommendation process."""
    try:
        logger.info("Starting travel recommendation process")
        
        # Load and process data
        df = load_data()
        df_processed = engineer_features(df)
        
        # Define model features
        features = ['Weather Quality', 'Traffic Level', 'Temp_Comfort', 'Rating', 'User Ratings Total']
        
        # Train model
        model, features, scaler = train_recommendation_models(df_processed, features)
        save_enhanced_model(model, features, scaler)
        
        # Define user location and destination
        user_location = "48.8566,2.3522"  # Paris coordinates
        destination = "Paris"
        
        # Generate recommendations
        recommendations = generate_smart_recommendations_dynamic(destination, user_location, model, features, scaler)
        
        # Display results
        logger.info("Top Recommendations:\n%s", recommendations)
        print("Top Recommendations:")
        print(recommendations)

        logger.info("Process completed successfully")
    
    except Exception as e:
        logger.error(f"Main process failed: {e}", exc_info=True)

if __name__ == "__main__":
    main()


2025-03-23 12:43:41,107 - INFO - Starting travel recommendation process
2025-03-23 12:43:41,322 - INFO - Data loaded successfully from holidify.csv
2025-03-23 12:43:41,326 - ERROR - Feature engineering failed: 'Weather Condition'
2025-03-23 12:43:41,329 - ERROR - Main process failed: 'Weather Condition'
Traceback (most recent call last):
  File "c:\Users\pavan\anaconda3\Lib\site-packages\pandas\core\indexes\base.py", line 3805, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
  File "index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
  File "pandas\\_libs\\hashtable_class_helper.pxi", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas\\_libs\\hashtable_class_helper.pxi", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Weather Condition'

The above exception was the direct cause of the following exce