In [43]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from dotenv import load_dotenv
import os
import requests

In [2]:
business_df = pd.read_csv('data/business_data_cleaned.csv')
review_df = pd.read_csv('data/review_data.csv')

In [3]:
business_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,business_rating,business_review_count,...,upscale_ambience,classy_ambience,casual_ambience,price_range_1,price_range_2,price_range_3,price_range_4,alcohol_beer_and_wine,alcohol_full_bar,alcohol_no_alcohol
0,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,...,0,0,0,1,0,0,0,0,0,1
1,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,TN,37015,36.269593,-87.058943,2.0,6,...,0,0,0,1,0,0,0,0,0,1
2,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,TN,37207,36.208102,-86.76817,1.5,10,...,0,0,0,1,0,0,0,0,0,1
3,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,North Jefferson Street,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,0,0,0,0,1,0,0,0,0,1
4,il_Ro8jwPlHresjw9EGmBg,Denny's,8901 US 31 S,Indianapolis,IN,46227,39.637133,-86.127217,2.5,28,...,0,0,1,1,0,0,0,0,0,1


In [4]:
review_df.head()

Unnamed: 0,review_id,user_id,business_id,review_rating
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0
1,BiTunyQ73aT9WBnpR9DZGw,OyoGAe7OKpv6SyGZT5g77Q,7ATYjTIgM3jUlt4UM3IypQ,5.0
2,saUsX_uimxRlCVr67Z4Jig,8g_iMtfSiwikVnbP2etR0A,YjUWPpI6HXG530lwP-fb2A,3.0
3,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0
4,Sx8TMOWLNuJBWer-0pcmoA,bcjbaE6dDog4jkNY91ncLQ,e4Vwtrqf-wpJfwesgvdgxQ,4.0


In [5]:
# Scale the `business_review_count` column
scaler = MinMaxScaler()
scaled_features = pd.DataFrame(
    scaler.fit_transform(business_df[['business_review_count']]),
    columns=['business_review_count']
)

# Columns to be dropped from the original dataframe
columns_to_be_dropped = [
    'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'business_review_count'
]

# Drop unnecessary columns including the original `business_review_count`
business_df_cleaned = business_df.drop(columns=columns_to_be_dropped).reset_index(drop=True)

# Combine scaled features with the cleaned dataframe
scaled_business_df = pd.concat([
    business_df_cleaned, 
    scaled_features.reset_index(drop=True)
], axis=1)

# Verify the dataframe
scaled_business_df.head()

Unnamed: 0,business_id,business_rating,is_open,Acai Bowls,Accessories,Accountants,Active Life,Adult,Adult Education,Adult Entertainment,...,classy_ambience,casual_ambience,price_range_1,price_range_2,price_range_3,price_range_4,alcohol_beer_and_wine,alcohol_full_bar,alcohol_no_alcohol,business_review_count
0,MTSW4McQd7CbVtyjqoe9mw,4.0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0.009917
1,CF33F8-E6oudUQ46HnavjQ,2.0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0.000132
2,bBDDEgkFA1Otx9Lfe7BZUQ,1.5,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0.000661
3,eEOYSgkmpB90uNA7lDOMRA,4.0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0.000661
4,il_Ro8jwPlHresjw9EGmBg,2.5,1,0,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,1,0.003041


In [6]:
aggregated_review_df = review_df.groupby(['user_id', 'business_id'], as_index=False).agg({
    'review_rating': 'mean'
})
aggregated_review_df.shape

(6745760, 3)

In [7]:
#Split train and test data
train_df, test_df = train_test_split(aggregated_review_df,test_size=0.2,random_state=42)
print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")

Train size: 5396608, Test size: 1349152


In [8]:
print(f"Unique user IDs in train_df:{len(train_df['user_id'].unique())}")
print(f"Unique business IDs in train_df:{len(train_df['business_id'].unique())}")

Unique user IDs in train_df:1742551
Unique business IDs in train_df:150339


In [9]:
# Define the number of users and businesses to sample
sampled_users = train_df['user_id'].sample(n=500000, random_state=42).unique() 
sampled_businesses = train_df['business_id'].sample(n=50000, random_state=42).unique() 

# Filter the train_df for the sampled users and businesses
train_df_small = train_df[
    train_df['user_id'].isin(sampled_users) & train_df['business_id'].isin(sampled_businesses)
].reset_index(drop=True)

print(f"Downsized Train DataFrame Shape: {train_df_small.shape}")


Downsized Train DataFrame Shape: (1685083, 3)


In [10]:
train_business_ids = set(train_df_small['business_id'])
scaled_business_ids = set(scaled_business_df['business_id'])
intersection = train_business_ids & scaled_business_ids
missing_in_scaled = train_business_ids - scaled_business_ids
print(f"Train Business IDs: {len(train_business_ids)}")
print(f"Scaled Business IDs: {len(scaled_business_ids)}")
print(f"Common Business IDs: {len(intersection)}")
print(f"Missing Business IDs: {len(missing_in_scaled)}")
if missing_in_scaled:
    print(f"Removing {len(missing_in_scaled)} missing business IDs from train_df.")
    train_df = train_df_small[train_df_small['business_id'].isin(intersection)]
else:
    print("No missing business IDs. Proceeding with the full train_df.")

Train Business IDs: 29493
Scaled Business IDs: 34987
Common Business IDs: 12405
Missing Business IDs: 17088
Removing 17088 missing business IDs from train_df.


In [11]:
train_df = train_df.copy()
user_id_map = {uid: idx for idx, uid in enumerate(train_df['user_id'].unique())}
business_id_map = {bid: idx for idx, bid in enumerate(train_df['business_id'].unique())}
train_df['user_idx'] = train_df['user_id'].map(user_id_map)
train_df['business_idx'] = train_df['business_id'].map(business_id_map)
if 'business_id' in scaled_business_df.columns:
    scaled_business_df = scaled_business_df.set_index('business_id')
business_features = scaled_business_df.loc[train_df['business_id']].values
print(len(business_features))


1143089


In [12]:
X = torch.tensor(train_df[['user_idx', 'business_idx']].values, dtype=torch.long)
X_features = torch.tensor(business_features, dtype=torch.float32)
y = torch.tensor(train_df['review_rating'].values, dtype=torch.float32)

In [13]:
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X, X_features, y = X.to(device), X_features.to(device), y.to(device)

# Split indices for train and validation sets
random_state = 42
torch.manual_seed(random_state)
indices = torch.randperm(len(X), device=device)

train_size = int(0.8 * len(X))
train_indices = indices[:train_size]
val_indices = indices[train_size:]

X_train, X_val = X[train_indices], X[val_indices]
X_features_train, X_features_val = X_features[train_indices], X_features[val_indices]
y_train, y_val = y[train_indices], y[val_indices]

# Print dataset shapes
print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"X_features_train shape: {X_features_train.shape}")
print(f"X_features_val shape: {X_features_val.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_val shape: {y_val.shape}")

X_train shape: torch.Size([914471, 2])
X_val shape: torch.Size([228618, 2])
X_features_train shape: torch.Size([914471, 715])
X_features_val shape: torch.Size([228618, 715])
y_train shape: torch.Size([914471])
y_val shape: torch.Size([228618])


In [14]:
class HybridRecommender(nn.Module):
    def __init__(self, num_users, num_businesses, feature_dim, embedding_dim=50):
        super(HybridRecommender, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.business_embedding = nn.Embedding(num_businesses, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim * 2 + feature_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, user_ids=None, business_ids=None, features=None):
        """
        Forward method for Hybrid Recommender.
        If user_ids and business_ids are None, only features are used (cold-start scenario).
        """
        if user_ids is not None and business_ids is not None:
            user_embeds = self.user_embedding(user_ids)
            business_embeds = self.business_embedding(business_ids)
        else:
            # Cold-start: Use zero embeddings for users and businesses
            user_embeds = torch.zeros(features.size(0), self.user_embedding.embedding_dim, device=features.device)
            business_embeds = torch.zeros(features.size(0), self.business_embedding.embedding_dim, device=features.device)

        x = torch.cat([user_embeds, business_embeds, features], dim=1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x).squeeze()


In [15]:
# Model initialization
num_users = len(user_id_map)
num_businesses = len(business_id_map)
feature_dim = X_features.shape[1]
model = HybridRecommender(num_users, num_businesses, feature_dim).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [28]:
# Training loop with learning rate scheduling
def train_model(model, criterion, optimizer, X_train, X_features_train, y_train, X_val, X_features_val, y_val, epochs=10, save_path="best_model.pth"):
    best_val_loss = float('inf')  # Initialize the best validation loss as infinity
    best_model_state = None  # To store the state of the best model
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)  # Learning rate scheduler

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train[:, 0], X_train[:, 1], X_features_train)
        loss = criterion(predictions, y_train)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val[:, 0], X_val[:, 1], X_features_val)
            val_loss = criterion(val_predictions, y_val)
            val_rmse = sqrt(mean_squared_error(y_val.cpu(), val_predictions.cpu()))

        # Check if the current validation loss is the best
        if val_loss.item() < best_val_loss:
            best_val_loss = val_loss.item()
            best_model_state = model.state_dict()  # Save the model's state dict
            torch.save(best_model_state, save_path)  # Save to file
            print(f"Epoch {epoch+1}: New best validation loss: {best_val_loss:.4f}. Model saved.")

        # Update the scheduler with the validation loss
        scheduler.step(val_loss.item())

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val RMSE: {val_rmse:.4f}")

# Train the model
torch.cuda.empty_cache()
train_model(model, criterion, optimizer, X_train, X_features_train, y_train, X_val, X_features_val, y_val, epochs=50, save_path="best_hybrid_model.pth")



Epoch 1: New best validation loss: 1.4555. Model saved.
Epoch 1/50 - Train Loss: 1.4329, Val Loss: 1.4555, Val RMSE: 1.2064
Epoch 2: New best validation loss: 1.4546. Model saved.
Epoch 2/50 - Train Loss: 1.4319, Val Loss: 1.4546, Val RMSE: 1.2061
Epoch 3: New best validation loss: 1.4538. Model saved.
Epoch 3/50 - Train Loss: 1.4310, Val Loss: 1.4538, Val RMSE: 1.2057
Epoch 4: New best validation loss: 1.4531. Model saved.
Epoch 4/50 - Train Loss: 1.4302, Val Loss: 1.4531, Val RMSE: 1.2054
Epoch 5: New best validation loss: 1.4524. Model saved.
Epoch 5/50 - Train Loss: 1.4295, Val Loss: 1.4524, Val RMSE: 1.2052
Epoch 6: New best validation loss: 1.4518. Model saved.
Epoch 6/50 - Train Loss: 1.4287, Val Loss: 1.4518, Val RMSE: 1.2049
Epoch 7: New best validation loss: 1.4512. Model saved.
Epoch 7/50 - Train Loss: 1.4281, Val Loss: 1.4512, Val RMSE: 1.2047
Epoch 8: New best validation loss: 1.4506. Model saved.
Epoch 8/50 - Train Loss: 1.4275, Val Loss: 1.4506, Val RMSE: 1.2044
Epoch 9:

In [30]:
def load_trained_model(model, save_path):
    model.load_state_dict(torch.load(save_path))
    model.eval()  # Set the model to evaluation mode
    print("Trained model loaded successfully.")

In [None]:
# Helper function for Precision@K
def precision_at_k(true_ratings, predicted_ratings, k):
    top_k_indices = np.argsort(predicted_ratings)[-k:]  # Get top K indices
    relevant = np.sum([1 for i in top_k_indices if true_ratings[i] >= 4])  # Count relevant items (e.g., rating >= 4)
    return relevant / k

In [31]:
# Helper function for Mean Average Precision (MAP)
def mean_average_precision(true_ratings, predicted_ratings):
    sorted_indices = np.argsort(predicted_ratings)[::-1]  # Indices sorted by predicted scores in descending order
    relevant_count = 0
    precision_sum = 0.0

    for i, idx in enumerate(sorted_indices):
        if true_ratings[idx] >= 4:  # Define "relevant" as ratings >= 4
            relevant_count += 1
            precision_sum += relevant_count / (i + 1)

    return precision_sum / relevant_count if relevant_count > 0 else 0.0

In [32]:
# Evaluate the model on the test set
def evaluate_model(model, test_df, scaled_business_df, user_id_map, business_id_map, k=5):
    model.eval()
    user_predictions = {}
    user_true_ratings = {}
    rmse_ratings = []

    with torch.no_grad():
        for _, row in test_df.iterrows():
            user_id = row['user_id']
            business_id = row['business_id']
            true_rating = row['review_rating']

            if user_id not in user_id_map or business_id not in business_id_map:
                continue

            user_idx = user_id_map[user_id]
            business_idx = business_id_map[business_id]
            business_features = torch.tensor(
                scaled_business_df.loc[business_id].values, dtype=torch.float32, device=device
            ).unsqueeze(0)

            prediction = model(
                torch.tensor([user_idx], dtype=torch.long, device=device),
                torch.tensor([business_idx], dtype=torch.long, device=device),
                business_features
            )

            # Store predictions and true ratings for each user
            if user_id not in user_predictions:
                user_predictions[user_id] = []
                user_true_ratings[user_id] = []

            user_predictions[user_id].append(prediction.item())
            user_true_ratings[user_id].append(true_rating)

            rmse_ratings.append((true_rating, prediction.item()))

    # Calculate RMSE
    true_ratings, predicted_ratings = zip(*rmse_ratings)
    rmse = sqrt(mean_squared_error(true_ratings, predicted_ratings))
    print(f"Test RMSE: {rmse:.4f}")

    # Calculate MAP and Precision@K for all users
    all_precisions = []
    all_maps = []
    for user_id in user_predictions:
        preds = user_predictions[user_id]
        trues = user_true_ratings[user_id]

        # Calculate Precision@K
        if len(preds) >= k:
            p_at_k = precision_at_k(trues, preds, k)
            all_precisions.append(p_at_k)

        # Calculate MAP
        map_score = mean_average_precision(trues, preds)
        all_maps.append(map_score)

    mean_p_at_k = np.mean(all_precisions) if all_precisions else 0.0
    mean_map = np.mean(all_maps) if all_maps else 0.0

    print(f"Precision@{k}: {mean_p_at_k:.4f}")
    print(f"Mean Average Precision (MAP): {mean_map:.4f}")

load_trained_model(model,save_path='best_hybrid_model.pth')
evaluate_model(model, test_df, scaled_business_df, user_id_map, business_id_map, k=5)

  model.load_state_dict(torch.load(save_path))


Trained model loaded successfully.
Test RMSE: 1.1619
Precision@5: 0.7467
Mean Average Precision (MAP): 0.7714


In [18]:
train_df.head()

Unnamed: 0,user_id,business_id,review_rating,user_idx,business_idx
1,cJCiN7dtlGLW2-zApamFQA,6N44KZ0qDcY1n1Rz0Nw3bg,3.0,0,0
2,viWkK6--5ChiN3hlZ8XqTg,vhDWGF-8BfsxvS7Zo5Wv2w,4.0,1,1
3,DXY-aDr5b6pdY55AGee2Yg,w1rUV5W_TJ0M0MRwwrseWQ,5.0,2,2
5,cOeR53mlpxkKQXyD_ucRiQ,gzln_L7ch8z-Ob6autmumw,4.0,3,3
7,btzyD8sU1n7dlgHRBECgzw,8fLI8HwccIfQWi0UyH-1Fw,3.5,4,4


In [33]:
def recommend_for_group_with_model(group_user_ids, user_id_map, business_id_map, model, scaled_business_df, business_df, top_n=5, aggregation_method='average'):
    """
    Generate group recommendations using the trained hybrid model.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Ensure all group users are in the user_id_map
    missing_users = [user for user in group_user_ids if user not in user_id_map]
    if missing_users:
        raise ValueError(f"Missing embeddings for users: {missing_users}")

    # Retrieve user embeddings
    user_indices = [user_id_map[user] for user in group_user_ids]
    user_embeddings = model.user_embedding(torch.tensor(user_indices, dtype=torch.long, device=device))

    # Aggregate user embeddings to create a group embedding
    if aggregation_method == 'average':
        group_embedding = torch.mean(user_embeddings, dim=0, keepdim=True)
    elif aggregation_method == 'min':
        group_embedding, _ = torch.min(user_embeddings, dim=0, keepdim=True)
    elif aggregation_method == 'max':
        group_embedding, _ = torch.max(user_embeddings, dim=0, keepdim=True)
    else:
        raise ValueError("Unsupported aggregation method. Use 'average', 'min', or 'max'.")

    # Align business features with business_id_map
    ordered_business_ids = list(business_id_map.keys())
    scaled_business_df_aligned = scaled_business_df.loc[ordered_business_ids]
    business_features = torch.tensor(scaled_business_df_aligned.values, dtype=torch.float32, device=device)

    # Retrieve business embeddings
    business_indices = torch.arange(len(business_id_map), device=device)
    business_embeddings = model.business_embedding(business_indices)

    # Combine business embeddings and features
    feature_dim = business_features.shape[1]
    business_combined = torch.cat([business_embeddings, business_features[:, :feature_dim]], dim=1)

    # Ensure group_embedding matches combined feature dimensions
    group_embedding = torch.cat([group_embedding, torch.zeros(1, business_combined.shape[1] - group_embedding.shape[1], device=device)], dim=1)

    # Compute similarity scores between group embedding and businesses
    scores = F.cosine_similarity(group_embedding, business_combined)

    # Get top N businesses
    top_business_indices = scores.argsort(descending=True)[:top_n].cpu().numpy()
    recommended_business_ids = [ordered_business_ids[i] for i in top_business_indices]

    # Fetch business details
    recommendations = business_df[business_df["business_id"].isin(recommended_business_ids)].copy()
    recommendations['predicted_score'] = scores[top_business_indices].cpu().detach().numpy()

    return recommendations[["business_id", "name", "latitude", "longitude", "business_rating", "predicted_score"]]


In [34]:
group_user_ids = ["cJCiN7dtlGLW2-zApamFQA", "viWkK6--5ChiN3hlZ8XqTg", "ET8n-r7glWYqZhuR6GcdNw"]
group_recommendations = recommend_for_group_with_model(
    group_user_ids=group_user_ids,
    user_id_map=user_id_map,
    business_id_map=business_id_map,
    model=model,
    scaled_business_df=scaled_business_df,
    business_df=business_df,
    top_n=5,
    aggregation_method='average'
)
group_recommendations


Unnamed: 0,business_id,name,latitude,longitude,business_rating,predicted_score
3390,wifZgJ9IUjNxsmebpCRiRw,Red Lobster,36.0474,-86.65692,3.0,0.417023
7777,FygWE7b7ZN7JKKxbqKtMZw,Lemongrass Thai,36.129159,-86.855532,4.0,0.406376
10215,cAbdvzqtFLaAAMFIyPf2AA,Holdren's Steaks & Seafood,34.417114,-119.695856,4.0,0.39477
11786,KiARl7ZzvTWYAqiqC3BeTw,Chapala Mexican Restaurant,43.593324,-116.213963,3.5,0.379131
23012,ZBzqid0R71sjjfNFSMaLDA,Route 130 Diner,40.01599,-74.95482,4.0,0.377889


In [35]:
def recommend_for_cold_start_group(
    model, group_preferences, scaled_business_df, business_df, top_n=5, aggregation_method='average', group_weight=0.5
):
    """
    Recommends restaurants for a group based on aggregated preferences and scaled business features.
    
    Parameters:
    - model: Trained HybridRecommender model.
    - group_preferences: List of dictionaries containing feature preferences for each user in the group.
    - scaled_business_df: DataFrame containing scaled business features.
    - business_df: Original DataFrame with business details.
    - top_n: Number of recommendations to return.
    - aggregation_method: Method to aggregate group preferences ('average', 'min', 'max').
    - group_weight: Relative weight for the group profile's contribution.
    
    Returns:
    - DataFrame with recommended businesses including name, latitude, longitude, and rating.
    """
    # Aggregate group preferences into a single group profile
    aggregated_preferences = pd.DataFrame(group_preferences).reindex(columns=scaled_business_df.columns, fill_value=0)
    if aggregation_method == 'average':
        group_profile = aggregated_preferences.mean().values
    elif aggregation_method == 'min':
        group_profile = aggregated_preferences.min().values
    elif aggregation_method == 'max':
        group_profile = aggregated_preferences.max().values
    else:
        raise ValueError("Invalid aggregation method. Choose from 'average', 'min', or 'max'.")

    # Normalize the group profile
    group_profile = torch.tensor(group_profile, dtype=torch.float32, device=model.fc1.weight.device)
    group_profile /= (group_profile.norm() + 1e-8)

    # Prepare business features
    business_features = torch.tensor(
        scaled_business_df.values, dtype=torch.float32, device=model.fc1.weight.device
    )
    business_features /= (business_features.norm(dim=1, keepdim=True) + 1e-8)

    # Combine group profile and business features with weighted average
    combined_features = group_weight * group_profile.unsqueeze(0) + (1 - group_weight) * business_features

    # Remove extra batch dimension
    combined_features = combined_features.squeeze(0)

    # Use the model to predict scores
    with torch.no_grad():
        # Pass only combined features, no user_ids or business_ids needed
        predictions = model(user_ids=None, business_ids=None, features=combined_features)

    # Get top N recommendations
    top_indices = predictions.argsort(descending=True)[:top_n]
    recommended_business_ids = scaled_business_df.iloc[top_indices.cpu().numpy()].index

    # Retrieve business details
    recommendations = business_df[business_df["business_id"].isin(recommended_business_ids)]
    return recommendations[["name", "latitude", "longitude", "business_rating"]]


In [44]:
# Example preferences for a group
group_preferences = [
    {"business_rating": 5.0, "price_range_3": 1, "casual_ambience": 1},
    {"business_rating": 3.0, "price_range_2": 1, "classy_ambience": 1},
    {"business_rating": 4.0, "price_range_1": 1, "alcohol_full_bar": 1}
]


group_recommendations = recommend_for_cold_start_group(
    model=model,
    group_preferences=group_preferences,
    scaled_business_df=scaled_business_df,
    business_df=business_df,
    top_n=5,
    aggregation_method='average',
    group_weight=10 #Adjust this to weight the gorup preferences
)

group_recommendations



Unnamed: 0,name,latitude,longitude,business_rating
5709,Philadelphia Taxi Cab,40.001564,-75.079558,1.5
6426,Walmart Supercenter,39.858291,-85.975949,2.0
6918,St Clair Bowl,38.584796,-89.962776,3.0
21193,Walmart Supercenter,28.283328,-82.718215,2.0
29770,Lieser Skaff Alexander,27.947962,-82.4827,4.5


In [58]:
def get_restaurant_details(restaurant_name,lat,lon,api_key):
    """
    Get restaurant details using Google Places API
    Parameters:
    - restaurant_name: name of the restaurant.
    - lat: latitude
    - lon: longitude
    - api_key: Google Places API key

    Returns:
    Dictionary with restaurant details like 'name','address', 'is_open'
    """
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    params = {
        "key" : api_key,
        "location" : f"{lat},{lon}",
        "radius" : 100,
        "keyword" : restaurant_name,
    }

    response = requests.get(base_url,params=params)
    data = response.json()

    if data.get("status") == "OK" and data.get("results"):
        restaurant = data["results"][0]

        details = {
            "name" : restaurant.get("name"),
            "address" : restaurant.get("vicinity"),
            "is_open" : restaurant.get("opening_hours",{}).get("open_now","Unknown"),
            "place_id" : restaurant.get("place_id")
        }
        return details
    else:
        return {"error" : "Failed to fetch API details"}


In [59]:
#Function to load api key
def configure():
    load_dotenv()

In [60]:
def get_open_status(row,api_key):
    """
    Function to get the current open status of the restaurant.
    
    Parameters:
    - row: dataframe row
    - api_key: Google Places API key
    """
    try:
        details = get_restaurant_details(row['name'],row['latitude'],row['longitude'],api_key=api_key)
        return details.get("is_open","Unknown")
    except Exception as e:
        print(f"Error process row:{row}: error:{e}")
        return "Error"

In [61]:
api_key = os.getenv('api_key')
group_recommendations['open_status'] = group_recommendations.apply(get_open_status,axis=1,api_key=api_key)

group_recommendations

Unnamed: 0,name,latitude,longitude,business_rating,open_status
5709,Philadelphia Taxi Cab,40.001564,-75.079558,1.5,Unknown
6426,Walmart Supercenter,39.858291,-85.975949,2.0,False
6918,St Clair Bowl,38.584796,-89.962776,3.0,True
21193,Walmart Supercenter,28.283328,-82.718215,2.0,False
29770,Lieser Skaff Alexander,27.947962,-82.4827,4.5,False
