# Hybrid Method

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split as surprise_split

## Load Data

In [2]:
# Step 1: Load Datasets
restaurant_data = pd.read_csv("BangaloreZomatoData_with_rest_id.csv")
user_data = pd.read_csv("UserOrdersData.csv")

In [3]:
# Inspect Datasets
print("Restaurant Data Columns:", restaurant_data.columns)
print("User Data Columns:", user_data.columns)


Restaurant Data Columns: Index(['Name', 'URL', 'Cuisines', 'Area', 'Timing', 'Full_Address',
       'PhoneNumber', 'IsHomeDelivery', 'isTakeaway', 'isIndoorSeating',
       'isVegOnly', 'Dinner Ratings', 'Dinner Reviews', 'Delivery Ratings',
       'Delivery Reviews', 'KnownFor', 'PopularDishes', 'PeopleKnownFor',
       'AverageCost', 'rest_id'],
      dtype='object')
User Data Columns: Index(['user_id', 'rest_id', 'cost', 'rating', 'location'], dtype='object')


In [4]:
restaurant_data.head(2)

Unnamed: 0,Name,URL,Cuisines,Area,Timing,Full_Address,PhoneNumber,IsHomeDelivery,isTakeaway,isIndoorSeating,isVegOnly,Dinner Ratings,Dinner Reviews,Delivery Ratings,Delivery Reviews,KnownFor,PopularDishes,PeopleKnownFor,AverageCost,rest_id
0,Sri Udupi Park,https://www.zomato.com/bangalore/sri-udupi-par...,"South Indian, North Indian, Chinese, Street Fo...","Indiranagar, Bangalore",7am – 11pm (Today),"273, Monalisa, 6th Main, 100 Feet Road, Indira...",919945977774,1,1,1,1,4.0,462,4.1,16000,,"Filtered Coffee, Sambhar, Pav Bhaji, Gobi Manc...","Economical, Prompt Service, Hygiene, Quality F...",450,R0001
1,Meghana Foods,https://www.zomato.com/bangalore/meghana-foods...,"Biryani, Andhra, North Indian, Seafood","Indiranagar, Bangalore",Opens at 6:30pm,"544, First Floor, CMH Road, Near Indiranagar M...",918041135050,1,1,1,0,4.3,1654,4.3,28600,Spicy Chicken Biryani,"Authentic Hyderabadi Biryani, Paneer Biryani, ...","Boneless Chicken Biryani, Ample Seating Area, ...",700,R0002


In [5]:
user_data.head(2)

Unnamed: 0,user_id,rest_id,cost,rating,location
0,U0350,R6892,1000,4,"Electronic City, Bangalore"
1,U0825,R7304,800,2,"JP Nagar, Bangalore"


In [6]:
# Preprocess Restaurant Data
restaurant_data['Cuisines'] = restaurant_data['Cuisines'].fillna('Unknown')
restaurant_data['KnownFor']= restaurant_data['KnownFor'].fillna('Unknown')

## Content-Based Filtering

In [7]:
# Step 2: Content-Based Filtering
# Combine relevant features
restaurant_data['CombinedFeatures'] = restaurant_data['Cuisines'] + " " + restaurant_data['KnownFor']

In [8]:
# Vectorize features
vectorizer = TfidfVectorizer(stop_words='english')
feature_matrix = vectorizer.fit_transform(restaurant_data['CombinedFeatures'])

In [9]:
# Compute similarity matrix
similarity_matrix = cosine_similarity(feature_matrix)

In [10]:
# Function to shortlist restaurants
def get_similar_restaurants(rest_id, top_n=10):
    idx = restaurant_data[restaurant_data['rest_id'] == rest_id].index[0]
    similar_indices = similarity_matrix[idx].argsort()[::-1][1:top_n + 1]
    return restaurant_data.iloc[similar_indices]['rest_id'].tolist()

## Collaborative Filtering

In [11]:
# Step 3: Collaborative Filtering
# Prepare user-item interaction data for Surprise
reader = Reader(rating_scale=(1, 5))  # Adjust rating scale as needed
interaction_data = Dataset.load_from_df(user_data[['user_id', 'rest_id', 'rating']], reader)
trainset, testset = surprise_split(interaction_data, test_size=0.2, random_state=42)

In [12]:
# Train SVD model
svd_model = SVD()
svd_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x155cc690ce0>

In [13]:
# Function to rank restaurants for a user
def rank_restaurants(user_id, shortlisted_restaurants):
    predictions = [svd_model.predict(user_id, rest_id) for rest_id in shortlisted_restaurants]
    ranked = sorted(predictions, key=lambda x: x.est, reverse=True)
    return [pred.iid for pred in ranked]

## Recommendation

In [14]:
def recommend_restaurants(user_id, rest_id, top_n=5):
    # Step 4.1: Content-Based Filtering
    shortlisted = get_similar_restaurants(rest_id, top_n)
    # Step 4.2: Collaborative Filtering
    ranked = rank_restaurants(user_id, shortlisted)

    # Get additional restaurant information
    recommended_restaurants = restaurant_data[restaurant_data['rest_id'].isin(ranked)]



    #Create a dictionary to store the results
    results = []
    for rest_id_rec in ranked:
        restaurant_info = restaurant_data[restaurant_data['rest_id']==rest_id_rec]
        if not restaurant_info.empty:
            restaurant_name = restaurant_info['Name'].iloc[0]
            price = restaurant_info['AverageCost'].iloc[0]
            cuisines = restaurant_info['Cuisines'].iloc[0]
            results.append({'rest_id': rest_id_rec, 'RestaurantName': restaurant_name, 'price': price, 'cuisines': cuisines })
        else:
            results.append({'rest_id': rest_id_rec, 'RestaurantName': 'Not found', 'price': 'Not found', 'cuisines': 'Not found'})

    # Convert the results list to a DataFrame
    df_results = pd.DataFrame(results)

    #Get user's previous restaurant
    user_prev_rest = user_data[user_data['user_id']==user_id]['rest_id'].tolist()
    #Print the previous restaurants of the user
    print("Previous Restaurants of user", user_id,":",user_prev_rest)

    return df_results


# Ranked name in collaborative filtering is 'rest_id' as used in the rank_restaurants function.

## System Testing

In [15]:
# Example usage (replace with your actual user_id and rest_id)
user_id = 'U0851'

In [16]:
# Display past orders and cuisines
past_orders = user_data[user_data['user_id'] == user_id]
if not past_orders.empty:
    print(f"Past orders for user {user_id}:")
    for index, row in past_orders.iterrows():
        restaurant_info = restaurant_data[restaurant_data['rest_id'] == row['rest_id']]
        if not restaurant_info.empty:
          cuisine = restaurant_info['Cuisines'].iloc[0]
          print(f"- Restaurant ID: {row['rest_id']}, Cuisine: {cuisine}")
        else:
          print(f"- Restaurant ID: {row['rest_id']}, Cuisine information not found.")
else:
    print(f"No past orders found for user {user_id}")

Past orders for user U0851:
- Restaurant ID: R2908, Cuisine: South Indian, North Indian, Chinese, Street Food, Shake
- Restaurant ID: R4015, Cuisine: Beverages, Salad, Healthy Food, Shake
- Restaurant ID: R2093, Cuisine: Fast Food


In [17]:
rest_id = 'R4015'

In [18]:
recommendations_df = recommend_restaurants(user_id, rest_id)

Previous Restaurants of user U0851 : ['R2908', 'R4015', 'R2093']


In [19]:
recommendations_df

Unnamed: 0,rest_id,RestaurantName,price,cuisines
0,R1067,Nutrivores,150,"Healthy Food, Salad"
1,R2600,O4H - Order For Health,250,"Healthy Food, Salad, Beverages"
2,R0255,The Salad Studio,400,"Salad, Healthy Food"
3,R2425,Garden Fresh,200,"Juices, Healthy Food, Salad, Beverages, Shake"
4,R3522,The Salad Studio,250,"Salad, Healthy Food"


In [20]:
print(recommendations_df.values)

[['R1067' 'Nutrivores' 150 'Healthy Food, Salad']
 ['R2600' 'O4H - Order For Health' 250 'Healthy Food, Salad, Beverages']
 ['R0255' 'The Salad Studio' 400 'Salad, Healthy Food']
 ['R2425' 'Garden Fresh' 200
  'Juices, Healthy Food, Salad, Beverages, Shake']
 ['R3522' 'The Salad Studio' 250 'Salad, Healthy Food']]
