In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.neighbors import NearestNeighbors
from sklearn.impute import SimpleImputer
import pickle
import warnings
warnings.filterwarnings('ignore')

# Directory holding the CSV artifacts produced by the preprocessing step.
processed_data = "../processed_data"

# Load the restaurant table and the three feature matrices.
restaurant_data = pd.read_csv(f'{processed_data}/processed_restaurant_data.csv')
content_features = pd.read_csv(f'{processed_data}/content_features.csv')
text_features = pd.read_csv(f'{processed_data}/text_features.csv')
hybrid_features = pd.read_csv(f'{processed_data}/hybrid_features.csv')

print(
    f"Restaurant data shape: {restaurant_data.shape}",
    f"Content features shape: {content_features.shape}",
    f"Text features shape: {text_features.shape}",
    f"Hybrid features shape: {hybrid_features.shape}",
    sep="\n",
)

# Every table must have the same number of rows; fail fast otherwise.
shapes = [
    len(restaurant_data),
    len(content_features),
    len(text_features),
    len(hybrid_features),
]
row_counts_agree = len(set(shapes)) == 1
if not row_counts_agree:
    raise ValueError(f"Shape mismatch! Shapes: {shapes}")

# Report the total number of missing cells in each table.
print(f"\n NaN VALIDATION ")
print(
    f"NaNs in restaurant_data: {restaurant_data.isna().sum().sum()}",
    f"NaNs in content_features: {content_features.isna().sum().sum()}",
    f"NaNs in text_features: {text_features.isna().sum().sum()}",
    f"NaNs in hybrid_features: {hybrid_features.isna().sum().sum()}",
    sep="\n",
)

Restaurant data shape: (15515, 9)
Content features shape: (15515, 241)
Text features shape: (15515, 80)
Hybrid features shape: (15515, 58)

 NaN VALIDATION 
NaNs in restaurant_data: 0
NaNs in content_features: 0
NaNs in text_features: 0
NaNs in hybrid_features: 0


In [11]:
import numpy as np
import pandas as pd

print("\nINFINITE VALUES CHECK")

# Count +/-inf entries in each feature matrix. Non-numeric cells are coerced
# to NaN first so the conversion to a float array cannot fail on stray strings.
for name, features in [
    ("content_features", content_features),
    ("text_features", text_features),
    ("hybrid_features", hybrid_features),
]:
    if isinstance(features, pd.DataFrame):
        # Column-wise coercion keeps the frame's shape; pd.to_numeric itself
        # only accepts 1-D input.
        numeric = features.apply(pd.to_numeric, errors="coerce")
    elif isinstance(features, pd.Series):
        # A Series has no .stack(); coerce it directly.
        numeric = pd.to_numeric(features, errors="coerce")
    else:
        # Anything else (e.g. an ndarray) is passed through unchanged.
        numeric = features

    numeric_arr = np.asarray(numeric, dtype=float)
    print(f"Inf in {name}: {np.isinf(numeric_arr).sum()}")



INFINITE VALUES CHECK


Inf in content_features: 0
Inf in text_features: 0
Inf in hybrid_features: 0


In [12]:
# Initialize all recommenders
import sys, os
sys.path.append(os.path.abspath(".."))  # add parent directory

from src.text_based_recommender import TextBasedRecommender
from src.content_base_recommender import ContentBasedRecommender
from src.hybrid_recommender import HybridRecommender

print("INITIALIZING RECOMMENDERS")

# Registry of recommenders keyed by method name. Every slot starts as None and
# is only filled in when the corresponding constructor succeeds, so a failed
# recommender stays None.
recommenders = {'content': None, 'text': None, 'hybrid': None}

# Content-based recommender
try:
    content_recommender = ContentBasedRecommender(restaurant_data, content_features)
    recommenders['content'] = content_recommender
    print("# Content-based recommender initialized")
except Exception as init_error:
    print(f"- Content-based recommender failed: {init_error}")

# Text-based recommender
try:
    text_recommender = TextBasedRecommender(restaurant_data, text_features)
    recommenders['text'] = text_recommender
    print("# Text-based recommender initialized")
except Exception as init_error:
    print(f"- Text-based recommender failed: {init_error}")

# Hybrid recommender (built from both the content and the text features)
try:
    hybrid_recommender = HybridRecommender(restaurant_data, content_features, text_features)
    recommenders['hybrid'] = hybrid_recommender
    print("# Hybrid recommender initialized")
except Exception as init_error:
    print(f"- Hybrid recommender failed: {init_error}")

# At least one recommender has to be usable for the rest of the notebook.
working_recommenders = [key for key in recommenders if recommenders[key] is not None]
print(f"\nWorking recommenders: {working_recommenders}")

if len(working_recommenders) == 0:
    raise ValueError("No recommenders could be initialized!")

INITIALIZING RECOMMENDERS
Computing cosine similarity matrix...
Similarity computation complete!
# Content-based recommender initialized
Computing cosine similarity matrix...
Similarity computation complete!
# Text-based recommender initialized
Computing cosine similarity matrix...
Similarity computation complete!
Computing cosine similarity matrix...
Similarity computation complete!
# Hybrid recommender initialized

Working recommenders: ['content', 'text', 'hybrid']


### Testing and Validation

In [13]:
print(" TESTING RECOMMENDERS ")

# Use the first restaurant in the table as the query for a quick smoke test.
sample_restaurant = restaurant_data['name'].iloc[0]
print(f"Testing with restaurant: '{sample_restaurant}'")

# Columns shown for every recommender; the score column is appended per method.
display_columns = ['name', 'cuisines', 'location']

# Test each recommender
for rec_type, recommender in recommenders.items():
    if recommender is None:
        print(f"\n{rec_type.upper()}: SKIPPED (not initialized)")
        continue

    print(f"\n{rec_type.upper()} RECOMMENDATIONS:")
    try:
        # The hybrid recommender exposes a different method and score column
        # than the content/text recommenders.
        if rec_type == 'hybrid':
            recs = recommender.get_hybrid_recommendations(sample_restaurant, 5)
            score_col = 'hybrid_score'
        else:
            recs = recommender.get_recommendations(sample_restaurant, 5)
            score_col = 'similarity_score'

        if isinstance(recs, pd.DataFrame) and not recs.empty:
            print(recs[display_columns + [score_col]].to_string(index=False))
        else:
            # Anything that is not a non-empty DataFrame is shown verbatim.
            print(f"Result: {recs}")

    except Exception as e:
        print(f"Error testing {rec_type}: {e}")

 TESTING RECOMMENDERS 
Testing with restaurant: 'Kalingas'

CONTENT RECOMMENDATIONS:
                  name           cuisines location  similarity_score
              Kalingas   Oriya, Fast Food      BTM          1.820000
              Kalingas   Oriya, Fast Food      BTM          1.819969
              Kalingas   Oriya, Fast Food      BTM          1.819932
Aramane Donne Biriyani Biryani, Fast Food      BTM          1.411885
Aramane Donne Biriyani Biryani, Fast Food      BTM          1.411885

TEXT RECOMMENDATIONS:
      name                       cuisines location  similarity_score
  Kalingas               Oriya, Fast Food      BTM           1.82000
  Kalingas               Oriya, Fast Food      BTM           1.82000
  Kalingas               Oriya, Fast Food      BTM           1.82000
Desi Vdesi Fast Food, North Indian, Rolls      BTM           1.31921
Desi Vdesi Fast Food, North Indian, Rolls      BTM           1.31921

HYBRID RECOMMENDATIONS:
                          name         

In [6]:
print("\n INTERACTIVE RECOMMENDATION SYSTEM ")
# Methods whose recommender was successfully initialized.
available_methods = [k for k, v in recommenders.items() if v is not None]
print("Available recommenders:", available_methods)

# Get user input
restaurant_name = input("\nEnter restaurant name: ").strip()
if not restaurant_name:
    print("Please enter a restaurant name.")
elif not available_methods:
    print("No recommenders are available.")
else:

    method = input("Choose method (content/text/hybrid): ").strip().lower()
    if method not in available_methods:
        # Prefer hybrid as the fallback, but only if it is actually usable;
        # otherwise take the first recommender that is.
        fallback = 'hybrid' if 'hybrid' in available_methods else available_methods[0]
        print(f"Method '{method}' not available. Using {fallback}.")
        method = fallback

    # An empty or non-numeric answer falls back to 10. Only ValueError is
    # caught so Ctrl-C / kernel interrupts still propagate.
    try:
        n_recs = int(input("Number of recommendations (default 10): ") or "10")
    except ValueError:
        n_recs = 10
    if n_recs < 1:
        # Zero or negative counts make no sense; use the default instead.
        n_recs = 10

    print(f"\nGetting {method} recommendations for '{restaurant_name}'...")

    # Get recommendations
    try:
        recommender = recommenders[method]
        if method == 'hybrid':
            results = recommender.get_hybrid_recommendations(restaurant_name, n_recs)
        else:
            results = recommender.get_recommendations(restaurant_name, n_recs)

        if isinstance(results, str):
            # A string result is treated as an error message.
            print(f"Error: {results}")
        elif isinstance(results, pd.DataFrame) and not results.empty:
            print(f"\nRecommendations:")
            print(results.to_string(index=False))
        else:
            print("No recommendations found.")

    except Exception as e:
        print(f"Error: {e}")


 INTERACTIVE RECOMMENDATION SYSTEM 
Available recommenders: ['content', 'text', 'hybrid']

Getting hybrid recommendations for 'kfc'...

Recommendations:
             name                                 cuisines                   location  rating  cost_for_two     rest_type online_order book_table  hybrid_score
       McDonald's                        Burger, Fast Food                        HSR     3.8         500.0   Quick Bites          Yes         No      6.222232
              KFC                        Burger, Fast Food      Koramangala 6th Block     3.8         450.0   Quick Bites          Yes         No      5.563127
 Kamat Bugle Rock                  South Indian, Fast Food               Basavanagudi     3.9         300.0   Quick Bites          Yes         No      2.428624
        Chaatimes                   Street Food, Fast Food               Basavanagudi     3.8         200.0   Quick Bites          Yes         No      2.373598
Five Star Chicken                        Fast 

In [15]:
print("=== SAVING MODELS ===")
import os

# Make sure the target directory exists before writing any pickles.
os.makedirs('../trained_model', exist_ok=True)
saved_models = []

for rec_type, recommender in recommenders.items():
    # Entries that are None have nothing to serialize.
    if recommender is None:
        continue

    # A failure on one recommender is reported and does not stop the others.
    try:
        filename = f'../trained_model/{rec_type}_recommender.pkl'
        with open(filename, 'wb') as pickle_file:
            pickle.dump(recommender, pickle_file)
        saved_models.append(filename)
        print(f"✓ Saved {filename}")
    except Exception as save_error:
        print(f"✗ Failed to save {rec_type}: {save_error}")

print(f"\nSuccessfully saved {len(saved_models)} models: {saved_models}")


=== SAVING MODELS ===
✓ Saved ../trained_model/content_recommender.pkl
✓ Saved ../trained_model/text_recommender.pkl
✓ Saved ../trained_model/hybrid_recommender.pkl

Successfully saved 3 models: ['../trained_model/content_recommender.pkl', '../trained_model/text_recommender.pkl', '../trained_model/hybrid_recommender.pkl']


In [8]:
def _present_or_default(row, key, default='Unknown'):
    """Return ``row[key]``, or ``default`` when the key is absent or its value is missing (NaN/None)."""
    value = row.get(key, default)
    return value if pd.notna(value) else default


def api_get_recommendations(restaurant_name, method='hybrid', n_recommendations=10):
    """API-friendly recommendation function.

    Looks up the recommender registered under ``method`` in the module-level
    ``recommenders`` dict and converts its result into a plain dictionary.

    Args:
        restaurant_name: Name of the restaurant to find recommendations for.
        method: Key into ``recommenders``. ``'hybrid'`` calls
            ``get_hybrid_recommendations`` and reads ``hybrid_score``; any other
            key calls ``get_recommendations`` and reads ``similarity_score``.
        n_recommendations: Number of recommendations requested from the
            recommender.

    Returns:
        dict: On success, ``status='success'`` together with
        ``input_restaurant``, ``method_used``, ``count`` and ``recommendations``
        (a list of per-restaurant dicts; empty when the recommender returned
        nothing usable). On failure, ``status='error'`` and a ``message``.
        This function does not raise: unexpected exceptions are reported as an
        error dict.
    """

    try:
        if method not in recommenders or recommenders[method] is None:
            available_methods = [k for k, v in recommenders.items() if v is not None]
            return {
                'status': 'error',
                'message': f'Method {method} not available. Available methods: {available_methods}'
            }
        recommender = recommenders[method]

        if method == 'hybrid':
            recs = recommender.get_hybrid_recommendations(restaurant_name, n_recommendations)
            score_col = 'hybrid_score'
        else:
            recs = recommender.get_recommendations(restaurant_name, n_recommendations)
            score_col = 'similarity_score'

        # A string result is treated as an error message from the recommender.
        if isinstance(recs, str):
            return {'status': 'error', 'message': recs}

        if isinstance(recs, pd.DataFrame) and not recs.empty:
            recommendations = []
            for _, row in recs.iterrows():
                rec = {
                    'name': row['name'],
                    'cuisines': row['cuisines'],
                    'location': row['location'],
                    # Missing numeric values become None rather than NaN.
                    'rating': float(row['rating']) if pd.notna(row['rating']) else None,
                    'cost_for_two': float(row['cost_for_two']) if pd.notna(row['cost_for_two']) else None,
                    # Optional columns: 'Unknown' when the column is absent
                    # *or* the cell itself is missing.
                    'restaurant_type': _present_or_default(row, 'rest_type'),
                    'online_order': _present_or_default(row, 'online_order'),
                    'table_booking': _present_or_default(row, 'book_table'),
                    'score': float(row[score_col])
                }
                recommendations.append(rec)

            return {
                'status': 'success',
                'input_restaurant': restaurant_name,
                'method_used': method,
                'count': len(recommendations),
                'recommendations': recommendations
            }
        else:
            return {
                'status': 'success',
                'input_restaurant': restaurant_name,
                'method_used': method,
                'count': 0,
                'recommendations': []
            }

    except Exception as e:
        return {
            'status': 'error',
            'message': f'Unexpected error: {str(e)}'
        }
# Smoke-test the helper with its default arguments (method='hybrid', n_recommendations=10).
api_get_recommendations("KFC")

{'status': 'success',
 'input_restaurant': 'KFC',
 'method_used': 'hybrid',
 'count': 10,
 'recommendations': [{'name': 'KFC',
   'cuisines': 'Burger, Fast Food',
   'location': 'Koramangala 6th Block',
   'rating': 3.8,
   'cost_for_two': 450.0,
   'restaurant_type': 'Quick Bites',
   'online_order': 'Yes',
   'table_booking': 'No',
   'score': 4.57238682441994},
  {'name': 'Kamat Bugle Rock',
   'cuisines': 'South Indian, Fast Food',
   'location': 'Basavanagudi',
   'rating': 3.9,
   'cost_for_two': 300.0,
   'restaurant_type': 'Quick Bites',
   'online_order': 'Yes',
   'table_booking': 'No',
   'score': 3.7886534535299554},
  {'name': 'Chaatimes',
   'cuisines': 'Street Food, Fast Food',
   'location': 'Basavanagudi',
   'rating': 3.8,
   'cost_for_two': 200.0,
   'restaurant_type': 'Quick Bites',
   'online_order': 'Yes',
   'table_booking': 'No',
   'score': 3.7028122895494087},
  {'name': 'Five Star Chicken',
   'cuisines': 'Fast Food, Burger',
   'location': 'Bellandur',
   'r

In [9]:
# Final summary banner for the notebook run.
banner = "=" * 60
print("\n" + banner, "MODEL TRAINING COMPLETE!", banner, sep="\n")

# What was loaded and which recommenders are usable.
print(
    f" Restaurant data loaded: {len(restaurant_data)} restaurants",
    f" Feature matrices prepared with no NaN values",
    f" Recommenders initialized: {[k for k, v in recommenders.items() if v is not None]}",
    sep="\n",
)

# Size of the dataset and of each feature matrix.
print(
    f"\nDATASET SUMMARY:",
    f"   - Total restaurants: {len(restaurant_data)}",
    f"   - Content features: {content_features.shape[1]} dimensions",
    f"   - Text features: {text_features.shape[1]} dimensions",
    f"   - Hybrid features: {hybrid_features.shape[1]} dimensions",
    sep="\n",
)

print(f"\n READY TO USE..")

print(f"\n All systems ready for evaluation phase!")


MODEL TRAINING COMPLETE!
 Restaurant data loaded: 15515 restaurants
 Feature matrices prepared with no NaN values
 Recommenders initialized: ['content', 'text', 'hybrid']

DATASET SUMMARY:
   - Total restaurants: 15515
   - Content features: 241 dimensions
   - Text features: 80 dimensions
   - Hybrid features: 58 dimensions

 READY TO USE..

 All systems ready for evaluation phase!
