# **Step-1: Importing LIbraries**

In [29]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

# **Step-2: Load dataset**

In [30]:
df = pd.read_csv("/content/Dataset .csv")
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [32]:
df.describe()


Unnamed: 0,Restaurant ID,Country Code,Longitude,Latitude,Average Cost for two,Price range,Aggregate rating,Votes
count,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0,9551.0
mean,9051128.0,18.365616,64.126574,25.854381,1199.210763,1.804837,2.66637,156.909748
std,8791521.0,56.750546,41.467058,11.007935,16121.183073,0.905609,1.516378,430.169145
min,53.0,1.0,-157.948486,-41.330428,0.0,1.0,0.0,0.0
25%,301962.5,1.0,77.081343,28.478713,250.0,1.0,2.5,5.0
50%,6004089.0,1.0,77.191964,28.570469,400.0,2.0,3.2,31.0
75%,18352290.0,1.0,77.282006,28.642758,700.0,2.0,3.7,131.0
max,18500650.0,216.0,174.832089,55.97698,800000.0,4.0,4.9,10934.0


# **STEP 3: Select Features for Recommendation**

In [33]:
df_features = df[['Cuisines', 'Price range', 'City']]
df_features.head()
df_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Cuisines     9542 non-null   object
 1   Price range  9551 non-null   int64 
 2   City         9551 non-null   object
dtypes: int64(1), object(2)
memory usage: 224.0+ KB


# **STEP 4: Handle Missing Values**

In [34]:
df_features.isnull().sum()        # Check missing values
df_features = df_features.fillna("Unknown")
df_features.isnull().sum()        # Verify no missing values

Unnamed: 0,0
Cuisines,0
Price range,0
City,0


# **STEP 5: Encode Categorical Variables**

In [35]:
# Convert text data into numerical format
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(df_features)

encoded_features   # This is the numerical feature matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 28653 stored elements and shape (9551, 1971)>

# **STEP 6: Compute Similarity using Cosine**

In [36]:
similarity_matrix = cosine_similarity(encoded_features)
similarity_matrix

array([[1.        , 0.66666667, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.66666667, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.33333333, 0.33333333,
        0.        ],
       ...,
       [0.        , 0.        , 0.33333333, ..., 1.        , 0.66666667,
        0.33333333],
       [0.        , 0.        , 0.33333333, ..., 0.66666667, 1.        ,
        0.33333333],
       [0.        , 0.        , 0.        , ..., 0.33333333, 0.33333333,
        1.        ]])

# **STEP 7: Create Recommendation Function**

In [37]:
def recommend_restaurants(cuisine, price_range, city, top_n=5):

    # Create a dataframe of user input
    user_pref = pd.DataFrame({
        'Cuisines': [cuisine],
        'Price range': [price_range],
        'City': [city]
    })

    # Encode user input using same encoder
    user_encoded = encoder.transform(user_pref)

    # Calculate similarity between user and all restaurants
    similarity_scores = cosine_similarity(user_encoded, encoded_features)[0]

    # Get indices of top similar restaurants
    top_indexes = similarity_scores.argsort()[::-1][:top_n]

    # Return top N recommended restaurants
    return df.iloc[top_indexes][
        ['Restaurant Name', 'Cuisines', 'Price range', 'City', 'Aggregate rating']
    ]

# **STEP 8: Test the Recommendation System**

In [38]:
result = recommend_restaurants(
    cuisine="Italian",
    price_range=4,
    city="Makati City",
    top_n=5
)

result

Unnamed: 0,Restaurant Name,Cuisines,Price range,City,Aggregate rating
3767,Artusi Ristorante e Bar,Italian,4,New Delhi,4.1
802,Virgin Courtyard,Italian,4,Chandigarh,4.4
1523,Bella Cucina - Le Meridien Gurgaon,Italian,4,Gurgaon,4.1
357,Maggiano's Little Italy,Italian,4,Orlando,4.4
9320,Ombra,Italian,4,Wellington City,4.5
