# STEP 1: Import Required Libraries

In [62]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor

# STEP 2: Load the Dataset

In [63]:
# Read the restaurant CSV into a DataFrame. The file name really does contain
# a space before the extension, so it is reproduced exactly.
df = pd.read_csv(filepath_or_buffer="Dataset .csv")

# Preview the first five rows to confirm the load and inspect the columns.
df.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


# STEP 3: Standardize Column Names

In [64]:
# Normalise the headers step by step: trim surrounding whitespace, lowercase,
# then turn inner spaces into underscores (e.g. "Price range" -> "price_range").
cleaned_columns = df.columns.str.strip()
cleaned_columns = cleaned_columns.str.lower()
df.columns = cleaned_columns.str.replace(' ', '_')


# STEP 4: Handle Missing Values

In [65]:
# Compute the numeric fill values up front so each imputation line reads plainly.
most_common_price_range = df['price_range'].mode()[0]
mean_aggregate_rating = df['aggregate_rating'].mean()

# Missing cuisines get an explicit placeholder category.
df['cuisines'] = df['cuisines'].fillna('Unknown')
# Missing price ranges fall back to the most frequent value.
df['price_range'] = df['price_range'].fillna(most_common_price_range)
# Missing ratings fall back to the column mean.
df['aggregate_rating'] = df['aggregate_rating'].fillna(mean_aggregate_rating)


# STEP 5: Encode Categorical Variables

In [66]:
# Fit the encoder on the cuisine strings, then map each string to its integer
# label. The fitted encoder is kept so labels can be decoded later.
le_cuisine = LabelEncoder()
le_cuisine.fit(df['cuisines'])
df['cuisine_encoded'] = le_cuisine.transform(df['cuisines'])


# STEP 6: Select Features & Target Variable 

In [68]:
# Feature matrix: the encoded cuisine label and the price range.
X = df.loc[:, ['cuisine_encoded', 'price_range']]
# Target vector: the aggregate rating the model should predict.
y = df.loc[:, 'aggregate_rating']


# STEP 7: Split Data into Training & Testing Sets

In [69]:
from sklearn.model_selection import train_test_split


In [70]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


# STEP 8: Train & Evaluate Decision Tree Regression Model

In [71]:
# DecisionTreeRegressor is already imported in the notebook's first cell, so it
# is not re-imported here.
# The fixed random_state makes the fitted tree reproducible across runs.
dt_model = DecisionTreeRegressor(random_state=42)
# Fit on the training split only; the test split is reserved for evaluation.
dt_model.fit(X_train, y_train)



In [58]:
# Predict ratings for the held-out test rows with the fitted tree.
dt_pred = dt_model.predict(X=X_test)

In [59]:
# mean_squared_error and r2_score come from sklearn.metrics, imported in the
# notebook's first cell; without that import this cell fails with NameError on
# a fresh kernel.
# Compare the held-out targets with the tree's predictions.
dt_mse = mean_squared_error(y_test, dt_pred)
dt_r2 = r2_score(y_test, dt_pred)

print("Decision Tree MSE:", dt_mse)
print("Decision Tree R2:", dt_r2)


Decision Tree MSE: 1.7767728073954965
Decision Tree R2: 0.21938149053490275


# STEP 9: Interpret Feature Importance 

In [60]:
# Pair each feature name with the importance the fitted tree assigned to it.
importance_table = pd.DataFrame({
    'Feature': X.columns,
    'Importance': dt_model.feature_importances_,
})

# Rank the features from most to least important.
feature_importance = importance_table.sort_values(by='Importance', ascending=False)

feature_importance


Unnamed: 0,Feature,Importance
0,cuisine_encoded,0.51305
1,price_range,0.48695
