<a href="https://colab.research.google.com/github/sankalp294/cognifyz-ds-internship/blob/main/07_Predictive_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# 1. Load the cleaned dataset
df = pd.read_csv("cleaned_dataset (1).csv", encoding="utf-8")

# 2. Feature engineering (adds three derived columns to df)
# Character length of the restaurant name; astype(str) lets .str.len()
# work even if the column holds non-string entries.
df["Name_len"] = df["Restaurant Name"].astype(str).str.len()

# Binary flags. Series.map leaves any value other than exactly "Yes"/"No"
# as NaN; those NaNs are turned into 0 by the fillna(0) on X below.
yes_no = {"Yes": 1, "No": 0}
df["Has_Table"] = df["Has Table booking"].map(yes_no)
df["Has_Delivery"] = df["Has Online delivery"].map(yes_no)

# 3. Select features & target
features = ["Price range", "Votes", "Name_len", "Has_Table", "Has_Delivery"]
target = "Aggregate rating"

# Rows without a recorded rating are left out instead of being given the
# median as a label: an imputed target is a fabricated ground truth that
# would be used for both fitting and scoring. When the target column has
# no missing values this selects every row, exactly as before.
labelled = df[target].notna()
X = df.loc[labelled, features].fillna(0)
y = df.loc[labelled, target]

# 4. Train–test split: hold out 20% of the rows, with a fixed seed so the
#    same partition is produced on every run
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# 5. Train model: a 300-tree random forest, seeded for repeatable results
#    (fit returns the estimator itself, so the call can be chained)
model = RandomForestRegressor(n_estimators=300, random_state=42).fit(X_train, y_train)

# 6. Evaluate model on the held-out rows
y_pred = model.predict(X_test)

# R² and root-mean-squared error of the predictions against the true ratings
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# One print call, one line per item, both metrics shown to three decimals
print(
    "✅ Model Evaluation",
    f"R² Score : {r2:.3f}",
    f"RMSE     : {rmse:.3f}",
    sep="\n",
)

# 7. Predict aggregate rating for a sample restaurant
sample_values = {
    "Price range" : [2],     # 1=low … 4=high
    "Votes"       : [200],   # number of votes
    "Name_len"    : [14],    # length of restaurant name
    "Has_Table"   : [1],     # 1 if table booking, else 0
    "Has_Delivery": [1]      # 1 if online delivery, else 0
}

# Select the columns through `features` so the sample always has the same
# columns, in the same order, as the training matrix. A feature missing from
# sample_values then fails here with a KeyError naming it, instead of
# surfacing later as a feature-name mismatch inside model.predict.
sample_input = pd.DataFrame(sample_values)[features]

# predict returns one value per row; the sample has a single row
predicted_rating = model.predict(sample_input)[0]
print(f"\n🎯 Predicted Aggregate Rating (sample): {predicted_rating:.2f}")


✅ Model Evaluation
R² Score : 0.938
RMSE     : 0.377

🎯 Predicted Aggregate Rating (sample): 3.31
