In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Read the restaurant CSV into a DataFrame and print its first rows.
# The space before ".csv" is part of the file name this notebook reads.
DATASET_PATH = 'Dataset .csv'
data = pd.read_csv(DATASET_PATH)
preview = data.head()
print(preview)

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [None]:
# Feature selection: pull out the target, then build the feature frame without
# 'Votes', 'Cuisines' and the target column itself ('Aggregate rating'), so the
# label is never present among the candidate features.
target = data['Aggregate rating']
features = data.drop(columns=['Votes', 'Cuisines', 'Aggregate rating'])

# Splitting the data into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Identify categorical and numerical columns
# NOTE(review): 'Rating color' and 'Rating text' look like labels derived from
# the aggregate rating; if so they leak the target into the model inputs.
# Confirm against the dataset and remove them from this list if that is the case.
categorical_columns = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu',
                       'Rating color', 'Rating text']
numerical_columns = ['Average Cost for two', 'Price range']

In [None]:
# Build a copy of the training features in which every 'Yes' becomes 'No' and
# every 'No' becomes 'Yes' for the flag columns below. X_train is not modified,
# and X_test is not touched by this cell.

# Flag columns whose values get exchanged
swap_columns = ['Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu']

# Both directions are given in one mapping, so the exchange happens in a single pass
yes_no_flip = {'Yes': 'No', 'No': 'Yes'}

X_train_swapped = X_train.copy()
flipped_flags = X_train_swapped[swap_columns].replace(yes_no_flip)
X_train_swapped[swap_columns] = flipped_flags


In [None]:
# Create a column transformer for preprocessing:
#   - numerical columns are standardized,
#   - categorical columns are one-hot encoded.
# Every category keeps its own indicator column (no drop='first'). With
# handle_unknown='ignore' an unseen category is encoded as all zeros, which is
# exactly how a dropped baseline category would be encoded, so combining the
# two options makes unknown values indistinguishable from the baseline.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns),  # Handle unknown categories
    ],
    remainder='drop',  # columns not listed above are not passed on to the regressor
)

# Create the pipeline with the preprocessor and the regressor
# (fixed random_state so repeated runs build the same forest)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])


In [None]:
# Re-train the model on the swapped dataset
pipeline.fit(X_train_swapped, y_train)

# The pipeline was fit on features whose Yes/No flags are exchanged, so the
# held-out features must go through the same exchange before prediction;
# otherwise the flags carry the opposite meaning at evaluation time.
X_test_swapped = X_test.copy()  # leave X_test itself untouched
X_test_swapped[swap_columns] = X_test_swapped[swap_columns].replace({'Yes': 'No', 'No': 'Yes'})

# Make predictions
y_pred = pipeline.predict(X_test_swapped)




In [None]:
# Score the held-out predictions: mean squared error and the coefficient of
# determination, each printed with two decimals.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')


Mean Squared Error: 0.04
R^2 Score: 0.98


In [None]:
# Example: predict the rating of a single hand-written restaurant record.
#
# The pipeline was fit on X_train_swapped, where 'Yes' and 'No' are exchanged in
# the four flag columns, so a 'No' below corresponds to a raw 'Yes' (and the
# 'Yes' in 'Switch to order menu' to a raw 'No').
# NOTE(review): this record is meant to describe a "full star" restaurant with
# ideal inputs, but a cost of 100, a price range of 2 and a rating text of
# 'Poor' do not obviously match that description — confirm the intended values
# against the categories and ranges actually present in the dataset.
ideal_inputs = {
    'Average Cost for two': 100,
    'Price range': 2,
    'Has Table booking': 'No',
    'Has Online delivery': 'No',
    'Is delivering now': 'No',
    'Switch to order menu': 'Yes',
    'Rating color': 'Green',
    'Rating text': 'Poor',
}

# One record -> one-row DataFrame with the same column names the pipeline expects
full_star_restaurant = pd.DataFrame([ideal_inputs])

# Predict rating for the full-star restaurant using the pipeline
predicted_full_star_rating = pipeline.predict(full_star_restaurant)
print(f'Predicted rating for the full-star restaurant: {predicted_full_star_rating[0]:.2f}')


Predicted rating for the full-star restaurant: 3.30
