In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, f1_score

# --- Data loading ---
# Two inputs are read from fixed local paths: the country lookup is an Excel
# workbook, and the Zomato listing is a CSV decoded as latin-1.
country_code_df = pd.read_excel(
    "C:/Users/Hiremath/OneDrive/Desktop/Country-Code.xlsx"
)
zomato_df = pd.read_csv(
    "C:/Users/Hiremath/OneDrive/Desktop/raw.githubusercontent.com_dsrscientist_dataset4_main_zomato.csv",
    encoding='latin-1',
)

# Left join on 'Country Code': every row of zomato_df is kept, and the lookup
# columns are attached wherever a matching code exists.
zomato_merged_df = pd.merge(
    left=zomato_df,
    right=country_code_df,
    how='left',
    on='Country Code',
)

# --- Feature and target definitions ---
# Regression task: inputs used to predict 'Average Cost for two'.
selected_features_reg = ['Longitude', 'Latitude', 'Price range', 'Aggregate rating', 'Votes']

# Classification task: inputs used to predict 'Price range'.
selected_features_class = ['Longitude', 'Latitude', 'Average Cost for two', 'Aggregate rating', 'Votes']

# Columns expanded into indicator (dummy) columns below.
categorical_cols = [
    'Has Table booking',
    'Has Online delivery',
    'Is delivering now',
    'Switch to order menu',
]

# drop_first=True drops the first level of each encoded column.
# NOTE(review): none of the resulting indicator columns are listed in
# selected_features_reg / selected_features_class, so neither model sees them.
zomato_merged_df = pd.get_dummies(
    zomato_merged_df,
    drop_first=True,
    columns=categorical_cols,
)

# --- Design matrices and targets ---
X_reg, y_reg = zomato_merged_df[selected_features_reg], zomato_merged_df['Average Cost for two']
X_class, y_class = zomato_merged_df[selected_features_class], zomato_merged_df['Price range']

# --- Hold-out split ---
# Both tasks use the same settings: 20% of rows held out, seed fixed at 42.
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(
    X_reg, y_reg, random_state=42, test_size=0.2
)
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(
    X_class, y_class, random_state=42, test_size=0.2
)

# --- Regression: tune a random forest that predicts 'Average Cost for two' ---
# Every combination of the values below is evaluated (3 * 3 * 3 * 3 = 81
# candidates), each with 5-fold cross-validation on the training split.
param_grid_reg = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

regressor = RandomForestRegressor(random_state=42)
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's default score (R^2 for regressors) rather than by the MSE that is
# reported below. Confirm this is the intended selection criterion.
grid_search_reg = GridSearchCV(regressor, param_grid_reg, cv=5, n_jobs=-1)
grid_search_reg.fit(X_reg_train, y_reg_train)

# Winning parameter combination from the search.
best_params_reg = grid_search_reg.best_params_

# GridSearchCV defaults to refit=True: once the search finishes it has already
# fitted a clone of `regressor` (same random_state) with the best parameters on
# the whole training split. Reuse that fitted model instead of training an
# identical forest a second time.
best_regressor = grid_search_reg.best_estimator_
y_reg_pred = best_regressor.predict(X_reg_test)

# Hold-out error of the tuned model.
mse = mean_squared_error(y_reg_test, y_reg_pred)
print("Mean Squared Error for Regression:", mse)

# --- Classification: tune a random forest that predicts 'Price range' ---
# Every combination of the values below is evaluated (3 * 3 * 3 * 3 = 81
# candidates), each with 5-fold cross-validation on the training split.
param_grid_class = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

classifier = RandomForestClassifier(random_state=42)
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's default score (accuracy for classifiers) rather than by the
# weighted F1 that is reported below. Confirm this is the intended criterion.
grid_search_class = GridSearchCV(classifier, param_grid_class, cv=5, n_jobs=-1)
grid_search_class.fit(X_class_train, y_class_train)

# Winning parameter combination from the search.
best_params_class = grid_search_class.best_params_

# GridSearchCV defaults to refit=True: once the search finishes it has already
# fitted a clone of `classifier` (same random_state) with the best parameters
# on the whole training split. Reuse that fitted model instead of training an
# identical forest a second time.
best_classifier = grid_search_class.best_estimator_
y_class_pred = best_classifier.predict(X_class_test)

# Hold-out weighted F1 of the tuned model.
f1 = f1_score(y_class_test, y_class_pred, average='weighted')
print("Weighted F1 Score for Classification:", f1)

# Summary: report the winning parameter set of each grid search, regression
# first, then classification.
for _label, _best_params in (
    ("Best Hyperparameters for Regression:", best_params_reg),
    ("Best Hyperparameters for Classification:", best_params_class),
):
    print(_label, _best_params)


Mean Squared Error for Regression: 138493366.78193775
Weighted F1 Score for Classification: 0.9757790780124174
Best Hyperparameters for Regression: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}
Best Hyperparameters for Classification: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
