# Importing Libraries

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Loading and exploring the dataset.

Load the dataset

In [27]:
file_path = 'Dataset .csv'
data = pd.read_csv(file_path)

data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


Dropping irrelevant columns

In [28]:
data = data[['Cuisines', 'Country Code', 'City', 'Longitude', 'Latitude', 'Price range', 'Aggregate rating', 'Votes']]

# Preprocessing

Handling missing values

In [29]:
data.dropna(subset=['Cuisines'], inplace=True)

Encoding target variable

In [30]:
label_encoder = LabelEncoder()
data['Cuisines'] = label_encoder.fit_transform(data['Cuisines'])

# Splitting the data

Features and target variable

In [31]:
X = data.drop('Cuisines', axis=1)
y = data['Cuisines']

Split the data into training and testing sets

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Defining preprocessing for numeric and categorical features

In [33]:
numeric_features = ['Longitude', 'Latitude', 'Price range', 'Aggregate rating', 'Votes']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_features = ['Country Code', 'City']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

Bundle preprocessing for numeric and categorical features

In [34]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Model selection

Define the model

In [35]:
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

Train the model

In [36]:
model.fit(X_train, y_train)

# Model evaluation

Make predictions

In [37]:
y_pred = model.predict(X_test)

Evaluating the Model

In [38]:
accuracy = accuracy_score(y_test, y_pred)
all_classes = label_encoder.classes_
report = classification_report(y_test, y_pred, labels=range(len(all_classes)), target_names=all_classes, zero_division=0)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)

Accuracy: 0.06
Classification Report:
                                                                                               precision    recall  f1-score   support

                                                                                      Afghani       0.00      0.00      0.00         0
                                                                    Afghani, Mughlai, Chinese       0.00      0.00      0.00         1
                                                                        Afghani, North Indian       0.00      0.00      0.00         0
                                                    Afghani, North Indian, Pakistani, Arabian       0.00      0.00      0.00         0
                                                                                      African       0.00      0.00      0.00         0
                                                                          African, Portuguese       0.00      0.00      0.00         0
                

# Analyzing the results

In [39]:
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
results['Actual'] = label_encoder.inverse_transform(results['Actual'])
results['Predicted'] = label_encoder.inverse_transform(results['Predicted'])
print(results.head())

                    Actual                           Predicted
7142  Chinese, Street Food                            Desserts
1860        Asian, Seafood                            Japanese
4425          North Indian                           Fast Food
3219               Italian  North Indian, Chinese, Continental
8308    Chinese, Fast Food          Bakery, Fast Food, Chinese
