# Task: Cuisine Classification
# Objective: Develop a machine learning model to classify restaurants based on their cuisines.
     Steps:
     Preprocess the dataset by handling missing values
     and encoding categorical variables.
     Split the data into training and testing sets.
     Select a classification algorithm (e.g., logistic
     regression, random forest) and train it on the
     training data.
     Evaluate the model's performance using
     appropriate classification metrics (e.g., accuracy,
     precision, recall) on the testing data.
     Analyze the model's performance across different
     cuisines and identify any challenges or biases.



In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix , accuracy_score, classification_report,recall_score
from sklearn.preprocessing import LabelEncoder


In [24]:
data = pd.read_csv(r'C:\Users\tirth\Desktop\Cognifyz\Dataset .csv')


# Preproccesing the Data

In [25]:
data.drop_duplicates(inplace=True) # drop duplicates
data.drop(['Restaurant ID', 'Restaurant Name'], axis=1, inplace=True) # drop unnecessary columns
data.isna().sum() # check for missing values



# output has 0 missing values

Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

# Label Encoding

In [26]:
# label encoding
le = LabelEncoder()
for column in data.columns:
    if data[column].dtype == type(object):
        data[column] = le.fit_transform(data[column])

# Spliting the data into training and testing data 

In [27]:
# spliting the data into training and testing data for  Develop a machine learning model to classify restaurants based on their cuisines.

X = data.drop('Cuisines', axis=1) # dependent variable
y = data['Cuisines'] # independent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)



In [None]:
# use random forest for classification and train the model
#  i have total of 9800 rows in the dataset, so i will use 80% of the data for training and 20% for testing

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, random_state=0)
classifier.fit(X_train, y_train)







In [29]:
# predict the test set results
y_pred = classifier.predict(X_test)
y_pred

array([1306, 1514,  335, ...,  201, 1306, 1590], shape=(1911,))

In [31]:
print(confusion_matrix(y_test, y_pred)) # confusion matrix to evaluate the accuracy of a classification model


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [32]:
print(classification_report(y_test, y_pred)) # classification report to evaluate the quality of predictions


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         1
           6       0.17      0.20      0.18         5
           9       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         0
          18       0.00      0.00      0.00         2
          21       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          29       0.00      0.00      0.00         0
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         1
          32       0.00      0.00      0.00         0
          40       0.00      0.00      0.00         1
          42       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [33]:
print(accuracy_score(y_test, y_pred)) # accuracy score to evaluate the accuracy of the model


0.1119832548403977


In [34]:
recall_score(y_test, y_pred, average=None) # recall score to evaluate the ability of the classifier to find all the positive samples


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([0.        , 0.        , 0.        , 0.2       , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.30769231, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     