In [9]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# Load dataset
data = pd.read_csv("zomato.csv")

# Print column names to verify
print(data.columns)

# Keep only rows whose target is known. Features and target are both built
# from this filtered frame so they always have the same number of rows;
# dropping NaNs from the target alone would leave X longer than y and make
# cross_val_score fail with inconsistent sample counts.
labelled = data.dropna(subset=['online_order'])

# Target: online_order (Yes/No), encoded as integers (Yes=1, No=0).
# `le` stays dedicated to the target so le.classes_ / le.inverse_transform
# keep describing the target labels after this cell has run.
le = LabelEncoder()
y = le.fit_transform(labelled['online_order'])

# Features: two numeric columns plus one Yes/No categorical column
X = labelled[['num of ratings', 'avg cost (two people)', 'table booking']].copy()

# Clean numeric column 'avg cost (two people)': strip thousands separators
# and convert to float. The comma is a literal, so no regex is needed.
# Missing values pass through as the string "nan" -> float NaN and are
# filled further down.
X['avg cost (two people)'] = (
    X['avg cost (two people)']
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(float)
)

# Encode categorical 'table booking' with its own encoder instead of
# re-fitting the target encoder (which would overwrite its learned classes).
booking_encoder = LabelEncoder()
X['table booking'] = booking_encoder.fit_transform(X['table booking'].astype(str))

# Handle missing values in the remaining numeric features
X = X.fillna(0)

# Models
clf1 = DecisionTreeClassifier(random_state=42)
clf2 = LogisticRegression(max_iter=1000)

# K-Fold Cross Validation: the same shuffled 10-fold splitter (fixed seed)
# is used for both models so their per-fold scores are directly comparable.
k_folds = KFold(n_splits=10, shuffle=True, random_state=42)
scores1 = cross_val_score(clf1, X, y, cv=k_folds)
scores2 = cross_val_score(clf2, X, y, cv=k_folds)

# Results
print("Decision Tree CV Scores: ", scores1)
print("Logistic Regression CV Scores: ", scores2)
print("Decision Tree Avg Score: ", scores1.mean())
print("Logistic Regression Avg Score: ", scores2.mean())
print("Number of CV Splits: ", len(scores1))

Index(['Unnamed: 0.1', 'Unnamed: 0', 'restaurant name', 'restaurant type',
       'rate (out of 5)', 'num of ratings', 'avg cost (two people)',
       'online_order', 'table booking', 'cuisines type', 'area',
       'local address'],
      dtype='object')
Decision Tree CV Scores:  [0.60056259 0.56540084 0.57805907 0.57946554 0.5583685  0.59295775
 0.58169014 0.57464789 0.6084507  0.6       ]
Logistic Regression CV Scores:  [0.55133615 0.54149086 0.54711674 0.57805907 0.54571027 0.56197183
 0.54366197 0.55070423 0.55492958 0.53802817]
Decision Tree Avg Score:  0.5839603018957628
Logistic Regression Avg Score:  0.5513008854816663
Number of CV Splits:  10
