In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [4]:
# Read the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="RRDinerCoffeeData.csv")

In [6]:
# One-hot encode the object/string/category columns of the raw frame.
# drop_first=True drops the first level of each encoded column, so k levels
# yield k-1 indicator columns.
df_encoded = pd.get_dummies(data=df, drop_first=True)

In [8]:
# Keep only the rows whose target ("Decision") is present; rows with a missing
# target cannot be used for supervised training or evaluation.
has_decision = df_encoded["Decision"].notna()
df_cleaned = df_encoded.loc[has_decision]

In [10]:
# Target vector: the "Decision" column on its own.
y = df_cleaned.loc[:, "Decision"]
# Feature matrix: every remaining column.
X = df_cleaned.drop(columns=["Decision"])

In [12]:
# Hold out 30% of the labelled rows for evaluation; the fixed seed keeps the
# partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [14]:
# Gini-impurity tree with no max_depth set. fit() returns the estimator
# itself, so construction and training can be chained.
dt_gini_full = DecisionTreeClassifier(
    criterion="gini",
    random_state=42,
).fit(X_train, y_train)
gini_full_preds = dt_gini_full.predict(X_test)

In [16]:
# Gini-impurity tree with depth capped at 4 levels. fit() returns the
# estimator itself, so construction and training can be chained.
dt_gini_depth = DecisionTreeClassifier(
    criterion="gini",
    max_depth=4,
    random_state=42,
).fit(X_train, y_train)
gini_depth_preds = dt_gini_depth.predict(X_test)

In [18]:
# Entropy (information-gain) tree with no max_depth set. fit() returns the
# estimator itself, so construction and training can be chained.
dt_entropy_full = DecisionTreeClassifier(
    criterion="entropy",
    random_state=42,
).fit(X_train, y_train)
entropy_full_preds = dt_entropy_full.predict(X_test)

In [20]:
# Entropy (information-gain) tree with depth capped at 4 levels. fit() returns
# the estimator itself, so construction and training can be chained.
dt_entropy_depth = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=4,
    random_state=42,
).fit(X_train, y_train)
entropy_depth_preds = dt_entropy_depth.predict(X_test)

In [22]:
# Random forest of 100 trees, seeded for repeatable results. fit() returns the
# estimator itself, so construction and training can be chained.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

In [26]:
def evaluate_model(name, y_true, y_pred):
    """Print an evaluation summary for one set of predictions.

    Args:
        name: Label shown on the "Model:" header line.
        y_true: Ground-truth labels.
        y_pred: Predicted labels, compared against ``y_true``.

    Returns:
        None. The accuracy, confusion matrix and classification report are
        written to stdout, in that order.
    """
    print(f"\nModel: {name}")

    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", matrix)

    report = classification_report(y_true, y_pred)
    print("Classification Report:\n", report)

In [28]:
# Report every fitted model against the same held-out labels, in a fixed order.
model_predictions = [
    ("Decision Tree - Gini (full)", gini_full_preds),
    ("Decision Tree - Gini (depth=4)", gini_depth_preds),
    ("Decision Tree - Entropy (full)", entropy_full_preds),
    ("Decision Tree - Entropy (depth=4)", entropy_depth_preds),
    ("Random Forest", rf_preds),
]
for model_name, preds in model_predictions:
    evaluate_model(model_name, y_test, preds)



Model: Decision Tree - Gini (full)
Accuracy: 0.965034965034965
Confusion Matrix:
 [[44  5]
 [ 0 94]]
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.90      0.95        49
         1.0       0.95      1.00      0.97        94

    accuracy                           0.97       143
   macro avg       0.97      0.95      0.96       143
weighted avg       0.97      0.97      0.96       143


Model: Decision Tree - Gini (depth=4)
Accuracy: 0.9440559440559441
Confusion Matrix:
 [[41  8]
 [ 0 94]]
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.84      0.91        49
         1.0       0.92      1.00      0.96        94

    accuracy                           0.94       143
   macro avg       0.96      0.92      0.94       143
weighted avg       0.95      0.94      0.94       143


Model: Decision Tree - Entropy (full)
Accuracy: 0.951048951048951
Confusion Matrix:
 [[43