In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [1]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

In [3]:
columns = [
    "class", "cap-shape", "cap-surface", "cap-color", "bruises", "odor",
    "gill-attachment", "gill-spacing", "gill-size", "gill-color",
    "stalk-shape", "stalk-root", "stalk-surface-above-ring",
    "stalk-surface-below-ring", "stalk-color-above-ring",
    "stalk-color-below-ring", "veil-type", "veil-color",
    "ring-number", "ring-type", "spore-print-color",
    "population", "habitat"
]

df = pd.read_csv(url, header=None, names=columns)

In [4]:
mushrooms = df[["class", "odor", "cap-color"]].rename(columns={
    "class": "edibility",
    "odor": "odor",
    "cap-color": "cap_color"
})

In [5]:
for col in ["edibility", "odor", "cap_color"]:
    mushrooms[col] = pd.factorize(mushrooms[col])[0]

mushrooms.head()

Unnamed: 0,edibility,odor,cap_color
0,0,0,0
1,1,1,1
2,1,2,2
3,0,0,2
4,1,3,3


In [6]:
X = mushrooms[["odor", "cap_color"]]
y = mushrooms["edibility"]

X.head(), y.head()

(   odor  cap_color
 0     0          0
 1     1          1
 2     2          2
 3     0          2
 4     3          3,
 0    0
 1    1
 2    1
 3    0
 4    1
 Name: edibility, dtype: int64)

In [7]:
X_encoded = pd.get_dummies(X, columns=["odor", "cap_color"])

X_encoded.head()

Unnamed: 0,odor_0,odor_1,odor_2,odor_3,odor_4,odor_5,odor_6,odor_7,odor_8,cap_color_0,cap_color_1,cap_color_2,cap_color_3,cap_color_4,cap_color_5,cap_color_6,cap_color_7,cap_color_8,cap_color_9
0,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
1,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
2,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
3,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
4,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.3, random_state=42
)

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [10]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [11]:
y_pred = clf.predict(X_test)

In [12]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy:.3f}")

Decision Tree Accuracy: 0.987


In [13]:
feature_importances = pd.Series(clf.feature_importances_, index=X_encoded.columns)
feature_importances.sort_values(ascending=False)

Unnamed: 0,0
odor_3,0.65509
odor_2,0.17404
odor_1,0.15307
cap_color_1,0.011735
cap_color_5,0.002612
cap_color_6,0.002421
cap_color_2,0.00098
cap_color_0,5.2e-05
odor_0,0.0
odor_6,0.0


In [14]:
from sklearn.model_selection import cross_val_score

In [15]:
X_odor = pd.get_dummies(X[["odor"]])
scores_odor = cross_val_score(DecisionTreeClassifier(random_state=42), X_odor, y, cv=5)
print("Odor Accuracy:", np.mean(scores_odor))

Odor Accuracy: 0.9537224706328156


In [16]:
X_cap = pd.get_dummies(X[["cap_color"]])
scores_cap = cross_val_score(DecisionTreeClassifier(random_state=42), X_cap, y, cv=5)
print("Cap Color Accuracy:", np.mean(scores_cap))

Cap Color Accuracy: 0.4251615005683972


Overall, odor with an accuracy score of 0.9537 is stronger at predicting whether a mushroom is poisonous or edible. Compared to cap color, which is more general and overlaps between both types, odor provides clearer separation. In addition, for further analysis, I would try using a different classifier to see if the accuracy improves.