In [20]:
#MUSHROOM Dataset
import pandas as pd
import numpy as np

# Path to the raw Mushroom data file. The file has no header row, so the
# column names are supplied explicitly below.
file_path = "/kaggle/input/mushrooms/agaricus-lepiota.data"

# Column names in file order: the target ("class") first, then the
# 22 feature columns, grouped here by the part of the mushroom they describe.
columns = [
    "class",
    "cap-shape", "cap-surface", "cap-color",
    "bruises", "odor",
    "gill-attachment", "gill-spacing", "gill-size", "gill-color",
    "stalk-shape", "stalk-root",
    "stalk-surface-above-ring", "stalk-surface-below-ring",
    "stalk-color-above-ring", "stalk-color-below-ring",
    "veil-type", "veil-color",
    "ring-number", "ring-type",
    "spore-print-color", "population", "habitat",
]

# Parse the comma-separated file; every line is treated as data and the
# columns are labelled with the names above.
mushrooms = pd.read_csv(file_path, header=None, names=columns)

# Sanity check: dimensions, then the first five records.
print("Shape:", mushrooms.shape)
print(mushrooms.head())


Shape: (8124, 23)
  class cap-shape cap-surface cap-color bruises odor gill-attachment  \
0     p         x           s         n       t    p               f   
1     e         x           s         y       t    a               f   
2     e         b           s         w       t    l               f   
3     p         x           y         w       t    p               f   
4     e         x           s         g       f    n               f   

  gill-spacing gill-size gill-color  ... stalk-surface-below-ring  \
0            c         n          k  ...                        s   
1            c         b          k  ...                        s   
2            c         b          n  ...                        s   
3            c         n          n  ...                        s   
4            w         b          k  ...                        s   

  stalk-color-above-ring stalk-color-below-ring veil-type veil-color  \
0                      w                      w         p     

In [21]:
#PREPROCESSING
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

# Target: the "class" column as integer codes. LabelEncoder numbers the
# labels in sorted order, so e -> 0 and p -> 1.
le = LabelEncoder()
y = le.fit_transform(mushrooms["class"])

# Features: every column except the target, expanded into one indicator
# column per (feature, category) pair.
X = pd.get_dummies(mushrooms.drop(columns=["class"]))

print("Features shape after encoding:", X.shape)

Features shape after encoding: (8124, 117)


In [22]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the size of each partition.
print("X_train:", X_train.shape)
print("X_test:", X_test.shape)


X_train: (6499, 117)
X_test: (1625, 117)


In [23]:
#Feature Scaling
from sklearn.preprocessing import MinMaxScaler

# Learn each column's min/max from the training rows only, then apply that
# same mapping to both partitions so test-set statistics never reach the scaler.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Unregularised logistic regression on the binary target (e=0, p=1).
# The solver is left at the library default; max_iter=1000 sets the
# iteration cap for the optimiser.
lr = LogisticRegression(max_iter=1000, penalty=None).fit(X_train, y_train)

# Predicted class labels for the training and held-out rows.
y_test_pred = lr.predict(X_test)
y_train_pred = lr.predict(X_train)

# Accuracy on each partition, followed by the test-set confusion matrix
# (rows are true classes, columns are predicted classes).
print("Train Accuracy:", accuracy_score(y_train, y_train_pred))
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print("Confusion Matrix (Test):\n", confusion_matrix(y_test, y_test_pred))


Train Accuracy: 1.0
Test Accuracy: 1.0
Confusion Matrix (Test):
 [[842   0]
 [  0 783]]
