In [9]:
from SplittingCriterion import SplittingCriterion
from TreeClassifier import TreeClassifier, Node
from CrossValidation import cross_validation
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math

In [10]:
# Location of the dataset CSV, given relative to the notebook's working directory.
dataset_path = "MushroomDataset/secondary_data.csv"

In [11]:
# Load the semicolon-separated dataset.
dataset = pd.read_csv(dataset_path, sep=";")
# Shuffle all rows before the positional train/test split further down.
# A fixed random_state keeps the shuffle, and therefore the split and every
# reported metric, reproducible across kernel restarts.
dataset = dataset.sample(frac=1, random_state=42)

In [3]:
# Feature matrix: every column except the target column "class".
X = dataset.drop(columns=["class"])
# Binary target as a plain list: "p" becomes 0, any other value becomes 1.
y = [int(label != "p") for label in dataset["class"]]

In [4]:
entropy = SplittingCriterion("gini")

In [5]:
classifier = TreeClassifier(max_depth = 5, min_samples_split = 600, splitting_criterion=entropy)

In [None]:
# Cross-validate the configured classifier on the full shuffled feature/label
# set and print whatever the project's cross_validation helper returns.
results = cross_validation(classifier, X, y)
print(results)

In [6]:
# Positional split: the first 51000 (already shuffled) rows form the training set.
train_rows = slice(None, 51000)
X_train = X[train_rows]
y_train = y[train_rows]

In [7]:
# Every row from position 51000 onward is held out for testing.
test_rows = slice(51000, None)
X_test = X[test_rows]
y_test = y[test_rows]

In [11]:
# Train on the training slice only; the remaining rows stay unseen for evaluation.
classifier.fit(X_train, y_train)

In [12]:
# Metrics on the rows the tree was fitted on (the recorded output reports
# zero-one loss, precision and recall), as a baseline for the held-out metrics.
classifier.evaluate(X_train, y_train)

{'zero-one loss': 0.023431372549019606,
 'precision': 0.9796646450231894,
 'recall': 0.9674549698330911}

In [13]:
# Same metrics on the held-out rows that were not passed to fit().
classifier.evaluate(X_test, y_test)

{'zero-one loss': 0.025722514648922435,
 'precision': 0.978433598183882,
 'recall': 0.9633437639696022}

In [None]:
# Predictions for every held-out row.
# NOTE(review): as the last expression of the cell this would display the whole
# result; consider showing only a small sample once the return type is confirmed.
classifier.predict(X_test)

### SCALED ENTROPY with max_depth=5 and min_samples_split=600

In [22]:
# Print the fitted tree as an indented text outline.
# NOTE(review): the heading labels this output "scaled entropy", but the
# criterion cell in this notebook selects "gini" — presumably the output was
# recorded from an earlier run with a different criterion; confirm and re-run.
classifier.print_tree()

Feature stem-width < 6.56?	[0.957298672784439]
|   Left:
|   Feature gill-spacing < d?	[0.7639808610444332]
|   |   Left:
|   |   Feature stem-height < 4.06?	[0.6681854635024104]
|   |   |   Left:
|   |   |   Feature stem-width < 2.64?	[0.4116234349788579]
|   |   |   |   Left:
|   |   |   |   Feature cap-diameter < 1.7?	[0.15841741451860453]
|   |   |   |   |   Left:
|   |   |   |   |   Predict: 0
|   |   |   |   |   Right:
|   |   |   |   |   Predict: 0
|   |   |   |   Right:
|   |   |   |   Predict: 1
|   |   |   Right:
|   |   |   Feature gill-attachment < x?	[0.3438056023846032]
|   |   |   |   Left:
|   |   |   |   Predict: 0
|   |   |   |   Right:
|   |   |   |   Feature cap-shape < s?	[0.10698883562710391]
|   |   |   |   |   Left:
|   |   |   |   |   Predict: 1
|   |   |   |   |   Right:
|   |   |   |   |   Predict: 1
|   |   Right:
|   |   Feature cap-surface < g?	[0.6765715704248989]
|   |   |   Left:
|   |   |   Feature cap-shape < x?	[0.762568399802961]
|   |   |   |   Lef

### GINI FUNCTION with max_depth=5 and min_samples_split=600

In [9]:
# Print the fitted tree as an indented text outline (Gini run).
classifier.print_tree()  # GINI

Feature stem-width < 6.56?	[0.4712601162278436]
|   Left:
|   Feature gill-spacing < d?	[0.3466487593194746]
|   |   Left:
|   |   Feature stem-height < 4.06?	[0.2906437495523697]
|   |   |   Left:
|   |   |   Feature cap-surface < s?	[0.12637562541381567]
|   |   |   |   Left:
|   |   |   |   Predict: 1
|   |   |   |   Right:
|   |   |   |   Feature cap-shape < b?	[0.04861767493237433]
|   |   |   |   |   Left:
|   |   |   |   |   Predict: 1
|   |   |   |   |   Right:
|   |   |   |   |   Predict: 0
|   |   |   Right:
|   |   |   Feature cap-surface < l?	[0.028248389123706665]
|   |   |   |   Left:
|   |   |   |   Predict: 0
|   |   |   |   Right:
|   |   |   |   Feature cap-surface < d?	[0.0]
|   |   |   |   |   Left:
|   |   |   |   |   Predict: 0
|   |   |   |   |   Right:
|   |   |   |   |   Predict: 1
|   |   Right:
|   |   Feature cap-surface < g?	[0.2964753962346178]
|   |   |   Left:
|   |   |   Feature gill-color < g?	[0.3394311318916813]
|   |   |   |   Left:
|   |   |   |   

### GINI FUNCTION with max_depth=None and min_samples_split=600

In [14]:
# Print the fitted tree as an indented text outline.
# NOTE(review): the heading states max_depth=None, but the classifier cell in
# this notebook sets max_depth=5 — presumably this output comes from a run with
# a modified configuration; confirm and re-run to reproduce.
classifier.print_tree()

Feature stem-width < 6.56?	[0.4710148909696018]
|   Left:
|   Feature gill-spacing < d?	[0.3472378110548957]
|   |   Left:
|   |   Feature stem-height < 4.04?	[0.2947198854986637]
|   |   |   Left:
|   |   |   Feature cap-surface < s?	[0.11198222100577375]
|   |   |   |   Left:
|   |   |   |   Predict: 1
|   |   |   |   Right:
|   |   |   |   Feature cap-shape < b?	[0.04288905132169081]
|   |   |   |   |   Left:
|   |   |   |   |   Predict: 1
|   |   |   |   |   Right:
|   |   |   |   |   Feature cap-shape < f?	[0.02052829262129681]
|   |   |   |   |   |   Left:
|   |   |   |   |   |   Predict: 1
|   |   |   |   |   |   Right:
|   |   |   |   |   |   Feature cap-surface < h?	[0.009378245203197636]
|   |   |   |   |   |   |   Left:
|   |   |   |   |   |   |   Predict: 1
|   |   |   |   |   |   |   Right:
|   |   |   |   |   |   |   Feature cap-surface < y?	[0.00472249713968849]
|   |   |   |   |   |   |   |   Left:
|   |   |   |   |   |   |   |   Predict: 1
|   |   |   |   |   |   |   |

### STANDARD DEVIATION with max_depth=None and min_samples_split=600

In [14]:
# Print the fitted tree as an indented text outline.
# NOTE(review): the heading states a standard-deviation criterion with
# max_depth=None, neither of which matches the configuration cells in this
# notebook ("gini", max_depth=5) — presumably recorded from a separate run; confirm.
classifier.print_tree()

Feature ring-type < z?	[0.48131282405736514]
|   Left:
|   Predict: 0
|   Right:
|   Feature stem-surface < g?	[0.48434695993939664]
|   |   Left:
|   |   Predict: 0
|   |   Right:
|   |   Feature stem-width < 6.36?	[0.48285455144949624]
|   |   |   Left:
|   |   |   Feature gill-spacing < d?	[0.4174856871560925]
|   |   |   |   Left:
|   |   |   |   Feature stem-height < 4.05?	[0.3768078049049821]
|   |   |   |   |   Left:
|   |   |   |   |   Feature cap-surface < s?	[0.2307172217467386]
|   |   |   |   |   |   Left:
|   |   |   |   |   |   Predict: 1
|   |   |   |   |   |   Right:
|   |   |   |   |   |   Feature cap-shape < b?	[0.15044880849178544]
|   |   |   |   |   |   |   Left:
|   |   |   |   |   |   |   Predict: 1
|   |   |   |   |   |   |   Right:
|   |   |   |   |   |   |   Feature cap-color < w?	[0.0956114119040109]
|   |   |   |   |   |   |   |   Left:
|   |   |   |   |   |   |   |   Predict: 0
|   |   |   |   |   |   |   |   Right:
|   |   |   |   |   |   |   |   Predict: 