In [162]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

from sklearn import tree
from sklearn.tree import export_text

import six
import sys
import graphviz
sys.modules['sklearn.externals.six'] = six
from id3 import Id3Estimator
from id3 import export_graphviz

from sklearn.cluster import KMeans

from sklearn.linear_model import LogisticRegression

from sklearn.neural_network import MLPClassifier

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [123]:
df = pd.read_csv('PlayTennis.csv')
df

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [124]:
col_name = df.columns.tolist()
feat_name = col_name[:-1]
class_name = col_name[-1]

In [125]:
# Label Encoding
le = LabelEncoder()
df = df.apply(le.fit_transform)
df

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1
5,1,0,1,0,0
6,0,0,1,0,1
7,2,2,0,1,0
8,2,0,1,1,1
9,1,2,1,1,1


In [126]:
# Split train and test data
# X = df[["Outlook", "Temperature", "Humidity", "Wind"]]
# y = df[["Play Tennis"]]
X = df[feat_name]
y = df[[class_name]]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [127]:
X_train

Unnamed: 0,Outlook,Temperature,Humidity,Wind
2,0,1,0,1
10,2,2,1,0
4,1,0,1,1
1,2,1,0,0
12,0,1,1,1
0,2,1,0,1
13,1,2,0,0
9,1,2,1,1
8,2,0,1,1
11,0,2,0,0


In [128]:
y_train

Unnamed: 0,Play Tennis
2,1
10,1
4,1
1,0
12,1
0,0
13,0
9,1
8,1
11,1


In [129]:
X_test

Unnamed: 0,Outlook,Temperature,Humidity,Wind
3,1,2,0,1
7,2,2,0,1
6,0,0,1,0


In [130]:
y_test

Unnamed: 0,Play Tennis
3,1
7,0
6,1


In [131]:
# DecisionTreeClassifier
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

DecisionTreeClassifier()

In [132]:
# Export tree
r = export_text(clf, feature_names=feat_name)
print(r)

|--- Outlook <= 0.50
|   |--- class: 1
|--- Outlook >  0.50
|   |--- Humidity <= 0.50
|   |   |--- class: 0
|   |--- Humidity >  0.50
|   |   |--- Wind <= 0.50
|   |   |   |--- Outlook <= 1.50
|   |   |   |   |--- class: 0
|   |   |   |--- Outlook >  1.50
|   |   |   |   |--- class: 1
|   |   |--- Wind >  0.50
|   |   |   |--- class: 1



In [133]:
# accuracy & f1
pred = clf.predict(X_test)
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("DecisionTreeClassifier")
print("Accuracy: ", acc)
print("f1_score: ", f1)

DecisionTreeClassifier
Accuracy:  0.6666666666666666
f1_score:  0.6666666666666666


In [140]:
# Id3Estimator
id3 = Id3Estimator()
id3.fit(X_train, y_train)

Id3Estimator()

In [145]:
# Accuracy & F1
pred = id3.predict(X_test)
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("Id3Estimator")
print("Accuracy: ", acc)
print("f1_score: ", f1)

Id3Estimator
Accuracy:  0.6666666666666666
f1_score:  0.6666666666666666


In [153]:
# KMeans
kmeans = KMeans(n_clusters=4)
kmeans.fit(X_train, y_train)

KMeans(n_clusters=4)

In [154]:
# accuracy & F1
pred = kmeans.predict(X_test)
pred = [0 if label!=1 else 1 for label in pred]
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("KMeans")
print("Accuracy: ", acc)
print("f1_score: ", f1)

KMeans
Accuracy:  0.6666666666666666
f1_score:  0.6666666666666666


In [155]:
# LogisticRegression
log = LogisticRegression(max_iter=100)
log.fit(X_train, y_train)

LogisticRegression()

In [156]:
# accuracy & F1
pred = log.predict(X_test)
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("LogisticRegression")
print("Accuracy: ", acc)
print("f1_score: ", f1)

LogisticRegression
Accuracy:  0.6666666666666666
f1_score:  0.8


In [158]:
# MLPClassifier
mlp = MLPClassifier(max_iter=300)
mlp.fit(X_train, y_train)

MLPClassifier(max_iter=300)

In [159]:
# accuracy & F1
pred = mlp.predict(X_test)
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("MLPClassifier")
print("Accuracy: ", acc)
print("f1_score: ", f1)

MLPClassifier
Accuracy:  1.0
f1_score:  1.0


In [163]:
# SVM
svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))
svm.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto'))])

In [164]:
# accuracy & f1
pred = svm.predict(X_test)
acc = accuracy_score(y_test, pred)
f1 = f1_score(y_test, pred)
print("SVM")
print("Accuracy: ", acc)
print("f1_score: ", f1)

SVM
Accuracy:  1.0
f1_score:  1.0
