
# Logistic Regression 3-class Classifier


Shown below are the decision boundaries of a logistic-regression classifier on the
[iris](https://en.wikipedia.org/wiki/Iris_flower_data_set) dataset. The
data points are colored according to their labels.



In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets

# Load the Iris dataset and split it in half: the first half of a shuffled
# index permutation becomes the training set, the remainder the test set.
print("Loading Iris data ...")
iris = datasets.load_iris()
iris_X = iris.data
iris_Y = iris.target
total_size = len(iris_Y)
train_size = int(0.5 * total_size)

# Seed NumPy's global RNG so the permutation (and thus the split) is the
# same on every run.
np.random.seed(0)
indices = np.random.permutation(total_size)
train_idx = indices[:train_size]
test_idx = indices[train_size:]

train_x = iris_X[train_idx]
train_y = iris_Y[train_idx]

test_x = iris_X[test_idx]
test_y = iris_Y[test_idx]

print("Target distribution")
# Build a concrete list of (label, count) pairs of plain ints: printing a
# bare zip() only shows "<zip object ...>" on Python 3, and plain ints keep
# the printed form independent of NumPy's scalar repr.
class_counts = [
    (int(label), int(count))
    for label, count in zip(np.unique(iris_Y), np.bincount(iris_Y))
]
print(class_counts)
print("Done")

Loading Iris data ...
Target distribution
[(0, 50), (1, 50), (2, 50)]
Done


In [17]:
# Sanity-check the training split: (number of samples, number of features).
# Uses the print() function form, which works on both Python 2 and 3.
print(train_x.shape)

(75, 4)


In [19]:
# Fit a logistic-regression classifier on the training split and evaluate it
# on the held-out test split. In scikit-learn, C is the inverse of the
# regularisation strength, so C=1e5 applies only very weak regularisation.
log_reg = linear_model.LogisticRegression(C=1e5)
log_reg.fit(train_x, train_y)
pred_y = log_reg.predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

# Learned per-class feature weights and intercepts.
print(log_reg.coef_)
print(log_reg.intercept_)

Misclassified 2 out of 75 samples
[[  1.25644948   4.89685397  -7.11875799  -3.41855163]
 [ -0.02837947  -1.81867988   1.23581439  -2.68041785]
 [ -0.34362878  -6.3992516    5.53572612  15.3610706 ]]
[  0.7997799    3.65161857 -31.47325   ]


In [7]:
# Fit a Perceptron with default settings on the training split and evaluate
# it on the held-out test split.
perp = linear_model.Perceptron()
perp.fit(train_x, train_y)
pred_y = perp.predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

Misclassified 20 out of 75 samples


In [8]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive Bayes classifier on the training split and evaluate it
# on the held-out test split.
gnb = GaussianNB()
pred_y = gnb.fit(train_x, train_y).predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

Misclassified 2 out of 75 samples


In [9]:
from sklearn import tree

# Fit a decision tree with default settings on the training split and
# evaluate it on the held-out test split.
dt = tree.DecisionTreeClassifier()
pred_y = dt.fit(train_x, train_y).predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

Misclassified 3 out of 75 samples


In [13]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 10-tree random forest on the training split and evaluate it on the
# held-out test split.
rf = RandomForestClassifier(n_estimators=10)
pred_y = rf.fit(train_x, train_y).predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

# Last expression of the cell: display the per-feature importances of the
# fitted forest.
rf.feature_importances_

Misclassified 4 out of 75 samples


array([ 0.06296882,  0.05113253,  0.55826489,  0.32763375])

In [11]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# Bag k-nearest-neighbour classifiers, each built with max_samples=0.5 and
# max_features=0.5, then evaluate the ensemble on the held-out test split.
bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
pred_y = bagging.fit(train_x, train_y).predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

Misclassified 3 out of 75 samples


In [20]:
from sklearn.ensemble import AdaBoostClassifier

# Fit an AdaBoost classifier with default settings on the training split and
# evaluate it on the held-out test split.
ada = AdaBoostClassifier()
pred_y = ada.fit(train_x, train_y).predict(test_x)

# Count mismatches with an element-wise comparison; len(filter(...)) fails on
# Python 3 because filter() returns an iterator with no len().
mis_cnt = int((pred_y != test_y).sum())
print("Misclassified %d out of %d samples" % (mis_cnt , test_y.size))

Misclassified 3 out of 75 samples
