# Logistic Regression

Building decision boundaries for each class that rely on finding linear separations between pairs of attributes; we will use two attributes for simplicity. 

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import tree

iris = load_iris()
X = iris.data
y = iris.target

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)

# Build a decision tree and fit the model
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

<ins>Determine Score</ins>

In [2]:
# Count the number of correctly predicted labels
y_hat = clf.predict(X_test) # Predictions

count = 0.0
for i in range(len(y_hat)):
    if y_hat[i] == y_test[i]:
        count += 1

score = count/len(y_hat) * 100
print("Accuracy: %f" % score)

Accuracy: 97.368421


In [3]:
# Using sklearn score function

print('training set score: %f' % clf.score(X_train, y_train))
print('test set score: %f' % clf.score(X_test, y_test))

training set score: 1.000000
test set score: 0.973684


In [5]:
# Using accuracy score function
from sklearn import metrics

print('accuracy score: %f' % metrics.accuracy_score(y_test, y_hat))

accuracy score: 0.973684


<ins>Confusion Matrix</ins>

In [6]:
print(metrics.confusion_matrix(y_test, y_hat))

[[13  0  0]
 [ 0 15  1]
 [ 0  0  9]]


Precision, Recall, F1

For, respectively, Setosa, Versicolor, Virginica predictions

In [8]:
# Precision
print('Precision score: ', metrics.precision_score(y_test, y_hat, average=None))
# Recall
print('Recall score: ', metrics.recall_score(y_test, y_hat, average=None))
# F1
print('F1 score: ', metrics.f1_score(y_test, y_hat, average=None))

Precision score:  [1.  1.  0.9]
Recall score:  [1.     0.9375 1.    ]
F1 score:  [1.         0.96774194 0.94736842]


# Decision Tree 

In [16]:
# Decision Tree classifiers return a data structure that describes a decision tree, listing nodes in the tree, starting with the root.
# Each node in the tree has an attribute that determines how samples are split at that node.
# The tree structure is stored in the tree_ attribute of the classifier.

decision_path = clf.decision_path(iris.data)
print(decision_path)

# This is ugly and hard to interpret, so we 

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import graphviz

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Initialize and train the decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X, y)

# Export as dot file
dot_data = export_graphviz(clf, out_file=None, 
                           feature_names=iris.feature_names,  
                           class_names=iris.target_names,
                           filled=True, rounded=True,  
                           special_characters=True)

# Use graphviz to render the decision tree
graph = graphviz.Source(dot_data)
graph.render("data/decision_tree")  # Saves the visual representation as a PDF file


  (0, 0)	1
  (0, 1)	1
  (1, 0)	1
  (1, 1)	1
  (2, 0)	1
  (2, 1)	1
  (3, 0)	1
  (3, 1)	1
  (4, 0)	1
  (4, 1)	1
  (5, 0)	1
  (5, 1)	1
  (6, 0)	1
  (6, 1)	1
  (7, 0)	1
  (7, 1)	1
  (8, 0)	1
  (8, 1)	1
  (9, 0)	1
  (9, 1)	1
  (10, 0)	1
  (10, 1)	1
  (11, 0)	1
  (11, 1)	1
  (12, 0)	1
  :	:
  (143, 16)	1
  (144, 0)	1
  (144, 2)	1
  (144, 12)	1
  (144, 16)	1
  (145, 0)	1
  (145, 2)	1
  (145, 12)	1
  (145, 16)	1
  (146, 0)	1
  (146, 2)	1
  (146, 12)	1
  (146, 16)	1
  (147, 0)	1
  (147, 2)	1
  (147, 12)	1
  (147, 16)	1
  (148, 0)	1
  (148, 2)	1
  (148, 12)	1
  (148, 16)	1
  (149, 0)	1
  (149, 2)	1
  (149, 12)	1
  (149, 16)	1


ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH