# Decision Tree 

The [iris](https://archive.ics.uci.edu/ml/datasets/Iris) dataset was used to run this decision.

---

## Importing Data

In [19]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as accuracy
from sklearn import tree
import graphviz 

In [20]:
names = ["Sepal Width", "Sepal Length", "Petal Width", "Petal Length", "Flower Type"]
irisData = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", names = names)

X = irisData[["Sepal Width", "Sepal Length"]]
Y = irisData["Flower Type"]
irisData.head()

Unnamed: 0,Sepal Width,Sepal Length,Petal Width,Petal Length,Flower Type
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((105, 2), (45, 2), (105,), (45,))

### Creating Decision Tree

In [29]:
clf = DecisionTreeClassifier()
clf = clf.fit(X_train, Y_train)

predict = clf.predict(X_test)
predict

array(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-setosa', 'Iris-virginica', 'Iris-setosa',
       'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-virginica',
       'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [None]:
dot_data = tree.export_graphviz(clf, out_file=None) 
graph = graphviz.Source(dot_data) 
graph.render("iris") 

In [None]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                     feature_names=X.columns,  
                     class_names=Y.columns,  
                     filled=True, rounded=True,  
                     special_characters=True)  
graph = graphviz.Source(dot_data)  
graph 


#### Varying Parameters

In [33]:
score_list = []
for x in range(1, 100, 1):
    for y in range(2, 100):
        clf = tree.DecisionTreeClassifier(max_depth = x, min_samples_split = y)
        clf.fit(X_train, Y_train)
        score = clf.score(X_test, Y_test)
        score_list.append([x, y, score])
        y += 0.1

temp_score = []
temp_depth = []
temp_split = []
for lis in score_list:
    depth = lis[0]
    split = lis[1]
    score = lis[2]
    temp_score.append(score)
    temp_depth.append(depth)
    temp_split.append(split)

max_score_index = temp_score.index(max(temp_score))
max_depth = temp_depth[max_score_index]
max_split = temp_split[max_score_index]
max_score = temp_score[max_score_index]

print("The best max depth is: " + str(max_depth))
print("The best min split is: " + str(max_split))
print("The best score is: " + str(max_score))

The best max depth is: 3
The best min split is: 2
The best score is: 0.7555555555555555


## Statement of Collaboration

#### a. Whom I worked with:
I mainly did this one by myself, but I did get a little help from Kobly, Matt, Tucker and you. 
#### b. Resources Used:
The main resource I used for this assignment was the decision tree page on sklearn.      
https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html