In [1]:
import pandas as pd
from IPython.display import display_html
from sklearn import tree
from sklearn.tree import export_graphviz
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [2]:

def toy_dataset():
    animal = [['human',1, 1, 0, 0,1,0,'mammals'],['python',0,0,0,0,0,1,'reptiles'],
              ['salmon',0,0,1,0,0,0,'fishes'],['whale',1,1,1,0,0,0,'mammals'], 
              ['frog',0,0,1,0,1,1,'amphibians'],['komodo',0,0,0,0,1,0,'reptiles'], 
              ['bat',1,1,0,1,1,1,'mammals'],['pigeon',1,0,0,1,1,0,'birds'], 
              ['cat',1,1,0,0,1,0,'mammals'],['leopard shark',0,1,1,0,0,0,'fishes'], 
              ['turtle',0,0,1,0,1,0,'reptiles'],['penguin',1,0,1,0,1,0,'birds'], 
              ['porcupine',1,1,0,0,1,1,'mammals'],['eel',0,0,1,0,0,0,'fishes'], 
              ['salamander',0,0,1,0,1,1,'amphibians']]
    titles=['Name','Warm_blooded','Give_birth','Aquatic_creature','Aerial_creature','Has_legs','Hibernates','Class']
    data = pd.DataFrame(animal,columns=titles)
    data['Class'] = data['Class'].replace(['fishes','birds','amphibians','reptiles'],'non-mammals') 
    print("Do you want to view data?")
    choice=input()
    if choice=='yes':
        display_html(data)
    return data

In [3]:
def build_model(data):
    Y = data['Class']
    X = data.drop(['Name', 'Class'],axis=1)
    clf = tree.DecisionTreeClassifier(criterion='entropy',max_depth=3)
    clf = clf.fit(X, Y)
    return clf

In [4]:
def prediction_using_model(clf):
    testData = [['gila monster',0,0,0,0,1,1,'non-mammals'],
                ['platypus',1,0,0,0,1,1,'mammals'],
                ['dolphin',1,1,1,0,0,0,'mammals'],
                ['owl',1,0,0,1,1,0,'non-mammals']]
    titles=['Name','Warm_blooded','Give_birth','Aquatic_creature','Aerial_creature','Has_legs','Hibernates', 'Class'] 
    testData = pd.DataFrame(testData, columns=titles)
    print("Do you want to view test data?")
    choice=input()
    if choice=='yes':
        display_html(testData)
    #Splitting test data
    y_test= testData['Class']
    x_test = testData.drop(['Name', 'Class'],axis=1) 
    y_pred = clf.predict(x_test)
    predictions = pd.concat([testData['Name'],pd.Series(y_pred,name='Predicted Class')], axis=1)
    print("Prediction for your test data is:")
    display_html(predictions)
    #Model evaluation
    print("Do you want to view Evaluation of model?")
    choice=input() 
    if choice=='yes':
        model_evaluation(y_pred,y_test)
    else:
        quit()


In [5]:
def model_evaluation(y_pred,y_test):
    print("Confusion Matrix:")
    report=(confusion_matrix(y_test, y_pred))
    cf=pd.DataFrame (report).transpose()
    display_html(cf)
    score = accuracy_score(y_test,y_pred) 
    print('Decision Tree Accuracy :',score) 
    print("Classification report:")
    report=(classification_report(y_test, y_pred, output_dict=True))
    df = pd.DataFrame(report).transpose()
    display_html(df[['precision', 'recall','f1-score']].head (2))


In [6]:
def main():
    data=toy_dataset()
    model= build_model(data)
    # to visualize tree install both graphviz and pydotplus
    dot_data=tree.export_graphviz(model,'tree.dot',class_names=True)
    ! "C:\Program Files\Graphviz\bin\dot.exe" -Tpng tree.dot -o tree.png
    print("Your decision tree constructed successfully, check the current directory for tree.png") 
    prediction_using_model(model)
main()


Do you want to view data?
yes


Unnamed: 0,Name,Warm_blooded,Give_birth,Aquatic_creature,Aerial_creature,Has_legs,Hibernates,Class
0,human,1,1,0,0,1,0,mammals
1,python,0,0,0,0,0,1,non-mammals
2,salmon,0,0,1,0,0,0,non-mammals
3,whale,1,1,1,0,0,0,mammals
4,frog,0,0,1,0,1,1,non-mammals
5,komodo,0,0,0,0,1,0,non-mammals
6,bat,1,1,0,1,1,1,mammals
7,pigeon,1,0,0,1,1,0,non-mammals
8,cat,1,1,0,0,1,0,mammals
9,leopard shark,0,1,1,0,0,0,non-mammals


Your decision tree constructed successfully, check the current directory for tree.png
Do you want to view test data?
yes


Unnamed: 0,Name,Warm_blooded,Give_birth,Aquatic_creature,Aerial_creature,Has_legs,Hibernates,Class
0,gila monster,0,0,0,0,1,1,non-mammals
1,platypus,1,0,0,0,1,1,mammals
2,dolphin,1,1,1,0,0,0,mammals
3,owl,1,0,0,1,1,0,non-mammals


Prediction for your test data is:


Unnamed: 0,Name,Predicted Class
0,gila monster,non-mammals
1,platypus,non-mammals
2,dolphin,mammals
3,owl,non-mammals


Do you want to view Evaluation of model?
yes
Confusion Matrix:


Unnamed: 0,0,1
0,1,0
1,1,2


Decision Tree Accuracy : 0.75
Classification report:


Unnamed: 0,precision,recall,f1-score
mammals,1.0,0.5,0.666667
non-mammals,0.666667,1.0,0.8
