# Decision Tree Practice

## Import packages

In [1]:
import csv
import random

import numpy as np
import pandas as pd
import pydotplus
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

try:
    from sklearn.externals.six import StringIO
except ImportError:
    # sklearn.externals.six was removed in scikit-learn 0.23; the standard
    # library class is a drop-in replacement.
    from io import StringIO

## Data Generation
Create data for 1000 basketball teams.<br>
Each team has 5 attributes (which determine whether the team makes the playoffs):<br>
- 例行賽勝率(win_rate)
- 例行賽平均得分(win_pts)
- 例行賽平均失分(lose_pts)
- 例行賽平均洋將得分(foreign_pts)
- 中華隊球員數(CT_player)

## Data Rules
![Rules](57364707_2137257826565002_2895220484817813504_n.jpg)

In [2]:
def enter_playoff(data):
    """Label one team record as making the playoffs (1) or not (0).

    Parameters
    ----------
    data : indexable sequence
        Team statistics read by position: ``data[0]`` win rate,
        ``data[1]`` average points scored, ``data[2]`` average points
        conceded, ``data[3]`` average points by foreign players and
        ``data[4]`` number of national-team players. Positions are only
        read when the rule being evaluated needs them.

    Returns
    -------
    int
        1 if the hand-written rules admit the team, otherwise 0.
    """
    if data[0] > 0.6:
        if data[1] > 85:
            # Strong record and strong offence: a tight defence OR productive
            # foreign players is enough.
            qualifies = data[2] < 80 or data[3] > 35
        else:
            # Strong record but weaker offence: needs a tight defence AND very
            # productive foreign players.
            qualifies = data[2] < 80 and data[3] > 40
    else:
        # Weaker record: needs decent defence, foreign scoring and more than
        # three national-team players all at once.
        qualifies = data[2] < 83 and data[3] > 30 and data[4] > 3
    return 1 if qualifies else 0

def create_data(writer):
    """Generate one random team record and write it through ``writer``.

    The row holds win_rate, win_pts, lose_pts, foreign_pts and CT_player,
    followed by the playoff label computed by ``enter_playoff``.

    Parameters
    ----------
    writer : object with a ``writerow`` method
        Receives the six-element row (e.g. a ``csv.writer``).
    """
    # (low, high) bounds for win_rate, win_pts, lose_pts and foreign_pts,
    # listed in column order.
    bounds = ((0.3, 0.7), (80, 90), (82, 90), (25, 45))
    team = [round(random.uniform(low, high), 2) for low, high in bounds]
    # randint already yields an int, so CT_player needs no rounding.
    team.append(random.randint(0, 5))
    # NOTE(review): lose_pts is drawn from [82, 90], so the `data[2] < 80`
    # checks in enter_playoff can never succeed for generated rows - confirm
    # whether the lower bound is intended.
    label = enter_playoff(team)
    writer.writerow(team + [label])

# Seed the generator so that Restart Kernel -> Run All regenerates the same
# data set (and therefore the same tree and accuracy) every time.
random.seed(42)

# newline='' is what the csv module expects for files it writes; without it
# Windows emits a blank line after every row. The explicit utf-8 encoding keeps
# the Chinese header readable by pd.read_csv, whose default encoding is utf-8,
# regardless of the platform's locale.
with open('team.csv', 'w', newline='', encoding='utf-8') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',')
    # Header: win rate, points scored, points conceded, foreign-player points,
    # national-team players, made playoffs.
    filewriter.writerow(['勝率', '得分', '失分', '洋將得分', '中華隊球員', '進季後賽'])
    for _ in range(1000):
        create_data(filewriter)

In [3]:
# Load the generated CSV back into a DataFrame and preview its first rows.
data = pd.read_csv('team.csv')
data.head()

Unnamed: 0,勝率,得分,失分,洋將得分,中華隊球員,進季後賽
0,0.49,82.78,82.69,26.29,5,0
1,0.52,85.13,83.99,25.08,3,0
2,0.46,84.36,85.95,33.38,0,0
3,0.61,81.46,87.45,31.62,0,0
4,0.46,88.64,86.87,41.62,1,0


In [4]:
# Pull the playoff label out as a NumPy array to use as the target.
answer = data['進季後賽'].values
# Keep only the feature columns. `columns=` replaces the positional axis
# argument, which pandas 2.0 no longer accepts.
data = data.drop(columns='進季後賽')

In [5]:
# The first 700 rows train the model; the remaining rows are held out for testing.
split_at = 700
train_x, test_x = data[:split_at], data[split_at:]
train_y, test_y = answer[:split_at], answer[split_at:]

In [6]:
# Fit a depth-limited tree. random_state pins the feature permutation used
# when candidate splits tie, so repeated runs build the same tree.
dtree = DecisionTreeClassifier(max_depth=4, random_state=0)
dtree.fit(train_x, train_y)

# With out_file=None export_graphviz returns the DOT source as a string, so no
# StringIO buffer (and no removed sklearn.externals.six import) is needed.
dot_data = export_graphviz(
    dtree,
    out_file=None,
    filled=True,
    feature_names=list(data),
    class_names=['Not Enter', 'Enter'],
    special_characters=True,
)

# Render the DOT source to a PDF next to the notebook.
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("tree.pdf")

True

In [7]:
# Predict the playoff label for the held-out rows.
y_predict = dtree.predict(test_x)

In [8]:
# accuracy_score is imported with the other dependencies in the first cell.
# Share of held-out teams whose playoff label the tree predicted correctly.
accuracy_score(test_y, y_predict)

0.9933333333333333