In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [65]:
# loading the datasets to memory
krk_dfs = [pd.read_csv('550-p1-cset-krk-{}.csv'.format(_+1), names=['col1','col2','col3','col4','col5','col6','class_type']) for _ in range(2)]
# krk_df_1, krk_df_2 = pd.read_csv('550-p1-cset-krk-1.csv'), pd.read_csv('550-p1-cset-krk-2.csv')

In [66]:

int_classes = {
    'draw' : -1 ,
    'zero' : 0,
    'one' : 1,
    'two':2,
    'three':3,
    'four':4,
    'five':5,
    'six':6,
    'seven':7,
    'eight':8,
    'nine':9,
    
    'a': 0,
    'b': 1,
    'c': 2,
    'd': 3,
    'e': 4,
    'f': 5,
    'g': 6,
    'h': 7,
}
for df in krk_dfs:
    df.col1 = [int_classes[_] for _ in df.col1]
    df.col3 = [int_classes[_] for _ in df.col3]
    df.col5 = [int_classes[_] for _ in df.col5]
    df.class_type = [int_classes[_] for _ in df.class_type]
# dataset.class_type = [Class_Type_in_integer[item] for item in dataset.class_type] 

krk_dfs[0]

Unnamed: 0,col1,col2,col3,col4,col5,col6,class_type
0,3,1,6,3,7,2,-1
1,2,1,4,2,3,3,-1
2,1,1,3,7,2,7,-1
3,3,2,6,1,6,2,-1
4,3,4,6,6,5,6,-1
...,...,...,...,...,...,...,...
215,3,3,3,7,0,7,9
216,1,1,1,4,6,1,9
217,3,4,4,8,6,2,9
218,2,2,3,5,5,1,9


In [67]:
krk_dfs[1]

Unnamed: 0,col1,col2,col3,col4,col5,col6,class_type
0,3,1,5,3,4,4,-1
1,0,1,5,3,6,3,-1
2,2,2,1,8,1,7,-1
3,3,1,6,4,7,3,-1
4,1,1,6,7,7,8,-1
...,...,...,...,...,...,...,...
215,2,3,5,6,7,1,9
216,3,3,0,8,5,3,9
217,2,2,3,7,7,8,9
218,2,2,7,4,4,3,9


In [68]:
validation_df = krk_dfs[0].iloc[len(krk_dfs[0])-1:, 0:-1]

In [69]:
validation_df

Unnamed: 0,col1,col2,col3,col4,col5,col6
219,3,4,2,6,4,1


In [70]:
X = krk_dfs[0].iloc[:, 0:6]  # selecting features

In [71]:
X

Unnamed: 0,col1,col2,col3,col4,col5,col6
0,3,1,6,3,7,2
1,2,1,4,2,3,3
2,1,1,3,7,2,7
3,3,2,6,1,6,2
4,3,4,6,6,5,6
...,...,...,...,...,...,...
215,3,3,3,7,0,7
216,1,1,1,4,6,1
217,3,4,4,8,6,2
218,2,2,3,5,5,1


In [72]:
y = krk_dfs[0].iloc[:, 6]  # just the target

In [73]:
y

0     -1
1     -1
2     -1
3     -1
4     -1
      ..
215    9
216    9
217    9
218    9
219    9
Name: class_type, Length: 220, dtype: int64

In [74]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.2)  # test_size 0.2 means 20% testing data

In [75]:
classifier = DecisionTreeClassifier()

In [76]:
classifier.fit(X_train, y_train)

DecisionTreeClassifier()

In [77]:
y_pred = classifier.predict(X_test)

In [78]:
confusion_matrix(y_test, y_pred)

array([[5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
       [0, 0, 4, 0, 1, 0, 0, 0, 0, 2, 0],
       [0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1],
       [1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1],
       [1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2]])

In [82]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

          -1       0.62      0.83      0.71         6
           0       0.00      0.00      0.00         2
           1       0.80      0.57      0.67         7
           2       0.50      0.67      0.57         3
           3       0.60      0.60      0.60         5
           4       0.50      0.20      0.29         5
           5       0.00      0.00      0.00         1
           6       0.50      1.00      0.67         1
           7       0.40      0.50      0.44         4
           8       0.33      0.20      0.25         5
           9       0.50      0.40      0.44         5

    accuracy                           0.48        44
   macro avg       0.43      0.45      0.42        44
weighted avg       0.51      0.48      0.48        44

