## Decision Tree Model:

In [1]:
import pandas as pd

# Read the feature table and the target table from their CSV files
# (features first, then targets).
features_df, targets_df = (
    pd.read_csv(csv_path) for csv_path in ('features.csv', 'targets.csv')
)

# To inspect the structure of either frame, display features_df.head()
# or targets_df.head() in a separate cell.

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix

In [3]:
# Join features and targets column-wise, then separate the label from the predictors.
data_df = pd.concat([features_df, targets_df], axis=1)
y = data_df['Class']
X = data_df.drop(columns='Class')

# Gini-based decision tree capped at 17 leaves, i.e. at most 16 splits.
clf = DecisionTreeClassifier(criterion='gini', max_leaf_nodes=17, random_state=42)

# 10-fold cross-validation; the mean and spread of the fold scores are reported below.
cv_scores = cross_val_score(clf, X, y, cv=10)
cv_mean, cv_std = cv_scores.mean(), cv_scores.std()

print(f"Cross-Validation Mean Accuracy: {cv_mean:.2f}")
print(f"Cross-Validation Standard Deviation: {cv_std:.2f}")

# Fit on the complete data set and build a confusion matrix.
# NOTE: the predictions are made on the very rows used for fitting, so this is a
# training-set (resubstitution) matrix, not a cross-validated one.
y_pred = clf.fit(X, y).predict(X)
conf_matrix = confusion_matrix(y, y_pred)

# Display names for rows/columns; assumed to be in the same (sorted) order as the
# class labels found in y -- confirm against clf.classes_.
class_labels = ['Barbunya', 'Bombay', 'Cali', 'Dermason', 'Horoz', 'Seker', 'Sira']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_labels, columns=class_labels)

# Show the labelled confusion matrix.
print(conf_matrix_df)


Cross-Validation Mean Accuracy: 0.77
Cross-Validation Standard Deviation: 0.17
          Barbunya  Bombay  Cali  Dermason  Horoz  Seker  Sira
Barbunya      1052       1   177         0     60      7    25
Bombay           0     515     7         0      0      0     0
Cali            56       0  1507         0     57      1     9
Dermason         0       0     0      3249      3     66   228
Horoz           25       0    24        14   1816      0    49
Seker            4       0     3        35      0   1891    94
Sira             3       0     1       324     65     35  2208


Our confusion matrix (computed on the same data the tree was fitted on) looks like this:

| Class    | Barbunya | Bombay | Cali | Dermason | Horoz | Seker | Sira |
|----------|----------|--------|------|----------|-------|-------|------|
| Barbunya | 1052     | 1      | 177  | 0        | 60    | 7     | 25   |
| Bombay   | 0        | 515    | 7    | 0        | 0     | 0     | 0    |
| Cali     | 56       | 0      | 1507 | 0        | 57    | 1     | 9    |
| Dermason | 0        | 0      | 0    | 3249     | 3     | 66    | 228  |
| Horoz    | 25       | 0      | 24   | 14       | 1816  | 0     | 49   |
| Seker    | 4        | 0      | 3    | 35       | 0     | 1891  | 94   |
| Sira     | 3        | 0      | 1    | 324      | 65    | 35    | 2208 |


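Confusion matrix reported in the paper:

**Note (to verify):** as transcribed below, the Bombay, Cali and Dermason rows appear misaligned — e.g. the Cali and Dermason diagonal entries are 0 — so check these rows against the paper's table before comparing.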

| Actual/Predict | Barbunya | Bombay | Cali | Dermason | Horoz | Seker | Sira  |
|----------------|----------|--------|------|----------|-------|-------|-------|
| Barbunya       | 904      | 1      | 140  | 0        | 2     | 7     | 2     |
| Bombay         | 1        | 352    | 0    | 517      | 3     | 0     | 0     |
| Cali           | 0        | 1455   | 0    | 0        | 0     | 102   | 15    |
| Dermason       | 0        | 0      | 3209 | 0        | 0     | 0     | 62    |
| Horoz          | 0        | 28     | 258  | 1        | 1709  | 0     | 7     |
| Seker          | 12       | 0      | 1    | 73       | 1     | 1877  | 45    |
| Sira           | 50       | 0      | 10   | 263      | 99    | 81    | 2296  |


In [4]:
# Optional setup for visualizing the decision tree graph (uncomment to run):
# !pip install graphviz

# The Graphviz system package may also be required, e.g. on Debian/Ubuntu:
# # sudo apt-get update
# # sudo apt-get install graphviz


In [5]:
from sklearn.tree import export_graphviz
import graphviz

# Export the fitted tree as DOT source.
# class_names must follow the classifier's own ascending class order (clf.classes_).
# y.unique() returns labels in order of first appearance, which can attach the
# wrong class name to tree nodes. Names are passed as plain lists of str.
dot_data = export_graphviz(
    clf,
    out_file=None,
    feature_names=X.columns.tolist(),
    class_names=[str(label) for label in clf.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT source in a graphviz object.
graph = graphviz.Source(dot_data)

# Render to disk; the returned path ("decision_tree.pdf") is shown as the cell output.
graph.render("decision_tree")

'decision_tree.pdf'

In [6]:

# Second experiment: a smaller Gini-based tree capped at 10 leaves
# (i.e. at most 9 splits), with a different random seed.
clf = DecisionTreeClassifier(criterion='gini', max_leaf_nodes=10, random_state=1442)

# 10-fold cross-validation; the mean and spread of the fold scores are reported below.
cv_scores = cross_val_score(clf, X, y, cv=10)
cv_mean = cv_scores.mean()
cv_std = cv_scores.std()

print(f"Cross-Validation Mean Accuracy: {cv_mean:.2f}")
print(f"Cross-Validation Standard Deviation: {cv_std:.2f}")

# Fit on the complete data set and build a confusion matrix.
# NOTE: the predictions are made on the very rows used for fitting, so this is a
# training-set (resubstitution) matrix, not a cross-validated one.
clf.fit(X, y)
y_pred = clf.predict(X)
conf_matrix = confusion_matrix(y, y_pred)

# Display names for rows/columns; assumed to be in the same (sorted) order as the
# class labels found in y -- confirm against clf.classes_.
class_labels = ['Barbunya', 'Bombay', 'Cali', 'Dermason', 'Horoz', 'Seker', 'Sira']
conf_matrix_df = pd.DataFrame(conf_matrix, index=class_labels, columns=class_labels)

# Show the labelled confusion matrix.
print(conf_matrix_df)

# Export the fitted tree as DOT source.
# class_names must follow the classifier's own ascending class order (clf.classes_).
# y.unique() returns labels in order of first appearance, which can attach the
# wrong class name to tree nodes. Names are passed as plain lists of str.
dot_data = export_graphviz(
    clf,
    out_file=None,
    feature_names=X.columns.tolist(),
    class_names=[str(label) for label in clf.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT source in a graphviz object.
graph = graphviz.Source(dot_data)

# Render to disk; the returned path ("decision_tree_2.pdf") is shown as the cell output.
graph.render("decision_tree_2")

Cross-Validation Mean Accuracy: 0.82
Cross-Validation Standard Deviation: 0.19
          Barbunya  Bombay  Cali  Dermason  Horoz  Seker  Sira
Barbunya      1118       1   139         0      3      5    56
Bombay           0     515     7         0      0      0     0
Cali           147       0  1457         0     16      1     9
Dermason         0       0     0      3270      1     41   234
Horoz           25       0    78        14   1712      0    99
Seker            7       0     0        99      0   1733   188
Sira             4       0    24       324      7     18  2259


'decision_tree_2.pdf'

### More detailed analysis: