In [5]:

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Generate a synthetic dataset
SEED = 42          # fixed seed so Restart & Run All reproduces the same rows
N_SAMPLES = 3000   # number of rows to generate
N_FEATURES = 10    # a, b, c, c1, c2, c3, d, d1, d2, e


def assign_message(a, b, c, c1, c2, c3, d, d1, d2, e):
    """Return the message label for one row of binary features.

    The rules are evaluated top to bottom and the first match wins, so the
    order of the checks is part of the rule definition.

    Parameters
    ----------
    a, b, c, c1, c2, c3, d, d1, d2, e : int
        Feature values, each expected to be 0 or 1.

    Returns
    -------
    str
        One of 'M1' .. 'M11'. 'M1' is the fallback returned when no rule
        fires; with binary inputs that only happens for the single
        combination a=1, b=1, c=1, c1=0, c2=0, c3=0, d=0, d1=0, d2=1, e=1.
    """
    # Rule logic to determine the message
    if a == 0 and b == 1:
        return 'M2'
    if b == 0 and c == 1:
        return 'M3'
    if c == 0 and c1 == 0:
        return 'M4'
    if c1 == 1 and c2 == 0:
        return 'M5'
    if c2 == 1 and c3 == 0:
        return 'M6'
    if c3 == 1 and d == 0:
        return 'M7'
    if d == 1 and d1 == 0:
        return 'M8'
    if d1 == 1 and d2 == 1:
        return 'M9'
    if d2 == 0 and e == 1:
        return 'M10'
    if e == 0:
        return 'M11'
    return 'M1'  # Add additional rules if needed


def generate_rows(n_samples, seed=None):
    """Generate labelled rows of random binary features.

    Parameters
    ----------
    n_samples : int
        Number of rows to produce.
    seed : int or None
        Seed for the random generator. The same integer always yields the
        same rows; None draws fresh entropy on every call.

    Returns
    -------
    list of list
        One ``[a, b, c, c1, c2, c3, d, d1, d2, e, result]`` entry per row,
        where the first ten items are 0/1 ints and ``result`` is the label
        from ``assign_message``.
    """
    rng = np.random.default_rng(seed)
    # Draw every feature in one call; the upper bound 2 is exclusive, so the
    # values are uniformly 0 or 1.
    features = rng.integers(0, 2, size=(n_samples, N_FEATURES))
    return [row + [assign_message(*row)] for row in features.tolist()]


# Store data
data = generate_rows(N_SAMPLES, seed=SEED)

# Build the table: ten feature columns followed by the label column.
df = pd.DataFrame(
    data=data,
    columns=[
        'a', 'b',
        'c', 'c1', 'c2', 'c3',
        'd', 'd1', 'd2',
        'e',
        'Result',
    ],
)

# Persist the table as an Excel workbook, omitting the row index.
df.to_excel(excel_writer='synthetic_dataset.xlsx', index=False)

# Separate features and labels
X = df.drop(columns=['Result'])  # Features: every column except the label
y = df['Result']  # Labels: the message code stored in 'Result'

# Split data into training and testing sets (70% train / 30% test).
# The split is seeded so the same rows are held out on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create decision tree model.
# random_state is fixed (same value as the split) because scikit-learn
# randomly permutes the candidate features at each split; without a seed,
# ties between equally good splits can be broken differently from run to
# run, producing a different tree.
tree_model = DecisionTreeClassifier(random_state=42)

# Train the model
tree_model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = tree_model.predict(X_test)

# Generate confusion matrix (rows = true labels, columns = predicted labels).
# Labels are ordered as sorted strings, i.e. M1, M10, M11, M2, ..., not
# numerically.
conf_matrix = confusion_matrix(y_test, y_pred)

# Compute accuracy
accuracy = accuracy_score(y_test, y_pred)

# Generate classification report (per-class precision / recall / F1 / support)
report = classification_report(y_test, y_pred)

# Display metrics
# NOTE: 'Result' is computed deterministically from the feature columns when
# the dataset is generated, so scores at or near 1.0 are expected here and
# do not indicate how the model would behave on noisy data.
print("Confusion Matrix:")
print(conf_matrix)
print("\nAccuracy:", accuracy)
print("\nClassification Report:")
print(report)

# Perform 10-fold cross-validation.
# This runs on the full dataset (X, y), not only on the training split.
cv_scores = cross_val_score(tree_model, X, y, cv=10)

print("\nCross-validation scores:", cv_scores)
print("Mean cross-validation accuracy:", cv_scores.mean())


Confusion Matrix:
[[  3   0   0   0   0   0   0   0   0   0   0]
 [  0  13   0   0   0   0   0   0   0   0   0]
 [  0   0   5   0   0   0   0   0   0   0   0]
 [  0   0   0 217   0   0   0   0   0   0   0]
 [  0   0   0   0 231   0   0   0   0   0   0]
 [  0   0   0   0   0 178   0   0   0   0   0]
 [  0   0   0   0   0   0 116   0   0   0   0]
 [  0   0   0   0   0   0   0  65   0   0   0]
 [  0   0   0   0   0   0   0   0  40   0   0]
 [  0   0   0   0   0   0   0   0   0  24   0]
 [  0   0   0   0   0   0   0   0   0   0   8]]

Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

          M1       1.00      1.00      1.00         3
         M10       1.00      1.00      1.00        13
         M11       1.00      1.00      1.00         5
          M2       1.00      1.00      1.00       217
          M3       1.00      1.00      1.00       231
          M4       1.00      1.00      1.00       178
          M5       1.00      1.00      1.00   

