In [1]:
# ðŸ“˜ Decision Tree and Random Forest Classification

# Step 1: Import libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 2: Load labelled dataset (Iris)
iris = load_iris()
X, y = iris.data, iris.target




In [2]:
# Step 3: Split into training and testing data (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Train Decision Tree model (using Gini Impurity)
dt_model = DecisionTreeClassifier(criterion='gini', random_state=42)
dt_model.fit(X_train, y_train)

# Step 5: Train Random Forest model (ensemble of Decision Trees)
rf_model = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=42)
rf_model.fit(X_train, y_train)

# Step 6: Predict and evaluate both models
y_pred_dt = dt_model.predict(X_test)
y_pred_rf = rf_model.predict(X_test)

# Step 7: Evaluate accuracy
acc_dt = accuracy_score(y_test, y_pred_dt)
acc_rf = accuracy_score(y_test, y_pred_rf)



In [3]:
print("âœ… Decision Tree Accuracy:", round(acc_dt*100, 2), "%")
print("âœ… Random Forest Accuracy:", round(acc_rf*100, 2), "%")



âœ… Decision Tree Accuracy: 100.0 %
âœ… Random Forest Accuracy: 100.0 %


In [4]:
# Step 8: Display confusion matrices
print("\nðŸ“Š Decision Tree Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_dt))




ðŸ“Š Decision Tree Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]


In [5]:
print("\nðŸ“Š Random Forest Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))




ðŸ“Š Random Forest Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]


In [6]:
# Step 9: Detailed classification report
print("\nðŸ“„ Classification Report (Decision Tree):")
print(classification_report(y_test, y_pred_dt, target_names=iris.target_names))




ðŸ“„ Classification Report (Decision Tree):
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [7]:
print("\nðŸ“„ Classification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf, target_names=iris.target_names))


ðŸ“„ Classification Report (Random Forest):
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [11]:
# ðŸ“˜ Decision Tree vs Random Forest using CSV Dataset

# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 2: Load your dataset (replace filename with your actual file)
# Example: "retail_customers.csv" with columns like Age, Income, Spending, Category
data = pd.read_csv("CM_retail.csv")
print("ðŸ“Š Dataset Preview:")
print(data.head())

# Step 3: Separate features and target (label)
# Assuming last column is the label (change if needed)
X = data.iloc[:, :-1]  # features
y = data.iloc[:, -1]   # target

# Encode labels if they are categorical
if y.dtype == 'object':
    le = LabelEncoder()
    y = le.fit_transform(y)

# Step 4: Split dataset into training and test sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Train Decision Tree model (Gini Impurity)
dt_model = DecisionTreeClassifier(criterion='gini', random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Step 6: Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Step 7: Evaluate both models
acc_dt = accuracy_score(y_test, y_pred_dt)
acc_rf = accuracy_score(y_test, y_pred_rf)

print("\nâœ… Model Accuracies:")
print(f"Decision Tree Accuracy: {acc_dt*100:.2f}%")
print(f"Random Forest Accuracy: {acc_rf*100:.2f}%")

print("\nðŸ“Š Decision Tree Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_dt))

print("\nðŸ“Š Random Forest Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))

print("\nðŸ“„ Classification Report (Decision Tree):")
print(classification_report(y_test, y_pred_dt))

print("\nðŸ“„ Classification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf))


ðŸ“Š Dataset Preview:
   Age  Income  SpendingScore Category
0   22   30000             45      Low
1   25   35000             60   Medium
2   30   40000             75     High
3   35   50000             65     High
4   40   45000             55   Medium

âœ… Model Accuracies:
Decision Tree Accuracy: 50.00%
Random Forest Accuracy: 25.00%

ðŸ“Š Decision Tree Confusion Matrix:
[[1 0 0]
 [0 0 1]
 [1 0 1]]

ðŸ“Š Random Forest Confusion Matrix:
[[1 0 0]
 [0 0 1]
 [1 1 0]]

ðŸ“„ Classification Report (Decision Tree):
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1
           2       0.50      0.50      0.50         2

    accuracy                           0.50         4
   macro avg       0.33      0.50      0.39         4
weighted avg       0.38      0.50      0.42         4


ðŸ“„ Classification Report (Random Forest):
              precision    recall  f1-score   support

 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
