# Some Imports

In [1]:
import pandas as pd
import numpy as np
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import BaggingClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import Perceptron

# Notebook-wide configuration.
# Every warning category is suppressed for the rest of the session, so
# deprecation and convergence messages from the cells below will not be shown.
warnings.simplefilter('ignore')

# Cap how many columns / rows pandas renders when a frame is displayed.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 50)

# Part I: Select a dataset

In [2]:
# Dataset page: https://archive.ics.uci.edu/dataset/545/rice+cammeo+and+osmancik
# (the archive has to be downloaded and extracted by hand).
# A local CSV copy is read here because it is faster than going through a link.
target_url = "docs/Rice_Cammeo_Osmancik.csv"
rice_df = pd.read_csv(filepath_or_buffer=target_url)

# Summary statistics of the numeric columns (displayed as the cell output).
rice_df.describe()

Unnamed: 0,Area,Perimeter,Major_Axis_Length,Minor_Axis_Length,Eccentricity,Convex_Area,Extent
count,3810.0,3810.0,3810.0,3810.0,3810.0,3810.0,3810.0
mean,12667.727559,454.23918,188.776222,86.31375,0.886871,12952.49685,0.661934
std,1732.367706,35.597081,17.448679,5.729817,0.020818,1776.972042,0.077239
min,7551.0,359.100006,145.264465,59.532406,0.777233,7723.0,0.497413
25%,11370.5,426.144753,174.353855,82.731695,0.872402,11626.25,0.598862
50%,12421.5,448.852493,185.810059,86.434647,0.88905,12706.5,0.645361
75%,13950.0,483.683746,203.550438,90.143677,0.902588,14284.0,0.726562
max,18913.0,548.445984,239.010498,107.54245,0.948007,19099.0,0.86105


In [3]:
# Features are every column except 'Class'; 'Class' is the target.
X, y = rice_df.drop(columns='Class'), rice_df['Class']
y = y.map({'Cammeo': 0, 'Osmancik': 1})  # Encode labels

# Series.map leaves NaN for any value that is not a key of the dict, so a
# misspelled or unexpected class name would silently become a missing label.
# Fail here, next to the cause, instead of inside a model's fit() later on.
assert y.notna().all(), "Unexpected value in 'Class': only 'Cammeo' and 'Osmancik' are encoded"

# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Split
print(X_train.shape, X_test.shape)

(3048, 7) (762, 7)


In [4]:
# Standardize: the scaler's statistics are learned from the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Part II: Train a multi-layer perceptron

In [5]:
# Baseline: a single MLP with one hidden layer of 100 units.
mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp_classifier.fit(X_train, y_train)
y_pred_mlp = mlp_classifier.predict(X_test)

print("Only MLP Classifier Accuracy:", accuracy_score(y_test, y_pred_mlp))
print("Classification Report for Only MLP Classifier:\n", classification_report(y_test, y_pred_mlp))
print("\n")

# Ensemble of 50 MLPs.
# NOTE: the ensemble built here is a BaggingClassifier (bootstrap aggregation),
# not AdaBoost. The `ada_boost` names and the printed "AdaBoost" labels are kept
# unchanged so other cells and previously saved outputs still line up.
# The estimator is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old spelling was
# removed in 1.4; the first positional parameter works with both.
ada_boost = BaggingClassifier(mlp_classifier, n_estimators=50, random_state=42)
ada_boost.fit(X_train, y_train)
y_pred_ada_boost = ada_boost.predict(X_test)

print("AdaBoost with MLP Classifier Accuracy:", accuracy_score(y_test, y_pred_ada_boost))
print("Classification Report for AdaBoost with MLP Classifier:\n", classification_report(y_test, y_pred_ada_boost))

Only MLP Classifier Accuracy: 0.9291338582677166
Classification Report for Only MLP Classifier:
               precision    recall  f1-score   support

           0       0.93      0.92      0.92       350
           1       0.93      0.94      0.93       412

    accuracy                           0.93       762
   macro avg       0.93      0.93      0.93       762
weighted avg       0.93      0.93      0.93       762



AdaBoost with MLP Classifier Accuracy: 0.926509186351706
Classification Report for AdaBoost with MLP Classifier:
               precision    recall  f1-score   support

           0       0.92      0.92      0.92       350
           1       0.93      0.93      0.93       412

    accuracy                           0.93       762
   macro avg       0.93      0.93      0.93       762
weighted avg       0.93      0.93      0.93       762



# Part III: Train a random decision forest with perceptron

In [6]:
# Custom decision tree in which every internal node routes samples with a Perceptron
class PerceptronDecisionForest(ClassifierMixin, BaseEstimator):
    """Decision tree whose internal nodes split on a Perceptron's prediction.

    Despite the class name, one instance is a single tree. At every internal
    node a ``Perceptron`` is fitted on the samples that reached the node;
    samples it predicts as label ``0`` are sent to the left child and all
    other samples to the right child. A leaf stores the majority label of the
    training samples that reached it.

    Labels must be non-negative integers (leaves are computed with
    ``np.bincount``), and routing tests for a prediction equal to ``0``.

    Parameters
    ----------
    max_depth : int, default=3
        Depth at which a node is turned into a leaf (the root has depth 0).
    min_samples_split : int, default=2
        A node holding fewer samples than this becomes a leaf.

    Attributes
    ----------
    tree_ : dict, integer label, or None
        ``None`` until ``fit`` is called. Internal nodes are dicts with the
        keys ``'perceptron'``, ``'left'`` and ``'right'``; leaves are labels.
    classes_ : ndarray
        Sorted unique labels seen by ``fit``.
    """

    def __init__(self, max_depth=3, min_samples_split=2):
        # BaseEstimator.get_params() / clone() look up attributes named exactly
        # like the constructor arguments, so the values are stored under
        # those names.
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree_ = None

    @property
    def maximum_depth(self):
        """Backward-compatible alias of ``max_depth``."""
        return self.max_depth

    @maximum_depth.setter
    def maximum_depth(self, value):
        self.max_depth = value

    @property
    def minimum_samples_split(self):
        """Backward-compatible alias of ``min_samples_split``."""
        return self.min_samples_split

    @minimum_samples_split.setter
    def minimum_samples_split(self, value):
        self.min_samples_split = value

    def fit(self, X, y):
        """Build the tree from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Non-negative integer labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        # Plain arrays make boolean-mask row selection behave the same for
        # DataFrame / Series / list inputs.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("Cannot fit PerceptronDecisionForest on an empty training set.")
        if X.shape[0] != len(y):
            raise ValueError(
                "X and y have inconsistent lengths: %d != %d" % (X.shape[0], len(y))
            )

        self.classes_ = np.unique(y)
        self.tree_ = self._fit(X, y, depth=0)
        return self

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y`` (the smallest one on ties)."""
        return np.argmax(np.bincount(y))

    def _fit(self, X, y, depth):
        """Recursively build the subtree for the samples ``(X, y)`` at ``depth``."""
        # Stop on a pure node, a node that is too small, or at the depth limit.
        if len(np.unique(y)) == 1 or len(y) < self.min_samples_split or depth == self.max_depth:
            return self._majority_label(y)

        perceptron = Perceptron()
        perceptron.fit(X, y)

        left_mask = perceptron.predict(X) == 0
        right_mask = ~left_mask

        # A perceptron that sends every sample the same way cannot split the
        # node any further; recursing would only repeat the same data.
        if not left_mask.any() or not right_mask.any():
            return self._majority_label(y)

        return {
            'perceptron': perceptron,
            'left': self._fit(X[left_mask], y[left_mask], depth + 1),
            'right': self._fit(X[right_mask], y[right_mask], depth + 1)
        }

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.tree_ is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This PerceptronDecisionForest instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        X = np.asarray(X)
        if X.shape[0] == 0:
            return np.array([])
        return self._predict_batch(X, self.tree_)

    def _predict_batch(self, X, node):
        """Route all rows of a non-empty 2-D array ``X`` through ``node`` at once."""
        if not isinstance(node, dict):
            # Leaf: every row that got here receives the stored label.
            return np.full(X.shape[0], node)

        # One batched perceptron call per node, the same call that partitioned
        # the data during training, instead of one call per sample.
        goes_left = node['perceptron'].predict(X) == 0
        labels = np.empty(X.shape[0], dtype=np.intp)
        for mask, child in ((goes_left, node['left']), (~goes_left, node['right'])):
            # Skip empty partitions: the child perceptron cannot predict on
            # zero rows.
            if mask.any():
                labels[mask] = self._predict_batch(X[mask], child)
        return labels

    def _predict_one(self, x, node):
        """Route a single sample ``x`` from ``node`` down to a leaf label.

        Not used by ``predict`` any more; kept for code that calls it directly.
        """
        if not isinstance(node, dict):
            return node
        if node['perceptron'].predict([x])[0] == 0:
            return self._predict_one(x, node['left'])
        return self._predict_one(x, node['right'])

In [8]:
# Fit the perceptron-node tree on the training split and evaluate it on the test split
perceptron_decision_forest = PerceptronDecisionForest(max_depth=5, min_samples_split=10)
perceptron_decision_forest.fit(X_train, y_train)
y_pred_perceptron_dt = perceptron_decision_forest.predict(X_test)

# Compute both metrics first, then report them.
perceptron_dt_accuracy = accuracy_score(y_test, y_pred_perceptron_dt)
perceptron_dt_report = classification_report(y_test, y_pred_perceptron_dt)

print("Random Decision Forest With Perceptron Classifier Accuracy:", perceptron_dt_accuracy)
print("Classification Report for Random Decision Forest With Perceptron Classifier:\n", perceptron_dt_report)

Random Decision Forest With Perceptron Classifier Accuracy: 0.9238845144356955
Classification Report for Random Decision Forest With Perceptron Classifier:
               precision    recall  f1-score   support

           0       0.93      0.91      0.92       350
           1       0.92      0.94      0.93       412

    accuracy                           0.92       762
   macro avg       0.92      0.92      0.92       762
weighted avg       0.92      0.92      0.92       762

