## Import Necessary Libraries

In [1]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import classification_report



import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

## Read Datasets

Reading Raisin Dataset

In [2]:
raisin_data = pd.read_excel("Raisin_Dataset/Raisin_Dataset.xlsx",engine='openpyxl')
raisin_data.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.04,Kecimen
1,75166,406.690687,243.032436,0.801805,78789,0.68413,1121.786,Kecimen
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,Kecimen
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,Kecimen
4,79408,352.19077,290.827533,0.564011,81463,0.792772,1073.251,Kecimen


In [3]:
raisin_data.shape

(900, 8)

In [4]:
raisin_data.columns

Index(['Area', 'MajorAxisLength', 'MinorAxisLength', 'Eccentricity',
       'ConvexArea', 'Extent', 'Perimeter', 'Class'],
      dtype='object')

In [5]:
raisin_data.isnull().sum()

Area               0
MajorAxisLength    0
MinorAxisLength    0
Eccentricity       0
ConvexArea         0
Extent             0
Perimeter          0
Class              0
dtype: int64

In [6]:
raisin_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             900 non-null    int64  
 1   MajorAxisLength  900 non-null    float64
 2   MinorAxisLength  900 non-null    float64
 3   Eccentricity     900 non-null    float64
 4   ConvexArea       900 non-null    int64  
 5   Extent           900 non-null    float64
 6   Perimeter        900 non-null    float64
 7   Class            900 non-null    object 
dtypes: float64(5), int64(2), object(1)
memory usage: 56.4+ KB


In [7]:
label_encoder = LabelEncoder()
raisin_data['Class'] = label_encoder.fit_transform(raisin_data['Class'])
raisin_data.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.04,1
1,75166,406.690687,243.032436,0.801805,78789,0.68413,1121.786,1
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,1
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,1
4,79408,352.19077,290.827533,0.564011,81463,0.792772,1073.251,1


In [8]:
raisin_data.describe()


Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
count,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0
mean,87804.127778,430.92995,254.488133,0.781542,91186.09,0.699508,1165.906636,0.5
std,39002.11139,116.035121,49.988902,0.090318,40769.290132,0.053468,273.764315,0.500278
min,25387.0,225.629541,143.710872,0.34873,26139.0,0.379856,619.074,0.0
25%,59348.0,345.442898,219.111126,0.741766,61513.25,0.670869,966.41075,0.0
50%,78902.0,407.803951,247.848409,0.798846,81651.0,0.707367,1119.509,0.5
75%,105028.25,494.187014,279.888575,0.842571,108375.75,0.734991,1308.38975,1.0
max,235047.0,997.291941,492.275279,0.962124,278217.0,0.835455,2697.753,1.0


In [9]:
raisin_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             900 non-null    int64  
 1   MajorAxisLength  900 non-null    float64
 2   MinorAxisLength  900 non-null    float64
 3   Eccentricity     900 non-null    float64
 4   ConvexArea       900 non-null    int64  
 5   Extent           900 non-null    float64
 6   Perimeter        900 non-null    float64
 7   Class            900 non-null    int64  
dtypes: float64(5), int64(3)
memory usage: 56.4 KB


In [10]:
# Report perfomance

def ReportPerformance(y_test, y_pred):
    report = classification_report(y_test, y_pred,zero_division=0)
    print(report)

In [11]:
# Print Accuracy 

def ViewAccuracy(y_test, y_pred):
    print("Accuracy : %",accuracy_score(y_test, y_pred) * 100)

## Split the data into training and testing sets


In [12]:
# Split the data into training and testing sets

data = raisin_data.copy()

x = raisin_data.drop("Class", axis = 1)
y = raisin_data['Class']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

## Part2

In [13]:
mlp1 = MLPClassifier(hidden_layer_sizes=(10,), max_iter=100)

mlp1.fit(X_train, y_train)

predictions1 = mlp1.predict(X_test)

ViewAccuracy(y_test, predictions1)
ReportPerformance(y_test=y_test,y_pred=predictions1)


Accuracy : % 53.888888888888886
              precision    recall  f1-score   support

           0       0.54      1.00      0.70        97
           1       0.00      0.00      0.00        83

    accuracy                           0.54       180
   macro avg       0.27      0.50      0.35       180
weighted avg       0.29      0.54      0.38       180



In [14]:
mlp2 = MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=100)

mlp2.fit(X_train, y_train)

predictions2 = mlp2.predict(X_test)

ViewAccuracy(y_test, predictions2)
ReportPerformance(y_test=y_test,y_pred=predictions2)

Accuracy : % 63.888888888888886
              precision    recall  f1-score   support

           0       0.61      0.89      0.73        97
           1       0.72      0.35      0.47        83

    accuracy                           0.64       180
   macro avg       0.67      0.62      0.60       180
weighted avg       0.67      0.64      0.61       180



In [15]:
mlp3 = MLPClassifier(hidden_layer_sizes=(30, 20, 10), max_iter=100)

mlp3.fit(X_train, y_train)

predictions3 = mlp3.predict(X_test)

ViewAccuracy(y_test, predictions3)
ReportPerformance(y_test=y_test,y_pred=predictions3)

Accuracy : % 53.888888888888886
              precision    recall  f1-score   support

           0       0.54      1.00      0.70        97
           1       0.00      0.00      0.00        83

    accuracy                           0.54       180
   macro avg       0.27      0.50      0.35       180
weighted avg       0.29      0.54      0.38       180



## Part3

In [16]:
class CustomizedMLPClassifer(MLPClassifier):
    def resample_with_replacement(self, X_train, y_train, sample_weight):

        sample_weight = sample_weight / sample_weight.sum(dtype=np.float64)

        X_train_resampled = np.zeros((len(X_train), len(X_train[0])), dtype=np.float32)
        y_train_resampled = np.zeros((len(y_train)), dtype=np.int)
        for i in range(len(X_train)):
            draw = np.random.choice(np.arange(len(X_train)), p=sample_weight)
            X_train_resampled[i] = X_train[draw]
            y_train_resampled[i] = y_train[draw]

        return X_train_resampled, y_train_resampled


    def fit(self, X, y, sample_weight=None):
        if sample_weight is not None:
            X, y = self.resample_with_replacement(X, y, sample_weight)

        return self._fit(X, y, incremental=(self.warm_start and hasattr(self, "classes_")))




In [17]:
adabooster = AdaBoostClassifier(base_estimator=CustomizedMLPClassifer())

adabooster.fit(X_train,y_train)

predictions = adabooster.predict(X_test)

ViewAccuracy(y_test, predictions)
ReportPerformance(y_test=y_test,y_pred=predictions)

Accuracy : % 53.888888888888886
              precision    recall  f1-score   support

           0       0.54      1.00      0.70        97
           1       0.00      0.00      0.00        83

    accuracy                           0.54       180
   macro avg       0.27      0.50      0.35       180
weighted avg       0.29      0.54      0.38       180



## Part 4

Custom decision tree node with a trainable perceptron

In [18]:
class DecisionTreeNode:
    def __init__(self):
        self.perceptron = MLPClassifier(hidden_layer_sizes=(10,),  activation='relu', max_iter=100)
    
    def train(self, X, y):
        self.perceptron.fit(X, y)

    def predict(self, X):
        return self.perceptron.predict(X)

Random decision forest with trainable perceptrons

In [19]:
class RandomForestWithPerceptrons:
    def __init__(self, num_trees):
        self.num_trees = num_trees
        self.trees = []
    
    def fit(self, X, y):
        for _ in range(self.num_trees):
            tree = DecisionTreeNode()
            tree.train(X, y)
            self.trees.append(tree)
    
    def predict(self, X):
        predictions = []
        for tree in self.trees:
            predictions.append(tree.predict(X))
        return np.array(predictions).mean(axis=0)

In [25]:
forest = RandomForestWithPerceptrons(num_trees=10)

forest.fit(X_train, y_train)

predictions = forest.predict(X_test)


rounded_predictions = np.round(predictions)


y_pred_labels = rounded_predictions.astype(int)

ViewAccuracy(y_test=y_test,y_pred=y_pred_labels)
ReportPerformance(y_test=y_test,y_pred=y_pred_labels)

Accuracy : % 68.33333333333333
              precision    recall  f1-score   support

           0       0.79      0.56      0.65        97
           1       0.62      0.83      0.71        83

    accuracy                           0.68       180
   macro avg       0.71      0.69      0.68       180
weighted avg       0.71      0.68      0.68       180

