In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sb

from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score 
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from xgboost import XGBClassifier

## Load the dataset

In [None]:
# Read the rice measurements from the Excel workbook into a DataFrame.
excel_file = 'Rice_Cammeo_Osmancik.xlsx'
raw_data = pd.read_excel(excel_file)

# Columns fed to the models as predictors.
input_features = [
    'Area',
    'Perimeter',
    'Major_Axis_Length',
    'Minor_Axis_Length',
    'Eccentricity',
    'Convex_Area',
    'Extent',
]
# Target column; kept as a one-element list so the selection below yields a
# DataFrame rather than a Series.
output_feature = ['Class']

# Split the raw table into predictor and target frames.
input_data = raw_data[input_features]
output_data = raw_data[output_feature]

In [None]:
# Standardise the numeric predictor columns.
#
# The features are re-selected from `raw_data` rather than read from
# `input_data`, because this cell rebinds `input_data` to the scaled array;
# starting from the raw frame keeps the cell safe to re-run.
feature_frame = raw_data[input_features]

numeric_features = feature_frame.select_dtypes('number').columns

numeric_cols = [col for col in feature_frame.columns if col in numeric_features]

preprocessor = ColumnTransformer(
    transformers=[('num', StandardScaler(), numeric_cols)])

# Fixed: the original passed an undefined name (`dataset`) here, which raised
# NameError on a fresh kernel.
# NOTE(review): the scaler is fitted on every row before the train/test split
# that happens in a later cell, so test rows influence the scaling statistics.
# Consider fitting on the training split only.
input_data = preprocessor.fit_transform(feature_frame)

In [None]:
# Turn the textual rice class into integer codes and store them next to the
# original label in a fresh frame (the frame selected from raw_data is not
# modified).
encoder = LabelEncoder()
output_data = output_data.assign(
    encoded_rice=encoder.fit_transform(output_data['Class'])
)

In [None]:
# Hold out 20% of the rows for testing; stratifying on the encoded label keeps
# the class proportions the same in both partitions, and the fixed seed makes
# the split repeatable.
encoded_labels = output_data['encoded_rice']
x_train, x_test, y_train, y_test = train_test_split(
    input_data,
    encoded_labels,
    test_size=0.2,
    random_state=66,
    stratify=encoded_labels,
)

print('Training data shape:' , x_train.shape)
print('Testing data shape:' , x_test.shape)

In [None]:
# Random forest baseline.
# random_state is fixed (same seed as the train/test split) so the bootstrap
# samples and feature sub-sampling, and therefore the reported accuracy, are
# reproducible across Restart & Run All.
rfr = RandomForestClassifier(n_estimators=100, random_state=66)
rfr.fit(x_train, y_train)

rfr_pred_class = rfr.predict(x_test)

# Accuracy on the held-out split, expressed as a percentage.
accuracy = accuracy_score(y_test, rfr_pred_class)*100
print(accuracy)



In [None]:
# Gradient-boosted trees.
num_classes = len(np.unique(y_train))

# Pick the objective from the number of classes actually present.
# `num_class` is a multi-class parameter: the binary logistic objective
# produces a single score per row and must not be combined with it, so it is
# only passed together with the multi-class objective.
if num_classes > 2:
    objective_params = {'objective': 'multi:softprob', 'num_class': num_classes}
else:
    objective_params = {'objective': 'binary:logistic'}

xgb = XGBClassifier(
    n_estimators=100,
    use_label_encoder=False,
    **objective_params,
)
xgb.fit(x_train, y_train)

# Fixed: the original predicted with an undefined name (`xgb_rp`), which raised
# NameError; the fitted estimator is `xgb`.
xgb_pred_class = xgb.predict(x_test)

# Accuracy on the held-out split, expressed as a percentage.
accuracy = accuracy_score(y_test, xgb_pred_class)*100
print(accuracy)

In [None]:
# k-nearest neighbours: 5 neighbours, each vote weighted by distance.
knn = KNeighborsClassifier(weights='distance', n_neighbors=5)
knn.fit(x_train, y_train)

# Score the held-out split and report accuracy as a percentage.
knn_pred_class = knn.predict(x_test)
accuracy = 100 * accuracy_score(y_true=y_test, y_pred=knn_pred_class)
print(accuracy)

In [None]:
# Logistic regression with balanced class weights and a raised iteration cap
# for the lbfgs solver.
lr = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    class_weight='balanced',
)
lr.fit(x_train, y_train)

# Score the held-out split and report accuracy as a percentage.
lr_pred_class = lr.predict(x_test)
accuracy = 100 * accuracy_score(y_true=y_test, y_pred=lr_pred_class)
print(accuracy)

In [None]:
# RBF-kernel support vector classifier with balanced class weights;
# probability estimates are enabled on the estimator.
svc = SVC(
    kernel='rbf',
    class_weight='balanced',
    probability=True,
)
svc.fit(x_train, y_train)

# Score the held-out split and report accuracy as a percentage.
svc_pred_class = svc.predict(x_test)
accuracy = 100 * accuracy_score(y_true=y_test, y_pred=svc_pred_class)
print(accuracy)