In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score

In [6]:
# Read the Dry Bean measurements from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="Dry_Bean_Dataset.csv")

In [7]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.272751,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [8]:
# Tally how many samples each bean class contributes, to see the class balance.
count = df.loc[:, 'Class'].value_counts()
print(count)

Class
DERMASON    3546
SIRA        2636
SEKER       2027
HOROZ       1928
CALI        1630
BARBUNYA    1322
BOMBAY       522
Name: count, dtype: int64


In [9]:
# Split the frame into predictors (X) and the target label (Y).
#
# 'Unnamed: 0' is the row index that was written into the CSV (it mirrors the
# row number in the df.head() preview). It identifies a row rather than
# describing a bean, so it is removed together with the label to keep row-order
# information out of the models. errors='ignore' keeps the cell working if the
# CSV is ever re-exported without that column.
X = df.drop(columns=['Class', 'Unnamed: 0'], errors='ignore')

# The label column is spelled 'Class' (capital C); df['class'] raises KeyError.
Y = df['Class']

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
)

In [11]:
# Learn the per-feature mean and standard deviation from the training rows only,
# then apply that same transformation to both splits so no test-set statistics
# influence the scaling.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
import pickle

In [13]:
# Logistic-regression baseline trained on the standardized training split.
#
# `multi_class` is deliberately not passed: 'auto' was already the default, and
# the parameter is deprecated since scikit-learn 1.5 (passing it emits a
# FutureWarning and will fail once the parameter is removed). Leaving it out
# keeps the fitted model unchanged.
# max_iter is raised above the library default to give lbfgs room to converge.
classifier = LogisticRegression(random_state=0, solver='lbfgs', max_iter=1000)
classifier.fit(X_train, Y_train)

In [14]:
# Hard class predictions for the held-out rows.
Y_pred = classifier.predict(X_test)

# Per-class membership probabilities, rounded to two decimals for readability.
probs_Y = np.round(classifier.predict_proba(X_test), decimals=2)

In [16]:
# Cross-tabulate true labels against the logistic-regression predictions.
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)


In [17]:
# Show the logistic-regression confusion matrix (rows follow the true labels and
# columns the predicted labels, per the scikit-learn convention).
# The label previously read "Confusin Matrix"; spelled out to match the other
# confusion-matrix printouts in this notebook.
print("Logistic regression Confusion Matrix: \n", cm)

Logistic regression Confusin Matrix: 
 [[222   0  18   0   1   3  11]
 [  0  92   0   0   0   0   0]
 [  6   0 333   0   6   1   4]
 [  1   0   0 681   3   8  42]
 [  1   0   4   2 376   0   7]
 [  1   0   0   1   0 360  14]
 [  0   0   2  44  11   5 463]]


In [18]:
# Macro average: precision is computed per class and then averaged unweighted,
# so every bean class counts equally regardless of its size.
precision = precision_score(y_true=Y_test, y_pred=Y_pred, average="macro")

In [19]:
# Report the macro-averaged test-set precision of the logistic-regression model.
print("Logistic regression Precision: ", precision)

Logistic regression Precision:  0.9410120882738455


In [20]:
# Macro average: recall is computed per class and then averaged unweighted.
recall = recall_score(y_true=Y_test, y_pred=Y_pred, average="macro")

In [21]:
# Report the macro-averaged test-set recall of the logistic-regression model.
# The label was missing the space before "Recall", unlike the other metric labels.
print("Logistic regression Recall: ", recall)

Logistic regressionRecall:  0.9360002219265073


In [22]:
# Support vector machine (SVM) classifier
from sklearn.svm import SVC

In [23]:
# RBF-kernel support vector classifier on the standardized training split.
# probability=True is needed because predict_proba is called on this model later.
svm_classifier = SVC(
    kernel='rbf',
    probability=True,
    random_state=0,
)
svm_classifier.fit(X_train, Y_train)

In [24]:
# Hard class predictions from the SVM for the held-out rows.
Y_pred_svm = svm_classifier.predict(X_test)

# Per-class membership probabilities, rounded to two decimals for readability.
probs_Y_svm = np.round(svm_classifier.predict_proba(X_test), decimals=2)

In [25]:
# Cross-tabulate true labels against the SVM predictions.
cm_svm = confusion_matrix(y_true=Y_test, y_pred=Y_pred_svm)

In [27]:
# Unweighted mean of the per-class precision values for the SVM.
precision_svm = precision_score(y_true=Y_test, y_pred=Y_pred_svm, average="macro")

In [28]:
# Report the macro-averaged test-set precision of the SVM model.
print("SVM Precision: ", precision_svm)

SVM Precision:  0.9474416626655665


In [29]:
# Unweighted mean of the per-class recall values for the SVM.
recall_svm = recall_score(y_true=Y_test, y_pred=Y_pred_svm, average="macro")

In [30]:
# Report the macro-averaged test-set recall of the SVM model.
print("SVM Recall: ", recall_svm)

SVM Recall:  0.9414050141917885


In [31]:
# Random forest classifier
from sklearn.ensemble import RandomForestClassifier

In [32]:
# Random forest of 100 trees; the fixed seed makes the bootstrap samples and
# feature draws repeatable between runs.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
)
rf_classifier.fit(X_train, Y_train)

In [33]:
# Hard class predictions from the random forest for the held-out rows.
Y_pred_rf = rf_classifier.predict(X_test)

# Per-class membership probabilities, rounded to two decimals for readability.
probs_Y_rf = np.round(rf_classifier.predict_proba(X_test), decimals=2)

In [34]:
# Cross-tabulate true labels against the random-forest predictions.
cm_rf = confusion_matrix(y_true=Y_test, y_pred=Y_pred_rf)

In [35]:
# Show the random-forest confusion matrix (rows follow the true labels and
# columns the predicted labels, per the scikit-learn convention).
print("Random Forest Confusion Matrix: \n", cm_rf)

Random Forest Confusion Matrix: 
 [[231   0  11   0   2   2   9]
 [  0  92   0   0   0   0   0]
 [  9   0 330   0   8   1   2]
 [  0   0   0 688   1  11  35]
 [  1   0   7   2 366   0  14]
 [  0   0   1   3   0 359  13]
 [  2   0   1  56   6   5 455]]


In [36]:
# Unweighted mean of the per-class precision values for the random forest.
precision_rf = precision_score(y_true=Y_test, y_pred=Y_pred_rf, average="macro")

In [37]:
# Report the macro-averaged test-set precision of the random-forest model.
print("Random Forest Precision: ", precision_rf)

Random Forest Precision:  0.9398748507493878


In [38]:
# Unweighted mean of the per-class recall values for the random forest.
recall_rf = recall_score(y_true=Y_test, y_pred=Y_pred_rf, average="macro")

In [39]:
# Report the macro-averaged test-set recall of the random-forest model.
print("Random Forest Recall: ", recall_rf)

Random Forest Recall:  0.9349584795339693


In [40]:
# Decision tree classifier
from sklearn.tree import DecisionTreeClassifier

In [41]:
# Single decision tree with library-default settings; the fixed seed makes
# tie-breaking between equally good splits repeatable.
dt_classifier = DecisionTreeClassifier(
    random_state=0,
)
dt_classifier.fit(X_train, Y_train)

In [42]:
# Hard class predictions from the decision tree for the held-out rows.
Y_pred_dt = dt_classifier.predict(X=X_test)

In [43]:
# Cross-tabulate true labels against the decision-tree predictions.
cm_dt = confusion_matrix(y_true=Y_test, y_pred=Y_pred_dt)

In [44]:
# Show the decision-tree confusion matrix (rows follow the true labels and
# columns the predicted labels, per the scikit-learn convention).
print("Decision Tree Confusion Matrix: \n", cm_dt)

Decision Tree Confusion Matrix: 
 [[220   0  17   0   3   5  10]
 [  0  92   0   0   0   0   0]
 [ 21   0 318   0   9   1   1]
 [  0   0   0 652   4  18  61]
 [  2   0  10   2 363   0  13]
 [  2   0   0  10   0 344  20]
 [  5   0   3  53  10  11 443]]


In [45]:
# Unweighted mean of the per-class precision values for the decision tree.
precision_dt = precision_score(y_true=Y_test, y_pred=Y_pred_dt, average="macro")

In [46]:
# Report the macro-averaged test-set precision of the decision-tree model.
print("Decision Tree Precision: ", precision_dt)

Decision Tree Precision:  0.9074779176762501


In [47]:
# Unweighted mean of the per-class recall values for the decision tree.
recall_dt = recall_score(y_true=Y_test, y_pred=Y_pred_dt, average="macro")

In [48]:
# Report the macro-averaged test-set recall of the decision-tree model.
print("Decision Tree Recall: ", recall_dt)

Decision Tree Recall:  0.9068376754489499
