Graph Plot


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.calibration import label_binarize
from itertools import cycle


def roc_plot(y_test, y_pred):
    """Plot one ROC curve per rice class in side-by-side panels.

    Args:
        y_test: True binary labels; 0 and 1 are mapped to the class names
            in ``rice`` below.
        y_pred: Predicted scores, assumed to be the score of class 1
            (e.g. a sigmoid output). May be shaped ``(n,)`` or ``(n, 1)``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Local import: numpy is only imported further down the file.
    import numpy as np

    rice = {
        0: 'Cammeo',
        1: 'Osmancik',
    }

    # Flatten column-vector model outputs so sklearn receives 1-D inputs.
    true_labels = np.asarray(y_test).ravel()
    positive_scores = np.asarray(y_pred, dtype=float).ravel()

    plt.figure(figsize=(10, 5))

    colors = cycle(['red', 'blue'])

    for i, class_name in rice.items():
        # Treat class ``i`` as the positive class. The score of class 0 is
        # the complement of the class-1 score, otherwise both panels would
        # show exactly the same curve.
        class_scores = positive_scores if i == 1 else 1.0 - positive_scores
        fpr, tpr, _ = roc_curve(true_labels, class_scores, pos_label=i)
        roc_auc = auc(fpr, tpr)

        color = next(colors)

        plt.subplot(1, 2, i + 1)
        plt.plot(fpr, tpr, color=color, lw=2,
                 label=f'ROC Curve: {class_name} (area = {roc_auc:.2f})')

        # Diagonal reference line of a random classifier.
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([-.05, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {class_name}')
        plt.legend(loc='lower right')

    plt.tight_layout()
    plt.show()

Neural Network


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

# Column names in file order; the last column holds the class label.
cols = ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length',
        'Eccentricity', 'Convex_Area', 'Extent', 'Class']

# ARFF files carry '@RELATION' / '@ATTRIBUTE' / '@DATA' header lines before
# the comma-separated rows. Treating '@' as a comment character makes pandas
# skip them instead of parsing them as (string) data rows.
# NOTE(review): '%' comment lines, if the file has any, are not skipped.
data = pd.read_csv('./data/Rice_Cammeo_Osmancik.arff', names=cols,
                   comment='@')

# Encode the label as 1 for 'Osmancik' and 0 for every other value.
data['Class'] = (data['Class'] == 'Osmancik').astype(int)

In [None]:
fig, ax = plt.subplots(figsize=(6,  3))

# Select the numeric feature columns once. 'Class' is the encoded target,
# not a feature, so it is kept out of the outlier plot.
numeric_features = data.drop(columns='Class').select_dtypes(include=[np.number])

# Absolute z-score per value; anything more than 3 standard deviations from
# the column mean is flagged as an outlier.
z_scores = ((numeric_features - numeric_features.mean())
            / numeric_features.std()).abs()
outliers = z_scores > 3

for i, col in enumerate(z_scores.columns):
    # Only the first column contributes legend entries, so each label
    # appears once in the legend.
    # Blue Z-Score
    ax.scatter([col]*len(z_scores), z_scores[col], c='b', label='Normal' if i == 0 else "")

    # Red Z-Score (drawn on top of the blue points)
    outlier_z_scores = z_scores.loc[outliers[col], col]

    ax.scatter([col]*len(outlier_z_scores), outlier_z_scores, c='r', label='Outlier' if i == 0 else "")

ax.set_title('Z-Scores of Features with Outliers')
ax.set_xlabel('Feature')
ax.set_ylabel('Z-Score')
ax.legend()
plt.xticks(rotation=45)
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Target: the last column of the frame.
y = data.iloc[:, -1]

# Features: the first seven columns, standardized by StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before any
# train/test split, so test-set statistics influence the scaling — consider
# fitting on the training split only.
scaler = StandardScaler()
x = scaler.fit_transform(data.iloc[:, :7].values)

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing, then take 15% of the remaining
# training data as a validation set. Fixed seeds keep the splits repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.3, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=.15, random_state=42)

y_train = y_train.astype(int)
y_val = y_val.astype(int)
y_test = y_test.astype(int)

# Sequential lives directly under tf.keras (tf.keras.layers has no such
# class). Two tanh hidden layers with dropout, sigmoid output for the binary
# label.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, input_shape=(7,),
          kernel_initializer='normal', activation='tanh'))
model.add(tf.keras.layers.Dropout(.4))
model.add(tf.keras.layers.Dense(64, activation='tanh'))
model.add(tf.keras.layers.Dropout(.4))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adadelta(
    learning_rate=.0025), metrics=['accuracy'])


# Stop on the validation loss (validation data is supplied below) rather
# than the training loss, and pass callbacks as a list as the API documents.
model.fit(x_train, y_train, batch_size=32, shuffle=True, epochs=200,
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss')],
          validation_data=(x_val, y_val))

# Raw sigmoid scores for the held-out test set (not yet thresholded).
y_pred = model.predict(x_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# ROC curves need the raw scores, so plot before thresholding.
roc_plot(y_test, np.array(y_pred))

# Turn scores into hard 0/1 predictions at the 0.5 cut-off.
y_pred = np.where(y_pred >= 0.5, 1, 0)

# Per-class precision/recall/F1 followed by the confusion matrix.
for summary in (classification_report(y_test, y_pred),
                confusion_matrix(y_test, y_pred)):
    print(summary)