In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Report the TensorFlow version in use so results can be tied to it.
tf_version = tf.__version__
print(f'Tensorflow version: {tf_version}')

In [None]:
from numpy.random import seed
from tensorflow.random import set_seed

# One seed shared by NumPy's and TensorFlow's global random generators,
# so a fresh "Run All" draws the same random numbers.
SEED = 42

set_seed(SEED)
seed(SEED)

## Loading the dataset

In [None]:
# header=None: do not treat the first row as column names, so the columns
# are addressed by integer position (0..60) in the cells that follow.
sonar_csv_path = '../input/mines-vs-rocks/sonar.all-data.csv'
min_rock_data = pd.read_csv(sonar_csv_path, header=None)
min_rock_data.head()

In [None]:
# Number of rows per distinct value of column 60 (the column later used as Y).
label_counts = min_rock_data.groupby(60).size()
label_counts

### Split the data into features (X) and labels (Y)

In [None]:
# min_rock_data_v = min_rock_data.values
# X = min_rock_data_v[:,0:60].astype(float)
# Y = min_rock_data_v[:,60]
# print ('X Shape :', X.shape)
# print ('Y Shape :', Y.shape)
# print ('Number of Unique Values in Y:', set(Y))

# Features are the first 60 columns; the label is the column at position 60.
X = min_rock_data.iloc[:, 0:60].values
Y = min_rock_data.iloc[:, 60].values

In [None]:
# Show the feature vector of the first sample as a quick sanity check.
first_sample = X[0]
print(first_sample)

### Preparing Y: one-hot encode the labels

In [None]:
# for i, v in enumerate(Y):
#     if v == 'M':
#         Y[i] = 1
#     elif v == 'R':
#         Y[i] = 0

# Y = np.asarray(Y).astype(int)
# print(Y)


# from sklearn.preprocessing import LabelEncoder
# encoder = LabelEncoder()
# y_one = encoder.fit_transform(Y).astype(int)

# One-hot encode the labels: one indicator column per distinct value of Y.
# dtype=int pins the indicators to 0/1 integers. Without it the dtype depends
# on the installed pandas (bool since pandas 2.0, uint8 before), which changes
# how the labels appear in the metric reports further down.
y = pd.get_dummies(Y, drop_first=False, dtype=int)

### Split the data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; random_state fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

### Normalization of the data

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Train the model

In [None]:
# Fully connected classifier: ReLU hidden layers with dropout, and one output
# unit per one-hot label column.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]

model = Sequential()
model.add(Dense(100, input_shape=(n_features,), activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(200, activation='relu'))
model.add(Dropout(0.5))
# The targets are one-hot and the classes are mutually exclusive, so the output
# layer is a softmax trained with categorical cross-entropy: the outputs sum to
# 1 per sample and 'accuracy' is measured on the arg-max class. Independent
# sigmoids with binary cross-entropy would score each output unit separately.
model.add(Dense(n_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train for 50 epochs. The test split is passed as validation data, so its
# loss and accuracy are recorded in the returned history after every epoch.
cl = model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    validation_data=(X_test, y_test),
)

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))

# One line per recorded history series; validation series are drawn dashed.
for series_name in ('accuracy', 'val_accuracy', 'loss', 'val_loss'):
    line_kwargs = {'linestyle': '--'} if series_name.startswith('val_') else {}
    ax.plot(cl.history[series_name], label=series_name, **line_kwargs)
ax.legend()

In [None]:
# evaluate() returns the loss followed by the single metric given to compile().
test_scores = model.evaluate(X_test, y_test)
ModelLoss, ModelAccuracy = test_scores

for score_name, score_value in (('Loss', ModelLoss), ('Accuracy', ModelAccuracy)):
    print(f'Test {score_name} is {score_value}')

In [None]:
# Per-class model outputs for every test sample, the index of the
# highest-scoring output, and the true 'R' indicator for comparison.
pred = model.predict(X_test)
pred_list = pred.argmax(axis=-1)
y_test_list = y_test['R'].to_list()

# Preview of the first 10 samples:
#   output 0 - output 1 - predicted class index // true 'R' indicator
for i in range(10):
    scores = pred[i]
    print(f"{scores[0]:.5f} - {scores[1]:.5f} - {pred_list[i]} // {y_test_list[i]}")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Per-class precision / recall / F1, comparing the true 'R' indicators with
# the predicted class indices.
cr = classification_report(y_true=y_test_list, y_pred=pred_list)
print(cr)

In [None]:
# Confusion matrix of the true 'R' indicator (rows) against the predicted
# class index (columns).
cm = confusion_matrix(y_test_list, pred_list)

# The class index is the position of the label column in the one-hot frame,
# so the column names double as tick labels.
class_names = list(y_test.columns)

f, ax = plt.subplots(figsize=(15, 6))
# Draw on the axes created above instead of relying on pyplot's implicit
# "current axes".
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', square=True,
            linewidths=0.01, linecolor='grey',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion matrix')
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')

In [None]:
# plot_roc_curve is deliberately not imported: it was unused here and has been
# removed from scikit-learn (1.2+), where importing it raises ImportError.
from sklearn.metrics import roc_curve, roc_auc_score

# A ROC curve needs a continuous score per sample. Use the model output for
# the 'R' class, which is the positive class in y_test_list. Hard arg-max
# labels would reduce the curve to a single operating point.
r_column = list(y_test.columns).index('R')
r_scores = pred[:, r_column]

# A constant score carries no information and yields the chance-level diagonal.
random_probs = [0] * len(y_test_list)

ran_fpr, ran_tpr, _ = roc_curve(y_test_list, random_probs)
fpr, tpr, thresholds = roc_curve(y_test_list, r_scores)

fig = plt.figure(figsize = (10,6))
plt.plot(ran_fpr, ran_tpr, linestyle='--', label='Random')
plt.plot(fpr, tpr, marker='.', label='Model')
plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')

ran_auc = roc_auc_score(y_test_list, random_probs)
# Stored as model_auc rather than auc, so the name does not collide with
# sklearn.metrics.auc.
model_auc = roc_auc_score(y_test_list, r_scores)
print(f'Random: ROC AUC={ran_auc:.3f}')
print(f'Model: ROC AUC={model_auc:.3f}')

In [None]:
from sklearn.metrics import precision_recall_curve, f1_score, auc

# The precision-recall curve is traced by sweeping a threshold over a
# continuous score, so use the model output for the 'R' class (the positive
# class in y_test_list) instead of hard arg-max labels.
r_column = list(y_test.columns).index('R')
r_scores = pred[:, r_column]

precision, recall, _ = precision_recall_curve(y_test_list, r_scores)
# Keep the result in its own name so the imported auc() function is not
# rebound to a float.
pr_auc = auc(recall, precision)
print(f'AUC: {pr_auc:.5f}')

fig = plt.figure(figsize = (10,6))
plt.plot(recall, precision, marker='.', label='Model')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()

In [None]:
# from sklearn.metrics import det_curve

# fpr, fnr, thresholds = det_curve(y_test_list, pred_list)

# fig = plt.figure(figsize = (10,6))
# plt.plot(ran_fpr, ran_tpr, linestyle='--', label='Random')
# plt.plot(fpr, tpr, marker='.', label='Model')
# plt.title('ROC curve')
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.legend(loc='best')

In [None]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Baseline scikit-learn classifiers, all with default hyper-parameters,
# as (short name, estimator) pairs.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
    ('AB', AdaBoostClassifier()),
    ('GBM', GradientBoostingClassifier()),
    ('RF', RandomForestClassifier()),
    ('ET', ExtraTreesClassifier()),
]

# Loop-invariant inputs: every classifier is scored on the same 10 folds and
# the same 1-D 'R' indicator labels.
kfold = KFold(n_splits=10)
train_labels = y_train['R'].to_list()

results = []
names = []
# The loop variable is `estimator`, not `model`, so the trained Keras network
# bound to `model` is not overwritten and earlier cells remain re-runnable.
for name, estimator in models:
    cv_results = cross_val_score(estimator, X_train, train_labels, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print(f"{name}: {cv_results.mean()} ({cv_results.std()})")