<a href="https://colab.research.google.com/github/srihith95/binary-classifier-pytorch_vanilla/blob/main/bin_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# NOTE: header=None tells pandas not to treat the first CSV line as column
# names, so the "Freq_1 ... Label" line is loaded as data row 0 (visible in the
# output below). Later cells skip that row with iloc[1:] and cast the features
# to float explicitly.
dataset = pd.read_csv('drive/My Drive/data/sonar.csv', header=None)
# Last expression of the cell: renders the (truncated) frame for inspection.
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,Freq_1,Freq_2,Freq_3,Freq_4,Freq_5,Freq_6,Freq_7,Freq_8,Freq_9,Freq_10,...,Freq_52,Freq_53,Freq_54,Freq_55,Freq_56,Freq_57,Freq_58,Freq_59,Freq_60,Label
1,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
2,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
3,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
4,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,0.0187,0.0346,0.0168,0.0177,0.0393,0.163,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
205,0.0323,0.0101,0.0298,0.0564,0.076,0.0958,0.099,0.1018,0.103,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
206,0.0522,0.0437,0.018,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.016,0.0029,0.0051,0.0062,0.0089,0.014,0.0138,0.0077,0.0031,M
207,0.0303,0.0353,0.049,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M


In [None]:
# Row 0 of `dataset` holds the column names (the CSV was read with
# header=None), so both selections start at row 1.
X = dataset.iloc[1:, :60]   # columns 0..59: the 60 feature columns
y = dataset.iloc[1:, 60]    # column 60: the class label

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping, then replace the string labels in `y`
# with their integer codes. `encoder` is kept so the mapping can be inspected.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

In [None]:
# Show the classes the encoder learned; a label's position in this array is
# the integer code it was mapped to (position 0 -> 0, position 1 -> 1).
print(encoder.classes_)

['M' 'R']


In [None]:
# Inspection only: check what container type `encoder.transform` returned
# before the labels are converted to a tensor.
type(y)

numpy.ndarray

In [None]:
# Cast every feature column to float. Presumably needed because the header
# line was loaded as a data row, which keeps pandas from inferring numeric
# columns on its own -- TODO confirm with dataset.dtypes.
X = X.astype(float)

In [None]:
import torch
import numpy as np

# float64 on both sides so the data matches the models, which cast their
# parameters with .double().
X = torch.tensor(X.values, dtype=torch.float64)
# The encoded labels are 1-D; add a trailing axis so the targets line up with
# the (n, 1) output of the models.
y = torch.tensor(y, dtype=torch.float64).unsqueeze(1)

In [None]:
import torch.nn as nn

class Wide(nn.Module):
  """Binary classifier with one wide hidden layer.

  Architecture: Linear(n_features, n_hidden) -> ReLU -> Linear(n_hidden, 1)
  -> Sigmoid. All parameters are cast to float64 via ``self.double()``, so
  inputs must be float64 tensors.

  Args:
    n_features: size of each input sample (default 60).
    n_hidden: width of the hidden layer (default 180).
  """

  def __init__(self, n_features=60, n_hidden=180):
    super().__init__()
    self.hidden = nn.Linear(n_features, n_hidden)
    self.relu = nn.ReLU()
    self.output = nn.Linear(n_hidden, 1)
    self.sigmoid = nn.Sigmoid()
    # Cast all parameters to float64.
    self.double()

  def forward(self, x):
    """Return the sigmoid output for ``x``; the last dimension has size 1."""
    x = self.relu(self.hidden(x))
    x = self.sigmoid(self.output(x))
    return x


class Deep(nn.Module):
  """Binary classifier with three equally sized hidden layers.

  Architecture: three Linear -> ReLU stages followed by Linear(n_hidden, 1)
  -> Sigmoid. All parameters are cast to float64 via ``self.double()``, so
  inputs must be float64 tensors.

  Args:
    n_features: size of each input sample (default 60).
    n_hidden: width of each hidden layer (default 60).
  """

  def __init__(self, n_features=60, n_hidden=60):
    super().__init__()
    self.layer1 = nn.Linear(n_features, n_hidden)
    self.act1 = nn.ReLU()
    self.layer2 = nn.Linear(n_hidden, n_hidden)
    self.act2 = nn.ReLU()
    self.layer3 = nn.Linear(n_hidden, n_hidden)
    self.act3 = nn.ReLU()
    self.output = nn.Linear(n_hidden, 1)
    self.sigmoid = nn.Sigmoid()
    # Cast all parameters to float64.
    self.double()

  def forward(self, x):
    """Return the sigmoid output for ``x``; the last dimension has size 1."""
    x = self.act1(self.layer1(x))
    x = self.act2(self.layer2(x))
    x = self.act3(self.layer3(x))
    x = self.sigmoid(self.output(x))
    return x

# Training Loop


In [None]:
import torch.optim as optim
import tqdm
import copy

def model_train(model, X_train, X_val, y_train, y_val,
                n_epochs=250, batch_size=10, lr=0.0001, show_progress=False):
  """Train ``model`` with Adam on binary cross-entropy and keep the best epoch.

  After every epoch the model is scored on ``(X_val, y_val)`` using a 0.5
  cut-off (``round()``). The weights from the epoch with the highest
  validation accuracy are loaded back into ``model`` before returning, so the
  model is modified in place.

  Args:
    model: module to train. ``nn.BCELoss`` is applied to its output directly,
      so it is expected to return probabilities in [0, 1].
    X_train, y_train: training inputs and targets; sliced along dim 0 into
      consecutive mini-batches (no reshuffling between epochs).
    X_val, y_val: inputs and targets used to pick the best epoch.
    n_epochs: number of passes over the training data.
    batch_size: number of rows per mini-batch.
    lr: Adam learning rate.
    show_progress: when True, show a tqdm bar per epoch with the running
      batch loss and accuracy. Off by default.

  Returns:
    The best validation accuracy as a float. If no epoch produced a
    comparable score (``n_epochs == 0`` or an empty validation set, whose
    accuracy is NaN) the model is left as trained and ``-inf`` is returned.
  """
  #loss and optimizer
  loss_func = nn.BCELoss()
  optimizer = optim.Adam(model.parameters(), lr=lr)

  batch_start = torch.arange(0, len(X_train), batch_size)

  #holding the best model
  best_acc = float("-inf")
  best_weights = None

  for epoch in range(n_epochs):
    model.train()
    batches = batch_start
    if show_progress:
      # tqdm closes the bar itself once the iterator is exhausted
      batches = tqdm.tqdm(batch_start, mininterval=0, unit='batch',
                          desc=f"Epoch {epoch}")
    for start in batches:
      #batching
      X_batch = X_train[start:start+batch_size]
      y_batch = y_train[start:start+batch_size]

      #forward-pass
      y_pred = model(X_batch)
      loss = loss_func(y_pred, y_batch)

      #backward pass
      optimizer.zero_grad()
      loss.backward()

      #update weights
      optimizer.step()

      #progress (only computed when somebody is watching)
      if show_progress:
        acc = (y_pred.round() == y_batch).float().mean()
        batches.set_postfix(loss=float(loss), acc=float(acc))

    #evaluation at the end of an epoch; no autograd graph is needed here
    model.eval()
    with torch.no_grad():
      y_pred = model(X_val)
    acc = float((y_pred.round() == y_val).float().mean())
    if acc > best_acc:
      best_acc = acc
      best_weights = copy.deepcopy(model.state_dict())

  # best_weights stays None when no epoch ran or every score was NaN
  if best_weights is not None:
    model.load_state_dict(best_weights)
  return best_acc


In [None]:
from sklearn.model_selection import StratifiedKFold, train_test_split

# One fixed seed so the hold-out split, the CV folds and the weight
# initialisation are identical on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, shuffle=True, random_state=SEED)

#defining the k-fold val test harness
# With a fixed random_state every call to k_fold.split() yields the same
# folds, so both architectures are scored on identical partitions.
k_fold = StratifiedKFold(n_splits=6, shuffle=True, random_state=SEED)


def cross_validate_model(model_factory, label):
  """Train a fresh model per fold and return the list of fold accuracies.

  Args:
    model_factory: zero-argument callable returning a new, untrained model.
    label: short name printed next to each fold accuracy.
  """
  scores = []
  for train_idx, val_idx in k_fold.split(X_train, y_train):
    fold_model = model_factory()
    fold_acc = model_train(fold_model,
                           X_train[train_idx], X_train[val_idx],
                           y_train[train_idx], y_train[val_idx])
    print("Accuracy (%s): %.2f" % (label, fold_acc))
    scores.append(fold_acc)
  return scores


cv_scores_wide = cross_validate_model(Wide, "wide")
cv_scores_deep = cross_validate_model(Deep, "deep")


widemodel_acc = np.mean(cv_scores_wide)
widemodel_std = np.std(cv_scores_wide)
deepmodel_acc = np.mean(cv_scores_deep)
deepmodel_std = np.std(cv_scores_deep)
print("Final Accuracy:")
print("Wide: %.2f%%(+/- %.2f)" % (widemodel_acc*100, widemodel_std*100))
print("Deep: %.2f%%(+/- %.2f)" % (deepmodel_acc*100, deepmodel_std*100))

Accuracy (wide): 0.72
Accuracy (wide): 0.88
Accuracy (wide): 0.83
Accuracy (wide): 0.83
Accuracy (wide): 0.88
Accuracy (wide): 0.75
Accuracy (deep): 0.80
Accuracy (deep): 0.83
Accuracy (deep): 0.88
Accuracy (deep): 0.79
Accuracy (deep): 0.79
Accuracy (deep): 0.88
Final Accuracy:
Wide: 81.44%(+/- 5.93)
Deep: 82.78%(+/- 3.62)


In [None]:
# Retrain whichever architecture had the higher mean cross-validation accuracy.
if widemodel_acc > deepmodel_acc:
  print("Retraining a wide model")
  model = Wide()

else:
  print("Retraining a deep model")
  model = Deep()

# model_train keeps the weights of the epoch that scores best on its
# validation data. Passing the test set there would select the checkpoint on
# the very data used for the final report, inflating the number, so carve a
# validation subset out of the training data instead.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, train_size=0.8, shuffle=True, random_state=0)
val_acc = model_train(model, X_fit, X_val, y_fit, y_val)

# The test set is touched exactly once, after training has finished.
model.eval()
with torch.no_grad():
  acc = float((model(X_test).round() == y_test).float().mean())
print("Final model accuracy: %.2f" % (acc*100))

Retraining a deep model
Final model accuracy: 85.71


In [None]:
# Print hard 0/1 predictions for the first five test rows, one row at a time.
model.eval()
# Decision cut-off applied to the model output; note it differs from the 0.5
# implied by round() in the training loop -- TODO confirm this is intentional.
threshold = 0.68
with torch.no_grad():
  for i in range(5):
    sample = X_test[i:i+1]          # keep the batch axis: shape (1, n_features)
    probability = model(sample)
    y_pred = (probability > threshold).float()
    print(y_pred)

tensor([[0.]])
tensor([[1.]])
tensor([[1.]])
tensor([[0.]])
tensor([[0.]])


In [None]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Score the whole test set without building an autograd graph.
with torch.no_grad():
  y_pred = model(X_test)

# Targets and model outputs are (n, 1) tensors; hand scikit-learn flat
# NumPy arrays explicitly rather than relying on implicit conversion.
fpr, tpr, thresholds = roc_curve(y_test.numpy().ravel(), y_pred.numpy().ravel())

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="model")
# Diagonal = classifier that guesses at random, for reference.
ax.plot([0, 1], [0, 1], linestyle="--", color="grey", label="chance")
ax.set(xlabel="False positive rate",
       ylabel="True positive rate",
       title="ROC curve on the test set")
ax.legend()
plt.show()