In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
lines = []
with open('data.csv') as datafile:
    for i in datafile.readlines():
        line = i.split(',')[:-1]
        lines.append(line)

In [4]:
data = pd.read_csv('data.csv')

In [5]:
data.columns

Index(['Col1', 'Col2', 'Col3', 'Col4', 'Col5', 'Col6', 'Col7', 'Col8', 'Col9',
       'Col10', 'Col11', 'Col12', 'Class_att', 'Unnamed: 13'],
      dtype='object')

In [6]:
new_columns = data.columns[:-1]

In [7]:
new_columns

Index(['Col1', 'Col2', 'Col3', 'Col4', 'Col5', 'Col6', 'Col7', 'Col8', 'Col9',
       'Col10', 'Col11', 'Col12', 'Class_att'],
      dtype='object')

In [8]:
data = data[new_columns]
data.head()

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6,Col7,Col8,Col9,Col10,Col11,Col12,Class_att
0,63.027817,22.552586,39.609117,40.475232,98.672917,-0.2544,0.744503,12.5661,14.5386,15.30468,-28.658501,43.5123,Abnormal
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,0.415186,12.8874,17.5323,16.78486,-25.530607,16.1102,Abnormal
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,0.474889,26.8343,17.4861,16.65897,-29.031888,19.2221,Abnormal
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,0.369345,23.5603,12.7074,11.42447,-30.470246,18.8329,Abnormal
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,0.54336,35.494,15.9546,8.87237,-16.378376,24.9171,Abnormal


In [9]:
data.Class_att.value_counts()

Abnormal    210
Normal      100
Name: Class_att, dtype: int64

In [10]:
# changing target to numerical 
target_map = {'Abnormal':1,'Normal':0}
data['Class_att'].replace(target_map,inplace = True)

In [11]:
data['Class_att'].value_counts()

1    210
0    100
Name: Class_att, dtype: int64

In [12]:
# separating features and target 
X = data.iloc[:,0:-1]
y = data.iloc[:,-1]

In [13]:
X.shape,y.shape

((310, 12), (310,))

### Preparing X_train,X_test,y_train and y_test

In [14]:
# train test split 
X_train,X_test,y_train,y_test =train_test_split(
    X,y,test_size = 0.33, random_state = 1
)

In [15]:
# standardizing the features 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [16]:
X_train.shape

(207, 12)

### DataClass 

In [17]:
import torch 
import torch.nn as nn
epochs = 50 
batch_size = 64
learning_rate = 0.001

class TrainData(torch.utils.data.Dataset):
    def __init__(self,x,y):
        super().__init__()
        self.x = x 
        self.y = y 
    def __getitem__(self,index):
        return self.x[index],self.y[index]
    def __len__(self):
        return self.x.shape[0]
    
class TestData(torch.utils.data.Dataset):
    def __init__(self,x):
        super().__init__()
        self.x = x 
    def __getitem__(self,index):
        return self.x[index]
    def __len__(self):
        return self.x.shape[0]

### DataLoader

In [18]:
train_data = TrainData(
    torch.FloatTensor(X_train),torch.FloatTensor(y_train)
)

In [19]:
test_data = TestData(torch.FloatTensor(X_test))

In [20]:

train_loader = torch.utils.data.DataLoader(
    dataset = train_data, batch_size = batch_size, shuffle = True
)
test_loader = torch.utils.data.DataLoader(
    dataset = test_data, batch_size = 1
)

In [21]:
X_train.shape

(207, 12)

Each data instance contains 12 features, so the input dimension is 12. 64 nodes in the first layer. The batch size is also 64. In short we have the following matrices, (64,12)*(12,64) => (64,64) ==> There are 64 instances and each of them is 64 in dimension. 

### Building the Network

In [22]:
class BinaryClassificationTabular(nn.Module):
    def __init__(self):
        super(BinaryClassificationTabular,self).__init__()
        # number of input features = 12 
        self.layer_1 = nn.Linear(12,64)
        self.layer_2 = nn.Linear(64,64)
        self.layer_out = nn.Linear(64,1)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p = 0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
    
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.dropout(x)
        x = self.sigmoid(self.layer_out(x))
        
        return x

In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [24]:
model = BinaryClassificationTabular()
print(model)

BinaryClassificationTabular(
  (layer_1): Linear(in_features=12, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [25]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)

In [26]:
def binary_acc(y_pred,y_test):
    y_pred_tag = torch.round(y_pred)
    correct_results_sum = (y_pred_tag ==y_test).sum().float() 
    # average accuracy for a batch is the sum of positive predictions
    # divided by the number of predictions in that batch 
    acc = correct_results_sum/y_test.shape[0]
    
    acc = torch.round(acc*100)
    return acc
    

### Trainng Loop

In [27]:
model.train()
for e in range(1, epochs+1):
    epoch_loss = 0
    epoch_acc = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        
        y_pred = model(X_batch)
        
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        acc = binary_acc(y_pred, y_batch.unsqueeze(1))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')   

Epoch 001: | Loss: 0.60187 | Acc: 57.000
Epoch 002: | Loss: 0.60535 | Acc: 58.500
Epoch 003: | Loss: 0.61518 | Acc: 69.000
Epoch 004: | Loss: 0.59765 | Acc: 67.750
Epoch 005: | Loss: 0.59436 | Acc: 70.750
Epoch 006: | Loss: 0.60336 | Acc: 72.250
Epoch 007: | Loss: 0.59625 | Acc: 73.000
Epoch 008: | Loss: 0.58947 | Acc: 68.500
Epoch 009: | Loss: 0.57869 | Acc: 74.750
Epoch 010: | Loss: 0.58538 | Acc: 76.250
Epoch 011: | Loss: 0.57036 | Acc: 73.500
Epoch 012: | Loss: 0.55950 | Acc: 79.000
Epoch 013: | Loss: 0.57403 | Acc: 76.000
Epoch 014: | Loss: 0.56404 | Acc: 72.500
Epoch 015: | Loss: 0.54862 | Acc: 77.000
Epoch 016: | Loss: 0.56179 | Acc: 74.750
Epoch 017: | Loss: 0.54844 | Acc: 79.000
Epoch 018: | Loss: 0.55677 | Acc: 77.250
Epoch 019: | Loss: 0.55137 | Acc: 79.000
Epoch 020: | Loss: 0.54908 | Acc: 74.000
Epoch 021: | Loss: 0.55853 | Acc: 78.000
Epoch 022: | Loss: 0.53961 | Acc: 78.750
Epoch 023: | Loss: 0.53976 | Acc: 80.750
Epoch 024: | Loss: 0.57294 | Acc: 74.000
Epoch 025: | Los

### Testing Loop

In [28]:
y_pred_list = []
model.eval() 
with torch.no_grad():
    for x_batch in test_loader:
        y_test_pred = model(x_batch)
        y_pred_tag = torch.round(y_test_pred)
        y_pred_list.append(y_pred_tag.numpy())
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

### Further Evaluations

In [29]:
# confusion matrics
import sklearn
sklearn.metrics.confusion_matrix(y_test,y_pred_list)

array([[27,  5],
       [12, 59]], dtype=int64)

In [30]:
report = sklearn.metrics.classification_report(y_test,y_pred_list)
print(report)

              precision    recall  f1-score   support

           0       0.69      0.84      0.76        32
           1       0.92      0.83      0.87        71

    accuracy                           0.83       103
   macro avg       0.81      0.84      0.82       103
weighted avg       0.85      0.83      0.84       103

