In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [3]:
# Read the raw data set; the relative path is resolved against the
# notebook's working directory.
csv_path = 'costsensitiveregression.csv'
dataset = pd.read_csv(csv_path)

In [4]:
# Preview the first five rows of the loaded frame.
dataset.head(5)

Unnamed: 0,NotCount,YesCount,ATPM,PFD,PFG,SFD,SFG,WP,WS,AH,AN,Status,FNC
0,2,21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
1,23,0,0.0,0.044,0.0,0.0,0.0,0.306179,0.0,0.0,0.0,1,0.0
2,1,22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
3,5,18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0
4,1,22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0


In [5]:
# Show the column labels of the loaded frame.
dataset.columns

Index(['NotCount', 'YesCount', 'ATPM', 'PFD', 'PFG', 'SFD', 'SFG', 'WP', 'WS',
       'AH', 'AN', 'Status', 'FNC'],
      dtype='object')

In [6]:
# Split the frame into model inputs, the Status label and the FNC column
# (FNC is later passed to the loss as the false-negative cost).
feature_columns = [
    'NotCount', 'YesCount', 'ATPM', 'PFD', 'PFG', 'SFD', 'SFG',
    'WP', 'WS', 'AH', 'AN',
]
X = dataset[feature_columns]
Y = dataset['Status']
FN = dataset['FNC']

In [7]:
# Scratch check: view the feature matrix as a tensor. The result is only
# displayed, not stored.
feature_rows = X.values.tolist()
torch.tensor(feature_rows)

tensor([[ 2.0000, 21.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [23.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.0000, 22.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 2.0000, 21.0000,  1.0000,  ...,  2.4428,  0.0000,  0.0000],
        [ 3.0000, 20.0000,  1.0000,  ...,  4.0787,  0.0000,  0.0000],
        [ 6.0000, 17.0000,  1.0000,  ...,  2.0774,  0.0000,  0.0000]])

In [8]:
# Features and labels as float32 tensors.
X_ten = torch.tensor(X.values, dtype=torch.float32)
Y_ten = torch.tensor(Y.values, dtype=torch.float32)

# Per-sample cost vectors, one entry per label:
#   true positive  -> constant 4
#   false positive -> constant 4
#   true negative  -> constant 0
#   false negative -> taken row by row from the FNC column
# full_like / zeros_like inherit Y_ten's shape and float32 dtype, so the
# constant vectors are created directly instead of via a Python list.
TP_ten = torch.full_like(Y_ten, 4.0)
FP_ten = torch.full_like(Y_ten, 4.0)
TN_ten = torch.zeros_like(Y_ten)
FN_ten = torch.tensor(FN.values, dtype=torch.float32)


In [9]:
# 80/20 train-test split over a shuffled row order.
# The shuffle uses a seeded, local Generator so the split is identical on
# every Restart & Run All and NumPy's global random state is left untouched.
SPLIT_SEED = 42
n_rows = len(dataset)
n_train = int(n_rows * 0.8)  # first 80% of the shuffled order -> training

indices = np.arange(n_rows, dtype=np.int64)
np.random.default_rng(SPLIT_SEED).shuffle(indices)

train_indices = indices[:n_train].tolist()
test_indices = indices[n_train:].tolist()

In [10]:
# Materialise the train/test partitions. The feature matrix is only
# row-indexed; labels and cost vectors are additionally reshaped into
# single-column matrices.
def _column(values, rows):
    """Select `rows` from `values` and return them with shape (n, 1)."""
    return values[rows].reshape(-1, 1)

X_train, X_test = X_ten[train_indices], X_ten[test_indices]
Y_train, Y_test = _column(Y_ten, train_indices), _column(Y_ten, test_indices)

TP_train, TP_test = _column(TP_ten, train_indices), _column(TP_ten, test_indices)
FP_train, FP_test = _column(FP_ten, train_indices), _column(FP_ten, test_indices)
TN_train, TN_test = _column(TN_ten, train_indices), _column(TN_ten, test_indices)
FN_train, FN_test = _column(FN_ten, train_indices), _column(FN_ten, test_indices)

In [11]:
class CostSensitiveRegression(nn.Module):
    """A single linear layer whose output is squashed by a sigmoid.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output units per sample.
    """

    def __init__(self , input_dim , output_dim):
        super().__init__()
        # The only trainable layer: an affine map from inputs to outputs.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return the element-wise sigmoid."""
        logits = self.fc1(x)
        return logits.sigmoid()

In [12]:
def CostSensitiveLoss(y, y_hat, tp, fp, fn, tn):
    """Mean expected misclassification cost of probabilistic predictions.

    The per-element cost is

        y * (y_hat * tp + (1 - y_hat) * fn) + (1 - y) * (y_hat * fp + (1 - y_hat) * tn)

    and the returned value is its mean over all elements.

    Args:
        y: ground-truth labels (1 for positive, 0 for negative).
        y_hat: predicted weight of the positive class; (1 - y_hat) is the
            weight of the negative class.
        tp: cost applied to y_hat when y is 1.
        fp: cost applied to y_hat when y is 0.
        fn: cost applied to (1 - y_hat) when y is 1.
        tn: cost applied to (1 - y_hat) when y is 0.

    Returns:
        A scalar tensor. It takes part in autograd exactly when its inputs
        do; the function no longer forces requires_grad on the result,
        because doing so let backward() succeed on a detached graph without
        any model parameter receiving a gradient.
    """
    cost_if_positive = y_hat * tp + (1 - y_hat) * fn
    cost_if_negative = y_hat * fp + (1 - y_hat) * tn
    return torch.mean(y * cost_if_positive + (1 - y) * cost_if_negative)

In [17]:
# --- Training configuration ---
epochs = 10
batch_size = 1000
# Ceiling division: `len(X_train) // batch_size + 1` counted one batch too
# many whenever len(X_train) was an exact multiple of batch_size.
num_batches = (len(X_train) + batch_size - 1) // batch_size
# Scalar costs. The loop below does not read these (nor batch_size /
# num_batches); it uses the per-sample TP/FP/FN/TN tensors instead.
tp = 4
fp = 4
tn = 0

# Seed before building the model so the initial weights, and therefore the
# printed losses, are reproducible across kernel restarts.
torch.manual_seed(0)

# Take the input width from the data instead of hard-coding the column count.
model = CostSensitiveRegression(X_train.shape[1], 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: each epoch is one forward/backward pass and one
# optimizer step over the whole training set.
for epoch in range(epochs):
    y_hat_probs = model(X_train)
    loss = CostSensitiveLoss(Y_train, y_hat_probs, TP_train, FP_train, FN_train, TN_train)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()
    optimizer.step()
    print("Epoch : {} , Loss : {}".format(epoch + 1 , loss.item()))


Epoch : 1 , Loss : 124.00675964355469
Epoch : 2 , Loss : 120.8329086303711
Epoch : 3 , Loss : 117.3321304321289
Epoch : 4 , Loss : 113.56090545654297
Epoch : 5 , Loss : 109.58331298828125
Epoch : 6 , Loss : 105.40796661376953
Epoch : 7 , Loss : 100.98535919189453
Epoch : 8 , Loss : 96.25006103515625
Epoch : 9 , Loss : 91.15387725830078
Epoch : 10 , Loss : 85.67796325683594
