In [112]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader

In [113]:
########### Loading the dataset #############################

data_normal = pd.read_csv("data_normal.csv")
data_normal.columns
data_normal.sample(10)

Unnamed: 0,x_1,x_2,y,cluster
12092,-18.450681,21.895815,1,11
12513,-10.572193,-17.949003,1,13
15816,8.37083,1.037896,1,27
15112,7.283613,-28.892073,1,24
9927,-29.722763,-2.667414,1,3
16117,8.558161,8.207811,1,28
6167,8.904074,-30.253288,0,24
4082,-7.332921,9.193358,0,16
10330,-29.991235,9.765038,1,4
17251,19.654346,0.457855,1,33


In [133]:
class dataset(Dataset):
    def __init__(self,x,y):
        self.x = torch.tensor(x,dtype=torch.float32)
        self.y = torch.tensor(y,dtype=torch.float32)
        self.length = self.x.shape[0]

    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]
    def __len__(self):
        return self.length


X_train, X_test, y_train, y_test = train_test_split(np.array(data_normal[["x_1", "x_2"]]), 
                                                    np.array(data_normal["y"]), test_size=0.3, random_state=42)

trainset = dataset(X_train, y_train)
testset = dataset(X_test, y_test)

#DataLoader
trainloader = DataLoader(trainset,batch_size=64,shuffle=False)
testloader = DataLoader(testset,batch_size=64,shuffle=False)

In [134]:
################## Define the model ###########################
class LinearModel(nn.Module):
    def __init__(self, input_size, hidden_size, out_size):
        super(LinearModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.b1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.b2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.b3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 16)
        self.b4 = nn.BatchNorm1d(16)
        self.fc5 = nn.Linear(16, 8)
        self.b5 = nn.BatchNorm1d(8)
        self.fc6 = nn.Linear(8, 1)
        self.sig = nn.Sigmoid()
        # self.relu = nn.ReLU()
                           
    def get_weights(self):
        return self.weight
    
    def forward(self,x):
        out = F.relu(self.fc1(x))
        out = self.b1(out)
        out = F.relu(self.fc2(out))
        out = self.b2(out)
        out = F.relu(self.fc3(out))
        out = self.b3(out)
        out = F.relu(self.fc4(out))
        out = self.b4(out)
        out = F.relu(self.fc5(out))
        out = self.b5(out)
        out = self.fc6(out)
        # out = self.relu(out)
        out = self.sig(out) #sigmoid as we use BCELoss
        return out

In [135]:
######################### Hyper-parameters #########################
input_size = 2
hidden_size = 32
out_size = 1 
num_epochs = 5
learning_rate = 0.001
BATCH_SIZE_1 = 1

model = LinearModel(input_size, hidden_size, out_size)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [136]:
def train(model, train_loader, optimizer):
    model.train()
    y_true = []
    y_pred = []
    for i in tqdm(train_loader):
        
        data, target = i
 
        # data, target = data.cuda(), target.cuda()
       
        #FORWARD PASS
        output = model(data.float())
        loss = criterion(output, target.unsqueeze(1)) 
        
        #BACKWARD AND OPTIMIZE
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # PREDICTIONS 
        pred = np.round(output.detach())
        target = np.round(target.detach())             
        y_pred.extend(pred.tolist())
        y_true.extend(target.tolist())
        
    print("Accuracy on training set is" ,         
    accuracy_score(y_true,y_pred))

In [137]:
#TESTING THE MODEL
def test(model, test_loader):
    #model in eval mode skips Dropout etc
    model.eval()
    y_true = []
    y_pred = []
    
    # set the requires_grad flag to false as we are in the test mode
    with torch.no_grad():
        for i in test_loader:
            
            #LOAD THE DATA IN A BATCH
            data,target = i
            
            
            # the model on the data
            output = model(data.float())
                       
            #PREDICTIONS
            pred = np.round(output)
            target = target.float()
            y_true.extend(target.tolist()) 
            y_pred.extend(pred.reshape(-1).tolist())
    
            
    print("Accuracy on test set is" , accuracy_score(y_true,y_pred))
    print("***********************************************************")

In [138]:
train(model, trainloader, optimizer)
test(model, testloader)

100%|██████████| 197/197 [00:01<00:00, 101.98it/s]


Accuracy on training set is 0.5934920634920635
Accuracy on test set is 0.6344444444444445
***********************************************************


In [139]:
X_train.shape

(12600, 2)

In [140]:
##### Include cluster id as input

X_train, X_test, y_train, y_test = train_test_split(np.array(data_normal[["x_1", "x_2", "cluster"]]), 
                                                    np.array(data_normal["y"]), test_size=0.3, random_state=42)

print(X_train.shape)
trainset = dataset(X_train, y_train)
testset = dataset(X_test, y_test)

#DataLoader
trainloader = DataLoader(trainset,batch_size=64,shuffle=False)
testloader = DataLoader(testset,batch_size=64,shuffle=False)

######################### Hyper-parameters #########################
input_size = 3

model = LinearModel(input_size, hidden_size, out_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

(12600, 3)


In [141]:
train(model, trainloader, optimizer)
test(model, testloader)

100%|██████████| 197/197 [00:01<00:00, 98.68it/s] 


Accuracy on training set is 0.6036507936507937
Accuracy on test set is 0.6212962962962963
***********************************************************
