In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

In [3]:
df = pd.read_csv('train.csv')
dfTest = pd.read_csv('test.csv')
dataset = pd.read_csv('train.csv')

In [4]:
assert dataset.isnull().sum().sum() == 0

num_col_names = list(dataset.select_dtypes(include='number').columns)
cat_col_names = list(set(dataset.columns) - set(num_col_names))

In [5]:
df = pd.get_dummies(df, columns=cat_col_names)
tensor = torch.from_numpy(df.values)

In [6]:
df.pop("id")

0          0
1          1
2          2
3          3
4          4
        ... 
1672    1672
1673    1673
1674    1674
1675    1675
1676    1676
Name: id, Length: 1677, dtype: int64

In [7]:
labels = df.pop("Attrition")

In [8]:
from torch.utils.data import random_split
from torch.utils.data import TensorDataset
from sklearn.preprocessing import StandardScaler


sca = StandardScaler()
df = sca.fit_transform(df)

tensor = torch.from_numpy(df)
labeltensor = torch.from_numpy(labels.values)

dataset = TensorDataset(tensor, labeltensor)


train_ratio = 0.85 # ratio of data to be used for training

train_len = int(train_ratio * len(dataset))
test_len = len(dataset) - train_len

train_set, test_set = random_split(dataset, [train_len, test_len])

In [9]:
from torch.utils.data import Dataset, DataLoader

loader = DataLoader(train_set, batch_size=32, shuffle=True)
loaderVal = DataLoader(test_set, batch_size=32, shuffle=True)

In [10]:
class MihaNetForSwag(nn.Module):

    def __init__(self):
        super(MihaNetForSwag, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(54, 256)
        self.fc2 = nn.Linear(256, 64) 
        self.fc3 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.33)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x
    


In [11]:
# set the number of training iterations (epochs)
num_epochs = 60
#torch.set_grad_enabled(True) 

model = MihaNetForSwag()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
criterion = nn.BCELoss()

# start the training loop
for epoch in range(num_epochs):
    # loop over the training data in batches
    for data, labels in loader:
        output = model(data.type(torch.FloatTensor))
        loss = criterion(output, labels.reshape(-1,1).type(torch.FloatTensor))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # print the loss at the end of the epoch
    print("Epoch {}/{}, Loss: {:.4f}".format(epoch+1, num_epochs, loss.item()))

Epoch 1/60, Loss: 0.3883
Epoch 2/60, Loss: 0.2578
Epoch 3/60, Loss: 0.1835
Epoch 4/60, Loss: 0.2127
Epoch 5/60, Loss: 0.5005
Epoch 6/60, Loss: 0.1835
Epoch 7/60, Loss: 0.1315
Epoch 8/60, Loss: 0.2277
Epoch 9/60, Loss: 0.1499
Epoch 10/60, Loss: 0.1461
Epoch 11/60, Loss: 0.0811
Epoch 12/60, Loss: 0.0819
Epoch 13/60, Loss: 0.1238
Epoch 14/60, Loss: 0.2825
Epoch 15/60, Loss: 0.0745
Epoch 16/60, Loss: 0.1403
Epoch 17/60, Loss: 0.2273
Epoch 18/60, Loss: 0.0229
Epoch 19/60, Loss: 0.0762
Epoch 20/60, Loss: 0.0186
Epoch 21/60, Loss: 0.0116
Epoch 22/60, Loss: 0.0151
Epoch 23/60, Loss: 0.3179
Epoch 24/60, Loss: 0.0243
Epoch 25/60, Loss: 0.1472
Epoch 26/60, Loss: 0.2567
Epoch 27/60, Loss: 0.2611
Epoch 28/60, Loss: 0.0037
Epoch 29/60, Loss: 0.0296
Epoch 30/60, Loss: 0.1894
Epoch 31/60, Loss: 0.0595
Epoch 32/60, Loss: 0.0178
Epoch 33/60, Loss: 0.0417
Epoch 34/60, Loss: 0.1230
Epoch 35/60, Loss: 0.0042
Epoch 36/60, Loss: 0.0408
Epoch 37/60, Loss: 0.0123
Epoch 38/60, Loss: 0.0786
Epoch 39/60, Loss: 0.

In [12]:
validations = []
val_inputs = []


for x, y in loaderVal:
    for xt in x:
        # print(xt)
        val_inputs.append(xt.numpy())
    for yt in y:
        validations.append(yt)

# print(val_inputs)
        
model.eval()
out = model.forward(torch.from_numpy(np.array(val_inputs)).type(torch.FloatTensor))
val_loss = criterion(out,torch.from_numpy(np.array(validations)).type(torch.FloatTensor).reshape(-1,1)) 
vl = val_loss

print("Validation Loss: {:.4f}".format(vl.item()))


Validation Loss: 1.0788


In [13]:
ids = dfTest.pop("id")

In [14]:
dfTest = pd.get_dummies(dfTest, columns=cat_col_names)

In [15]:
test_setf = sca.transform(dfTest)

In [16]:
final = []
for data in test_setf:
    final.append(model(torch.from_numpy(data).type(torch.FloatTensor)).detach().numpy()[0])

In [17]:

Output = pd.DataFrame({'id':ids,'Attrition':final})

In [18]:
Output.to_csv('Submision.csv',index=False)
Output.head()

Unnamed: 0,id,Attrition
0,1677,0.0001908519
1,1678,0.0002304955
2,1679,2.314597e-07
3,1680,4.130553e-05
4,1681,0.1700484
