In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('train.csv')
dfTest = pd.read_csv('test.csv')
dataset = pd.read_csv('train.csv')

dfe = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')

In [3]:
assert dataset.isnull().sum().sum() == 0

num_col_names = list(dataset.select_dtypes(include='number').columns)
cat_col_names = list(set(dataset.columns) - set(num_col_names))

In [4]:
df = pd.get_dummies(df, columns=cat_col_names)
tensor = torch.from_numpy(df.values)

In [5]:
df.pop("id")

0          0
1          1
2          2
3          3
4          4
        ... 
1672    1672
1673    1673
1674    1674
1675    1675
1676    1676
Name: id, Length: 1677, dtype: int64

In [6]:
labels = df.pop("Attrition")

In [7]:
from torch.utils.data import random_split
from torch.utils.data import TensorDataset
from sklearn.preprocessing import StandardScaler


sca = StandardScaler()
df = sca.fit_transform(df)

tensor = torch.from_numpy(df)
labeltensor = torch.from_numpy(labels.values)

dataset = TensorDataset(tensor, labeltensor)


train_ratio = 0.6 # ratio of data to be used for training

train_len = int(train_ratio * len(dataset))
test_len = len(dataset) - train_len

train_set, test_set = random_split(dataset, [train_len, test_len])

In [8]:
from torch.utils.data import Dataset, DataLoader

loader = DataLoader(train_set, batch_size=32, shuffle=True)
loaderVal = DataLoader(test_set, batch_size=32, shuffle=True)

In [22]:
class MihaNetForSwag(nn.Module):

    def __init__(self):
        super(MihaNetForSwag, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(54, 256)
        self.fc2 = nn.Linear(256, 64) 
        self.fc3 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

class MihaNetForSwag2(nn.Module):

    def __init__(self):
        super(MihaNetForSwag2, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(54, 256)
        self.fc2 = nn.Linear(256, 64) 
        self.fc3 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.4)
        self.dropout2 = nn.Dropout(0.4)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x


    
class DeepDropNet(nn.Module):

    def __init__(self):
        super(DeepDropNet, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(54, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 512) 
        self.fc4 = nn.Linear(512, 256) 
        self.fc5 = nn.Linear(256, 1)
        forget_rate = 0.15
        self.dropout = nn.Dropout(forget_rate)
        self.dropout2 = nn.Dropout(forget_rate)
        self.dropout3 = nn.Dropout(forget_rate)
        self.dropout4 = nn.Dropout(forget_rate)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = F.relu(self.fc3(x))
        x = self.dropout3(x)
        x = F.relu(self.fc4(x))
        x = self.dropout4(x)
        x = torch.sigmoid(self.fc5(x))
        return x
    
class SimpleNet(nn.Module):

    def __init__(self):
        super(SimpleNet, self).__init__()
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(54, 512)
        self.fc2 = nn.Linear(512, 1)
        self.dropout = nn.Dropout(0.6)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc2(x))
        return x
    


In [23]:
# set the number of training iterations (epochs)
num_epochs = 120
#torch.set_grad_enabled(True) 

model = SimpleNet()
optimizer = torch.optim.Adam(model.parameters(),lr=0.0002)
criterion = nn.BCELoss()

# start the training loop
for epoch in range(num_epochs):
    # loop over the training data in batches
    for data, labels in loader:
        output = model(data.type(torch.FloatTensor))
        loss = criterion(output, labels.reshape(-1,1).type(torch.FloatTensor))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # print the loss at the end of the epoch
    print("Epoch {}/{}, Loss: {:.4f}".format(epoch+1, num_epochs, loss.item()))

Epoch 1/120, Loss: 0.4717
Epoch 2/120, Loss: 0.4707
Epoch 3/120, Loss: 0.2524
Epoch 4/120, Loss: 0.5984
Epoch 5/120, Loss: 0.1608
Epoch 6/120, Loss: 0.4070
Epoch 7/120, Loss: 0.2039
Epoch 8/120, Loss: 0.2944
Epoch 9/120, Loss: 0.4486
Epoch 10/120, Loss: 0.3698
Epoch 11/120, Loss: 0.4246
Epoch 12/120, Loss: 0.2377
Epoch 13/120, Loss: 0.1453
Epoch 14/120, Loss: 0.2825
Epoch 15/120, Loss: 0.1614
Epoch 16/120, Loss: 0.3573
Epoch 17/120, Loss: 0.3195
Epoch 18/120, Loss: 0.1024
Epoch 19/120, Loss: 0.1776
Epoch 20/120, Loss: 0.1322
Epoch 21/120, Loss: 0.3271
Epoch 22/120, Loss: 0.3598
Epoch 23/120, Loss: 0.0930
Epoch 24/120, Loss: 0.1124
Epoch 25/120, Loss: 0.4011
Epoch 26/120, Loss: 0.2347
Epoch 27/120, Loss: 0.3127
Epoch 28/120, Loss: 0.2103
Epoch 29/120, Loss: 0.5104
Epoch 30/120, Loss: 0.2060
Epoch 31/120, Loss: 0.5615
Epoch 32/120, Loss: 0.0632
Epoch 33/120, Loss: 0.2713
Epoch 34/120, Loss: 0.1599
Epoch 35/120, Loss: 0.3690
Epoch 36/120, Loss: 0.4139
Epoch 37/120, Loss: 0.1012
Epoch 38/1

In [24]:
validations = []
val_inputs = []


for x, y in loaderVal:
    for xt in x:
        # print(xt)
        val_inputs.append(xt.numpy())
    for yt in y:
        validations.append(yt)

# print(val_inputs)
        
model.eval()
out = model.forward(torch.from_numpy(np.array(val_inputs)).type(torch.FloatTensor))
val_loss = criterion(out,torch.from_numpy(np.array(validations)).type(torch.FloatTensor).reshape(-1,1)) 
vl = val_loss

print("Validation Loss: {:.4f}".format(vl.item()))


Validation Loss: 0.3718


In [25]:
ids = dfTest.pop("id")

KeyError: 'id'

In [13]:
dfTest = pd.get_dummies(dfTest, columns=cat_col_names)

In [14]:
test_setf = sca.transform(dfTest)

In [26]:
final = []
for data in test_setf:
    final.append(model(torch.from_numpy(data).type(torch.FloatTensor)).detach().numpy()[0])

In [27]:

Output = pd.DataFrame({'id':ids,'Attrition':final})

In [28]:
Output.to_csv('Submision.csv',index=False)
Output.head()

Unnamed: 0,id,Attrition
0,1677,0.038483
1,1678,0.066631
2,1679,0.000514
3,1680,0.002333
4,1681,0.533811


In [18]:
# Be careful to overwrite our original name file!
model_name = 'simplenet.net'
torch.save(model.state_dict(),model_name)