In [1]:
import pandas as pd

# Load the raw training and test tables from disk.
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# Categorical columns and, position-aligned with them, the string -> number
# lookup that is applied to each one.
cols = ['attribute_0', 'attribute_1']
mapper = [{'material_7': 7.0, 'material_5': 5.0}, {'material_8': 8.0, 'material_5': 5.0, 'material_6': 6.0, 'material_7': 7.0}]

# Apply exactly the same encoding to both frames.
for frame in (train_df, test_df):
    # The column at position 1 holds one character per row (presumably the
    # product code -- confirm against the CSV header). It is encoded as
    # ord(char) * 3.0. Doing this on the whole column at once replaces the
    # previous per-row `.iat` loop and leaves the column with a float dtype.
    code_col = frame.columns[1]
    frame[code_col] = frame[code_col].map(ord) * 3.0

    # Swap the material labels for their numeric codes. `replace` leaves any
    # value missing from the mapping untouched, so an unexpected category is
    # not silently turned into NaN here.
    for col, mapping in zip(cols, mapper):
        frame[col] = frame[col].replace(mapping)

# 'id' is dropped from the model inputs; train_df / test_df keep it.
x_train_df = train_df.drop(['id'], axis=1)
x_test_df = test_df.drop(['id'], axis=1)

from sklearn.impute import SimpleImputer
import numpy as np

# Columns that must be numeric before imputation (the two mapped material
# attributes plus the integer-like attribute/measurement columns).
float_cols = ['attribute_0', 'attribute_1', 'attribute_2', 'attribute_3',
              'measurement_0', 'measurement_1', 'measurement_2']
x_train_df = x_train_df.astype({name: 'float' for name in float_cols})
x_test_df = x_test_df.astype({name: 'float' for name in float_cols})

# Feature columns are the ones present in the test frame; whatever else the
# training frame carries is treated as the target.
# NOTE(review): downstream code reads the label from the LAST column of
# X_train, so the train-only column(s) are appended after the features.
feature_cols = list(x_test_df.columns)
label_cols = [name for name in x_train_df.columns if name not in feature_cols]

# Fit the median imputer on the TRAINING features only and reuse it for the
# test set. Previously the same imputer object was refit on the test frame,
# so test rows were filled with test-set medians rather than the statistics
# the model was trained with.
imp = SimpleImputer(strategy='median')
train_imp = imp.fit(x_train_df[feature_cols])
X_train = np.column_stack([
    train_imp.transform(x_train_df[feature_cols]),
    # Labels are passed through unchanged (assumes they contain no missing
    # values -- TODO confirm).
    x_train_df[label_cols].to_numpy(dtype=float),
])
test_imp = train_imp  # kept as an alias so existing references still work
x_test = test_imp.transform(x_test_df[feature_cols])

# DNN

In [2]:
from torch.utils.data import Dataset, DataLoader
import torch

class TaskDataset(Dataset):
    """In-memory dataset over a sequence of rows.

    Args:
        data: iterable of rows (each row must support slicing and indexing).
        label: mode switch. When left as ``None`` every row is split into a
            ``(row[:-1], row[-1])`` pair, i.e. the last element is treated as
            the target. When any other value is passed the rows are stored
            unchanged.
    """

    def __init__(self, data, label=None):
        rows = list(data)
        # `label is None` means the rows still carry their target in the
        # last position, so it is peeled off here.
        self.data = rows if label is not None else [(row[:-1], row[-1]) for row in rows]

    def __len__(self):
        """Return the number of stored items."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the stored item at ``index``."""
        return self.data[index]

# Mini-batch size used for training.
batchs = 500

# X_train rows are split into (features, label) pairs by TaskDataset.
train_ds = TaskDataset(X_train)

# Shuffled loader; the final incomplete batch of each epoch is dropped.
train_dl = DataLoader(
    train_ds,
    batch_size=batchs,
    shuffle=True,
    drop_last=True,
    num_workers=4,
)

In [3]:
from torchvision import datasets, models, transforms, utils
import torch.nn as nn

# set parameters
# Dropout probabilities, in the order the Dropout layers appear in the network.
drop1, drop2, drop3, drop4 = 0.1, 0.15, 0.15, 0.1
# Widths of the five hidden layers.
dim1, dim2, dim3, dim4, dim5 = 512, 2048, 1024, 512, 32

# Create Model
class mymodel(nn.Module):
    """Fully connected network: 24 inputs -> 5 hidden ReLU layers -> 1 sigmoid output.

    The first hidden layer has no dropout; each of the following four hidden
    layers is followed by ReLU and then Dropout (drop1..drop4).
    """

    def __init__(self):
        super().__init__()
        # Input projection: Linear + ReLU, no dropout.
        stack = [nn.Linear(24, dim1), nn.ReLU()]
        # (in_features, out_features, dropout probability) for each remaining
        # hidden layer, in network order.
        hidden_spec = (
            (dim1, dim2, drop1),
            (dim2, dim3, drop2),
            (dim3, dim4, drop3),
            (dim4, dim5, drop4),
        )
        for n_in, n_out, p_drop in hidden_spec:
            stack.extend([nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(p_drop)])
        # Single output unit squashed into (0, 1).
        stack.extend([nn.Linear(dim5, 1), nn.Sigmoid()])
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the network and return the sigmoid outputs."""
        return self.model(x)

In [4]:
# Report whether a CUDA device is visible, and fall back to the CPU when it is
# not, so the cell still runs on machines without a GPU instead of raising.
print(torch.cuda.is_available())
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = mymodel().to(device)
# NOTE(review): 1e-6 is a very small step size for Adam -- confirm it is
# intentional.
LR = 1e-6
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# The network ends in a sigmoid, so plain binary cross-entropy on
# probabilities is used.
loss_fn = nn.BCELoss()

# Number of passes over the training loader.
epo = 35
for epoch in range(epo):
    model.train()  # enable dropout
    for x_attr, label in train_dl:
        # The loader yields whatever dtype the source array had; the model
        # parameters are float32, so cast both tensors.
        x_attr = x_attr.to(device).to(torch.float32)
        label = label.to(device).to(torch.float32)

        # Flatten the (batch, 1) output so it lines up with the 1-D labels.
        pred = model(x_attr).view(len(label))
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Any non-None `label` tells TaskDataset to keep the rows as-is (no target
# column to split off).
test_ds = TaskDataset(x_test, label=0)
test_dl = DataLoader(test_ds, batch_size=100, num_workers=4, drop_last=False, shuffle=False)

# Running id written next to each prediction.
# NOTE(review): presumably the first id in data/test.csv -- confirm.
cnt = 26570
model.eval()  # disable dropout for inference

# The context manager closes the file even if inference raises; no_grad avoids
# building an autograd graph that is never used.
with open("109550042.csv", "w") as f, torch.no_grad():
    f.write("id,failure\n")
    for x_attr in test_dl:
        x_attr = x_attr.to(device).to(torch.float32)

        # One transfer per batch instead of one device sync per row; the
        # resulting Python floats format the same way the 0-dim tensors did.
        pred = model(x_attr)
        for prob in pred[:, 0].tolist():
            f.write(f'{cnt},{prob}\n')
            cnt += 1

# Persist only the learned parameters.
FILE = 'model.pt'
torch.save(model.state_dict(), FILE)

True
