In [1]:
import numpy as np
import pandas as pd
from PIL import Image
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torch
import torch.nn as nn
import random
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import datetime
import sys
import sklearn
from torch.autograd import Function
import torch.nn.functional as F

In [2]:
batch_size = 8
input_size = (256, 128)
aspect_ratio = [2, 1]
testing = True
if(torch.cuda.is_available()):
  device=torch.device('cuda')
  print("GPU")
else:
  device=torch.device('cpu')
  print('CPU')

GPU


In [3]:
def progress_bar(start, i, training_batch_count):
    elapsed_time = (datetime.datetime.now() - start).seconds // 60
    sys.stdout.write('\r')
    sys.stdout.write("Validating: [%-50s] %d%% || ETA: %d minutes"
                     % ('=' * int(50 * (i + 1) / training_batch_count), int(100 * (i + 1) / training_batch_count),
                        (elapsed_time / (i+1)) * (training_batch_count - i)))
    sys.stdout.flush()
    
def show_image_grid(images, count, title=''):
    _, axs = plt.subplots(int(1+(count-1)/4), 4, figsize=(count*4, 12))
    axs = axs.flatten()
    for img, ax in zip(images, axs):
        if img.mode == 'RGB':
            ax.imshow(img, vmin=0, vmax=255)
        else:
            ax.imshow(img, cmap='gray', vmin=0, vmax=255)
    plt.title(title)
    plt.show()

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.43335001, 0.43360192, 0.42602362), (0.28486016, 0.28320433, 0.28699529)),
])
flipTransform = transforms.RandomHorizontalFlip(p=1)
zoomTransform = transforms.RandomResizedCrop(input_size[:: -1], scale=(0.7, 1))
colorTransform = transforms.ColorJitter(brightness=(0.65, 0.9), contrast=(1.1, 1.35))

def resize_image(image):
    w, h = image.size
    cut_h = h - aspect_ratio[1] * w / aspect_ratio[0]
    image = image.crop((0, cut_h / 2, w, h-(cut_h/2)))
    image = image.resize(input_size)
    return image

In [5]:
def generate_batches(df, train=True, validation=False):
    if train:
        labels = df['has_under_extrusion'].tolist()
    printers = df['printer_id'].unique().tolist()
    printer_domain_map = {printer_id: idx for idx, printer_id in enumerate(printers)}
    domains = [printer_domain_map[domain] for domain in df['printer_id'].tolist()]
    prints = df['print_id'].unique().tolist()
    print_domain_map = {print_id: idx for idx, print_id in enumerate(prints)}
    print_jobs = [print_domain_map[print] for print in df['print_id'].tolist()]
        
    image_paths = df['img_path'].tolist()
    current = 0
    while current < len(df):
        batch_images, batch_domains, batch_labels, batch_paths, batch_prints = [], [], [], [], []
        batch_idx = 0
        reserve_images, reserve_domains, reserve_labels, reserve_prints = [], [], [], []
        
        while batch_idx < batch_size:
            if current + batch_idx >= len(df):
                break
            
            image_path = image_paths[current + batch_idx]
            domain = domains[current + batch_idx]
            print_job = print_jobs[current + batch_idx]
            filename = '/kaggle/input/early-detection-of-3d-printing-issues/images/'+image_path
            image = Image.open(filename)
            image = resize_image(image)
            if train:
                label = labels[current + batch_idx]
                batch_labels.append(label)
            batch_domains.append(domain)
            batch_prints.append(print_job)
            batch_images.append(transform(image))
            batch_paths.append(image_path)
            batch_idx += 1
        current += batch_size
        if train:
            yield batch_images, np.array(batch_labels), np.array(batch_domains), np.array(batch_prints)
        else:
            yield batch_images, batch_paths

In [6]:
# full_df = pd.read_csv('/kaggle/input/early-detection-of-3d-printing-issues/train.csv')
full_df = pd.read_csv('/kaggle/input/new-csv/semi-supervised-train.csv')
# val_df = full_df.loc[full_df['printer_id']==22|(full_df['printer_id']==101&full_df['print_id'].isin([1678580155, 1678593348]))]
# train_df = full_df.loc[~(full_df['printer_id']==22|(full_df['printer_id']==101&full_df['print_id'].isin([1678580155, 1678593348])))]
if testing:
    num_domains = len(full_df['printer_id'].unique().tolist())
else:
    num_domains = len(train_df['printer_id'].unique().tolist())

In [7]:
class_weights = sklearn.utils.class_weight.compute_class_weight(
                                        class_weight = "balanced",
                                        classes = np.unique(full_df['has_under_extrusion'].tolist()),
                                        y = full_df['has_under_extrusion'].tolist()                                                    
                                    )
class_weights = torch.tensor(class_weights,dtype=torch.float).to(device)
class_weights

tensor([1.0540, 0.9513], device='cuda:0')

In [28]:
def batch_shuffle(df):
    index_list = np.array(df.index)
    index_list = index_list[:int(len(df)/batch_size)*batch_size]
    np.random.shuffle(np.reshape(index_list, (-1, batch_size)))
    return df.loc[index_list, :]

In [8]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, use_batchnorm=True):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if not use_batchnorm:
            self.bn1 = self.bn2 = nn.Sequential()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(self.expansion * planes) if use_batchnorm else nn.Sequential(),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, use_batchnorm=True):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.use_batchnorm = use_batchnorm
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64) if use_batchnorm else nn.Sequential()
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * 8 * 4, num_classes)
        self.linear_domain = nn.Linear(512 * 8 * 4, num_domains)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, self.use_batchnorm))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, alpha):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)      
        out = out.view(out.size(0), -1)
        
        out_domain = out
        if alpha is not None:
            out_domain = ReverseLayerF.apply(out_domain, alpha)
            out_domain = self.linear_domain(out_domain)
        out = self.linear(out)
        
        return out, out_domain


def ResNet10(num_classes=2, use_batchnorm=True):
    return ResNet(BasicBlock, [1, 1, 1, 1], num_classes=num_classes, use_batchnorm=use_batchnorm)


def ResNet18(num_classes=2, use_batchnorm=True):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, use_batchnorm=use_batchnorm)


def ResNet34(num_classes=2, use_batchnorm=True):
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, use_batchnorm=use_batchnorm)

In [9]:
class ReverseLayerF(Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha
        return output, None

In [10]:
def print_loss_function(output, target, print_jobs):
    unique_print_jobs = torch.unique(print_jobs)
    num_print_jobs = unique_print_jobs.shape[0]
    pj_losses = []
    for i in range(num_print_jobs):
        pj_mask = (print_jobs == unique_print_jobs[i])
        pj_output = output[pj_mask]
        pred = torch.max(pj_output, dim=1)[1]
        count_0, count_1 = 0, 0
        for j in range(len(pj_output)):
            if pred[j] == 0:
                count_0 += 1
            if pred[j] == 1:
                count_1 += 1
        pj_loss = min(count_0, count_1) / len(pj_output)
        pj_losses.append(pj_loss)
    pj_loss = sum(pj_losses) / num_print_jobs
    return torch.tensor(pj_loss, requires_grad=True).to(device)

In [107]:
model = ResNet10().to(device)
loss_function = nn.CrossEntropyLoss(weight=class_weights)
domain_loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
domain_lambda = 0.6
print_lambda = 4
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

In [14]:
test_df = pd.read_csv('/kaggle/input/early-detection-of-3d-printing-issues/test.csv')
def predict_test(images, model):
    images = torch.stack(images, dim=0, out=None).to(device)
    output, _ = model(images, alpha=None)
    _, pred = torch.max(output, dim=1)
    x2 = torch.nn.functional.softmax(output).detach()
    return np.asarray(pred.cpu()), np.asarray(x2.cpu())

In [15]:
def run_testing(epoch=-1):
    test_generator = generate_batches(test_df, train=False)
    results = pd.DataFrame(columns=['img_path', 'has_under_extrusion'])
    start, idx = datetime.datetime.now(), 0
    for images, paths in test_generator:
        progress_bar(start, idx, len(test_df) / batch_size)
        idx += 1
        predictions, _ = predict_test(images, model)
        for i in range(min(len(predictions), batch_size)):
            temp_data = {'img_path': paths[i], 'has_under_extrusion': predictions[i]}
            results = pd.concat([results, pd.DataFrame.from_records([temp_data])])
    if epoch == -1:
        filename = 'submission.csv'
    else:
        filename = 'submission'+str(epoch+1)+'.csv'
    results.to_csv(filename, index=False)

In [108]:
n_epochs = 5
print_every = 3000
train_loss = []
for epoch in range(n_epochs):
    total_step = len(new_df) / batch_size
    new_df = new_df.sample(frac=1).reset_index(drop=True)
#     full_df = print_job_shuffle(full_df)
#     full_df = batch_shuffle(full_df)
    generator = generate_batches(new_df)
    running_loss, correct, domain_correct, total, current_step,  current_idx = 0.0, 0, 0, 0, 0, 0
    total_step_loss, step_loss, domain_step_loss, print_step_loss = 0.0, 0.0, 0.0, 0.0
    print(f'\nEpoch {epoch+1}')
    model.train()
    for images, labels, domains, print_jobs in generator:
        p = float(current_idx + epoch * total_step) / n_epochs / total_step
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        current_idx += 1
        optimizer.zero_grad()
        images = torch.stack(images, dim=0, out=None).to(device)
        labels = torch.from_numpy(labels).to(device)
        domains = torch.from_numpy(domains).to(device)
        print_jobs = torch.from_numpy(print_jobs).to(device)
        outputs, domain_outputs = model(images, alpha)
        
        classification_loss = loss_function(outputs, labels)
        domain_loss = domain_loss_function(domain_outputs, domains)
        print_loss = print_loss_function(outputs, labels, print_jobs)
        loss = classification_loss + domain_lambda*domain_loss + print_lambda*print_loss
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        step_loss += classification_loss.item()
        domain_step_loss += domain_loss.item()
        total_step_loss += loss.item()
        print_step_loss += print_loss.item()


        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        _,domain_pred = torch.max(domain_outputs, dim=1)
        domain_correct += torch.sum(domain_pred==domains).item()
        total += labels.size(0)
        
        current_step += 1
        if current_step % print_every == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Classification Loss: {:.4f}, Domain Loss: {:.4f}, Print Loss: {:.4f}, Total Loss: {:.4f}' 
                   .format(epoch+1, n_epochs, current_step, total_step, step_loss / print_every, 
                           domain_step_loss / print_every, print_step_loss / print_every, total_step_loss / print_every))
            step_loss = 0.0
            domain_step_loss = 0.0
            print_step_loss = 0.0
            total_step_loss = 0.0

    train_loss.append(running_loss/total_step)
    print(f'\ntrain-loss: {np.mean(train_loss):.4f}, train-acc: {(100 * correct/total):.4f}, train-domain-acc: {(100 * domain_correct/total):.4f}')
    scheduler.step()
    if epoch > 2:
        run_testing(epoch)
        torch.save(model.state_dict(), 'TempModel'+str(epoch+1)+'.pt')


Epoch 1
Epoch [1/5], Step [3000/14680.5], Classification Loss: 0.3073, Domain Loss: 0.1129, Print Loss: 0.0047, Total Loss: 0.3936
Epoch [1/5], Step [6000/14680.5], Classification Loss: 2.1657, Domain Loss: 24.5372, Print Loss: 0.0053, Total Loss: 16.9091
Epoch [1/5], Step [9000/14680.5], Classification Loss: 4.8831, Domain Loss: 105.9742, Print Loss: 0.0073, Total Loss: 68.4970
Epoch [1/5], Step [12000/14680.5], Classification Loss: 5.5151, Domain Loss: 153.8866, Print Loss: 0.0046, Total Loss: 97.8655

train-loss: 60.3983, train-acc: 82.7348, train-domain-acc: 34.6225

Epoch 2
Epoch [2/5], Step [3000/14680.5], Classification Loss: 2.7689, Domain Loss: 141.9695, Print Loss: 0.0027, Total Loss: 87.9616
Epoch [2/5], Step [6000/14680.5], Classification Loss: 1.7177, Domain Loss: 72.6740, Print Loss: 0.0021, Total Loss: 45.3306
Epoch [2/5], Step [9000/14680.5], Classification Loss: 1.4215, Domain Loss: 51.5562, Print Loss: 0.0024, Total Loss: 32.3648
Epoch [2/5], Step [12000/14680.5], Cl

KeyboardInterrupt: 

In [110]:
run_testing()

Validating: [                                                  ] 0% || ETA: 0 minutes

  




In [19]:
torch.save(model.state_dict(), 'TempModel1.pt')

In [12]:
model.load_state_dict(torch.load('/kaggle/input/models/TempModel6.pt'))

<All keys matched successfully>

In [17]:
pred = pd.DataFrame(columns=['img_path', 'printer_id', 'print_id', 'has_under_extrusion'])
batch = []
for idx, row in test_df.iterrows():
    if len(batch) == batch_size:
        images, printers, prints, paths = [], [], [], []
        for sample in batch:
            paths.append(sample[0])
            filename = '/kaggle/input/early-detection-of-3d-printing-issues/images/'+sample[0]
            image = Image.open(filename)
            image = resize_image(image)
            images.append(transform(image))
            printers.append(sample[1])
            prints.append(sample[2])
        batch = []
        predictions, probs = predict_test(images, model)
        probs_thresh = probs.max(axis=1)
        for i in range(batch_size):
            if probs_thresh[i] > 0.75:
                temp_data = {'img_path': paths[i], 'printer_id':printers[i], 'print_id':prints[i], 'has_under_extrusion': predictions[i]}
                pred = pd.concat([pred, pd.DataFrame.from_records([temp_data])])
    else:
        batch.append(row)

  


In [18]:
pred.reset_index(drop=True, inplace=True)

In [99]:
len(pred), len(test_df)

(21630, 25279)

In [21]:
prints = pred['print_id'].unique().tolist()
pred['print_id'].unique()

array([1678578332, 1678343246, 1678764144, 1678744931, 1674179039,
       1674179701, 1674180283, 1674180772, 1674181142, 1674182025,
       1674182884, 1674183481, 1674185002, 1674185462, 1674184477,
       1674186076, 1674187131, 1674183959, 1674184223, 1679011090,
       1678831256, 1678452362, 1679222113, 1673025450, 1673032842,
       1673034242, 1673031848, 1673034918, 1673037071, 1673037348,
       1673037665, 1673038169, 1673047933, 1673040188, 1673047525,
       1673040671, 1673041144, 1673041994, 1673043066, 1673043367,
       1673043651, 1673046546, 1672776024, 1672795514], dtype=object)

In [97]:
pred.loc[pred['print_id']==1673034918]['has_under_extrusion'].value_counts()

1    496
0    244
Name: has_under_extrusion, dtype: int64

In [98]:
pred['has_under_extrusion'].loc[pred['print_id']==1673034918] = 1
pred.loc[pred['print_id']==1673034918]['has_under_extrusion'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


1    740
Name: has_under_extrusion, dtype: int64

In [95]:
pred = pred[pred.print_id != 1673037071]

In [100]:
new_df = pd.concat([pred, full_df])

In [101]:
new_df.reset_index(drop=True, inplace=True)

In [102]:
new_df

Unnamed: 0,img_path,printer_id,print_id,has_under_extrusion
0,101/1678578332/1678578538.704966.jpg,101,1678578332,1
1,101/1678578332/1678578539.108019.jpg,101,1678578332,1
2,101/1678578332/1678578539.512872.jpg,101,1678578332,1
3,101/1678578332/1678578539.916711.jpg,101,1678578332,1
4,101/1678578332/1678578540.329089.jpg,101,1678578332,1
...,...,...,...,...
117439,102/1678816535/1678817028.16467.jpg,102,1678816535,1
117440,104/1678415191/1678415719.521451.jpg,104,1678415191,0
117441,101/1678580155/1678580442.074918.jpg,101,1678580155,1
117442,101/1678589738/1678590940.145716.jpg,101,1678589738,1


In [103]:
new_df.to_csv('semi-supervised-train.csv', index=False)