In [0]:
%%capture
!pip install -U catalyst
!pip install albumentations
!pip install pretrainedmodels
!pip install efficientnet-pytorch

# Importing packages

---



In [1]:
import torch
import torch.nn as nn

import pandas as pd
import numpy as np


import catalyst
from catalyst.dl import utils


alchemy not available, to install alchemy, run `pip install alchemy-catalyst`.
alchemy not available, to install alchemy, run `pip install alchemy-catalyst`.


# Test augmentations

In [2]:
import albumentations as albu
from albumentations.pytorch import ToTensor

# Integer border-mode codes handed to the albumentations transforms below.
# NOTE(review): presumably these mirror OpenCV's cv2.BORDER_CONSTANT and
# cv2.BORDER_REFLECT values — confirm against the installed cv2.
BORDER_CONSTANT = 0
BORDER_REFLECT = 2

def pre_transforms(image_size=224):
    """Build the deterministic resizing steps applied to every image.

    The longest side is rescaled to ``image_size`` (aspect ratio is kept) and
    the image is then padded, using the constant border mode, up to an
    ``image_size`` x ``image_size`` square.

    Args:
        image_size: side length, in pixels, of the square output.

    Returns:
        A list of albumentations transforms in application order.
    """
    resize_longest_side = albu.LongestMaxSize(max_size=image_size)
    pad_to_square = albu.PadIfNeeded(
        min_height=image_size,
        min_width=image_size,
        border_mode=BORDER_CONSTANT,
    )
    return [resize_longest_side, pad_to_square]

def hard_transforms():
    """Build the list of random augmentations (geometric, then photometric).

    Returns:
        A list of albumentations transforms in application order.
    """
    geometric = [
        # Random shift / scale / rotation, applied half of the time;
        # exposed borders are filled using the reflect border mode.
        albu.ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.1,
            rotate_limit=15,
            border_mode=BORDER_REFLECT,
            p=0.5,
        ),
        # Vertical flip with the library's default probability.
        albu.VerticalFlip(),
        # Perspective distortion, applied 30% of the time.
        albu.IAAPerspective(scale=(0.02, 0.05), p=0.3),
    ]

    photometric = [
        # Brightness / contrast jitter, applied 30% of the time.
        albu.RandomBrightnessContrast(
            brightness_limit=0.2,
            contrast_limit=0.2,
            p=0.3,
        ),
        # Gamma jitter, applied 30% of the time.
        albu.RandomGamma(gamma_limit=(85, 115), p=0.3),
        # Hue / saturation / value jitter, applied 30% of the time.
        albu.HueSaturationValue(p=0.3),
        # JPEG re-compression with a lower quality bound of 80.
        albu.JpegCompression(quality_lower=80),
    ]

    return geometric + photometric

def post_transforms():
    """Return the closing steps of every pipeline.

    The image is first normalized with ``albu.Normalize()`` defaults (intended
    as ImageNet normalization) and then converted to a ``torch.Tensor``.

    Returns:
        A two-element list: the normalization transform, then the tensor
        conversion.
    """
    normalize = albu.Normalize()
    to_tensor = ToTensor()
    return [normalize, to_tensor]

def compose(transforms_to_compose):
    """Merge several lists of transforms into one albumentations pipeline.

    Args:
        transforms_to_compose: iterable of transform lists, e.g.
            ``[pre_transforms(), post_transforms()]``. The lists are
            concatenated in the order given.

    Returns:
        An ``albu.Compose`` wrapping the flattened list of transforms.
    """
    flattened = []
    for transform_group in transforms_to_compose:
        flattened.extend(transform_group)
    return albu.Compose(flattened)


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject


unclosed file <_io.BufferedReader name='/usr/local/lib/python3.6/dist-packages/torchvision/_C.so'>



In [0]:
from catalyst.data import Augmentor

# Inference pipeline: deterministic resize/pad followed by normalization and
# tensor conversion (no random augmentations).
valid_transforms = compose([pre_transforms(), post_transforms()])


def _apply_valid_transforms(image):
    """Run the validation pipeline on one image and return the transformed image."""
    return valid_transforms(image=image)["image"]


# Applies the pipeline to the "features" entry of each sample dict.
valid_data_transforms = Augmentor(
    dict_key="features",
    augment_fn=_apply_valid_transforms,
)

# Test Loader

In [9]:
# Load the list of test images. Later cells read the `filepath` column, so the
# CSV must provide it.
test_df = pd.read_csv('test.csv')
test_df.head()

Unnamed: 0,filepath
0,ICLR/test/test/008FWT.JPG
1,ICLR/test/test/00AQXY.JPG
2,ICLR/test/test/01OJZX.JPG
3,ICLR/test/test/07OXKK.jfif
4,ICLR/test/test/085IEC.jpg


In [0]:
# One dict per test row (keyed by column name); this is the record format the
# loader below consumes.
test_data = test_df.to_dict(orient='records')

In [0]:
from catalyst.data import ImageReader, ScalarReader, ReaderCompose
# Reads the image referenced by each record's "filepath" and stores it under
# "features", the key the validation Augmentor transforms.
image_reader = ImageReader(
    input_key="filepath",
    output_key="features",
    rootpath=None,
)

test_loader = utils.get_loader(
    test_data,
    open_fn=ReaderCompose([image_reader]),
    dict_transform=valid_data_transforms,
    batch_size=64,
    num_workers=4,
    # No shuffling and no dropped batch: predictions are later written back to
    # test_df by position, so order and count must match the CSV rows.
    shuffle=False,
    sampler=None,
    drop_last=False,
)

# Model

In [0]:
from efficientnet_pytorch import EfficientNet

def get_model(n_classes):
    """Create an EfficientNet-B0 whose classifier head outputs ``n_classes`` scores.

    The network is loaded with pretrained weights and its final fully
    connected layer (``_fc``) is replaced by a fresh ``Linear`` layer with the
    same input width and ``n_classes`` outputs.

    Args:
        n_classes: number of output classes.

    Returns:
        The modified EfficientNet model.
    """
    backbone = EfficientNet.from_pretrained('efficientnet-b0')
    head_input_width = backbone._fc.in_features
    backbone._fc = torch.nn.Linear(head_input_width, n_classes)
    return backbone

In [6]:
# Three output classes, matching the three entries of the `tag_to_label`
# mapping used further down (healthy_wheat, leaf_rust, stem_rust).
model = get_model(3)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/checkpoints/efficientnet-b0-355c32eb.pth


HBox(children=(FloatProgress(value=0.0, max=21388428.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b0


# Prediction

In [0]:
from catalyst.dl import SupervisedRunner
# Let catalyst pick the device, then build the runner used for inference.
# NOTE(review): input_target_key presumably mirrors the training setup; the
# test records carry no targets — confirm it is ignored at predict time.
device = utils.get_device()
runner = SupervisedRunner(
    device=device,
    input_target_key="targets_one_hot",
)

In [13]:
# Checkpoint to restore before predicting.
checkpoint_path = "drive/My Drive/DS/wheat_rust/model1_best.pth"

# Run the model over the whole test loader and collect its raw outputs.
runner_out = runner.predict_loader(model, test_loader, resume=checkpoint_path)


numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192


can't resolve package from __spec__ or __package__, falling back on __name__ and __path__


unclosed file <_io.TextIOWrapper name='/root/.keras/keras.json' mode='r' encoding='UTF-8'>



=> loading checkpoint drive/My Drive/DS/wheat_rust/model1_best.pth
loaded checkpoint drive/My Drive/DS/wheat_rust/model1_best.pth (global epoch 8, epoch 8, stage train)


In [14]:
# Sanity check: one row per test image, one column per class.
runner_out.shape

(610, 3)

In [15]:
# Raw model outputs (not yet normalized; softmax is applied in the next cell).
runner_out

array([[-4.681852 ,  0.6134349, -1.2900218],
       [-5.433887 , -4.927367 ,  4.3162065],
       [-4.681852 ,  0.6134349, -1.2900218],
       ...,
       [-6.535986 , -6.8402276,  5.714055 ],
       [-4.986258 , -1.9813712,  2.063176 ],
       [-2.7113864, -1.0770136, -0.6659966]], dtype=float32)

In [16]:
# Turn the raw per-class scores into probabilities: softmax over the class
# axis (dim=1), then back to a NumPy array.
logits = torch.from_numpy(runner_out)
out = torch.softmax(logits, dim=1).numpy()
out

array([[4.3456522e-03, 8.6650032e-01, 1.2915404e-01],
       [5.8280184e-05, 9.6716249e-05, 9.9984503e-01],
       [4.3456522e-03, 8.6650032e-01, 1.2915404e-01],
       ...,
       [4.7848812e-06, 3.5297217e-06, 9.9999166e-01],
       [8.5223158e-04, 1.7201381e-02, 9.8194641e-01],
       [7.2158322e-02, 3.6990103e-01, 5.5794060e-01]], dtype=float32)

In [17]:
def _image_id(filepath):
    """Return the file name without directories, cut at its first dot."""
    file_name = str(filepath).split('/')[-1]
    return file_name.split('.')[0]


# e.g. 'ICLR/test/test/008FWT.JPG' -> '008FWT'
test_df['ID'] = test_df['filepath'].map(_image_id)
test_df.head()

Unnamed: 0,filepath,ID
0,ICLR/test/test/008FWT.JPG,008FWT
1,ICLR/test/test/00AQXY.JPG,00AQXY
2,ICLR/test/test/01OJZX.JPG,01OJZX
3,ICLR/test/test/07OXKK.jfif,07OXKK
4,ICLR/test/test/085IEC.jpg,085IEC


In [0]:
# Class name -> class index. The index is the column of the model output `out`
# holding that class's probability (see the column assignments that follow).
# NOTE(review): presumably the same mapping that was used at training time — confirm.
tag_to_label = {'healthy_wheat': 0, 'leaf_rust': 1, 'stem_rust': 2}

In [19]:
# Sanity check: a single probability column has one entry per test image.
out[:,1].shape

(610,)

In [20]:
# Column 1 of the softmax output is the leaf_rust probability
# (tag_to_label['leaf_rust'] == 1).
test_df = test_df.assign(leaf_rust=out[:, 1])
test_df.head()

Unnamed: 0,filepath,ID,leaf_rust
0,ICLR/test/test/008FWT.JPG,008FWT,0.8665
1,ICLR/test/test/00AQXY.JPG,00AQXY,9.7e-05
2,ICLR/test/test/01OJZX.JPG,01OJZX,0.8665
3,ICLR/test/test/07OXKK.jfif,07OXKK,0.00718
4,ICLR/test/test/085IEC.jpg,085IEC,0.013442


In [21]:
# Column 2 of the softmax output is the stem_rust probability
# (tag_to_label['stem_rust'] == 2).
test_df = test_df.assign(stem_rust=out[:, 2])
test_df.head()

Unnamed: 0,filepath,ID,leaf_rust,stem_rust
0,ICLR/test/test/008FWT.JPG,008FWT,0.8665,0.129154
1,ICLR/test/test/00AQXY.JPG,00AQXY,9.7e-05,0.999845
2,ICLR/test/test/01OJZX.JPG,01OJZX,0.8665,0.129154
3,ICLR/test/test/07OXKK.jfif,07OXKK,0.00718,0.024959
4,ICLR/test/test/085IEC.jpg,085IEC,0.013442,0.002227


In [22]:
# Column 0 of the softmax output is the healthy_wheat probability
# (tag_to_label['healthy_wheat'] == 0).
test_df = test_df.assign(healthy_wheat=out[:, 0])
test_df.head()

Unnamed: 0,filepath,ID,leaf_rust,stem_rust,healthy_wheat
0,ICLR/test/test/008FWT.JPG,008FWT,0.8665,0.129154,0.004346
1,ICLR/test/test/00AQXY.JPG,00AQXY,9.7e-05,0.999845,5.8e-05
2,ICLR/test/test/01OJZX.JPG,01OJZX,0.8665,0.129154,0.004346
3,ICLR/test/test/07OXKK.jfif,07OXKK,0.00718,0.024959,0.967861
4,ICLR/test/test/085IEC.jpg,085IEC,0.013442,0.002227,0.984331


In [23]:
# Sanity check: filepath, ID and the three probability columns for every test image.
test_df.shape

(610, 5)

In [24]:
# Submission frame: everything except the file path. `drop` returns a new
# frame, so test_df itself is left untouched.
sub_df = test_df.drop(columns=['filepath'])
sub_df.head()

Unnamed: 0,ID,leaf_rust,stem_rust,healthy_wheat
0,008FWT,0.8665,0.129154,0.004346
1,00AQXY,9.7e-05,0.999845,5.8e-05
2,01OJZX,0.8665,0.129154,0.004346
3,07OXKK,0.00718,0.024959,0.967861
4,085IEC,0.013442,0.002227,0.984331


In [0]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
sub_df.to_csv('submission_model1.csv', index=False)

# Getting pseudo labels

In [26]:
# Number of test rows before filtering for confident predictions.
test_df.shape

(610, 5)

In [27]:
# Minimum predicted probability for a test image to be kept as a pseudo-label.
PSEUDO_LABEL_THRESHOLD = 0.75

# Keep only the rows where the model is confident about at least one class.
is_confident = (
    (test_df.leaf_rust > PSEUDO_LABEL_THRESHOLD)
    | (test_df.stem_rust > PSEUDO_LABEL_THRESHOLD)
    | (test_df.healthy_wheat > PSEUDO_LABEL_THRESHOLD)
)

# .copy() makes pseudo_df an independent frame: columns are added to it in a
# later cell, and assigning into a bare boolean-mask slice of test_df raises
# pandas' SettingWithCopyWarning.
pseudo_df = test_df[is_confident].copy()
pseudo_df.head()

Unnamed: 0,filepath,ID,leaf_rust,stem_rust,healthy_wheat
0,ICLR/test/test/008FWT.JPG,008FWT,0.8665,0.129154,0.004346
1,ICLR/test/test/00AQXY.JPG,00AQXY,9.7e-05,0.999845,5.8e-05
2,ICLR/test/test/01OJZX.JPG,01OJZX,0.8665,0.129154,0.004346
3,ICLR/test/test/07OXKK.jfif,07OXKK,0.00718,0.024959,0.967861
4,ICLR/test/test/085IEC.jpg,085IEC,0.013442,0.002227,0.984331


In [28]:
# Number of test rows that passed the confidence filter.
pseudo_df.shape

(537, 5)

In [29]:
# The pseudo-label of a row is the class with the highest predicted probability.
probability_columns = ['leaf_rust', 'stem_rust', 'healthy_wheat']
predicted_class = pseudo_df[probability_columns].idxmax(axis=1)

# Build the new columns with `assign`, which returns a fresh DataFrame, instead
# of writing into pseudo_df in place: pseudo_df may be a slice of test_df, and
# in-place column assignment on a slice raises SettingWithCopyWarning.
# 'class' is a Python keyword, hence the dict unpacking. Column order
# (label, then class) is kept as before.
pseudo_df = pseudo_df.assign(
    label=predicted_class.map(tag_to_label),
    **{'class': predicted_class},
)
pseudo_df.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,filepath,ID,leaf_rust,stem_rust,healthy_wheat,label,class
0,ICLR/test/test/008FWT.JPG,008FWT,0.8665,0.129154,0.004346,1,leaf_rust
1,ICLR/test/test/00AQXY.JPG,00AQXY,9.7e-05,0.999845,5.8e-05,2,stem_rust
2,ICLR/test/test/01OJZX.JPG,01OJZX,0.8665,0.129154,0.004346,1,leaf_rust
3,ICLR/test/test/07OXKK.jfif,07OXKK,0.00718,0.024959,0.967861,0,healthy_wheat
4,ICLR/test/test/085IEC.jpg,085IEC,0.013442,0.002227,0.984331,0,healthy_wheat


In [30]:
# Keep only the columns written to the pseudo-label file, in this order.
pseudo_label_columns = ['class', 'filepath', 'label']
pseudo_df = pseudo_df.loc[:, pseudo_label_columns]
pseudo_df.head()

Unnamed: 0,class,filepath,label
0,leaf_rust,ICLR/test/test/008FWT.JPG,1
1,stem_rust,ICLR/test/test/00AQXY.JPG,2
2,leaf_rust,ICLR/test/test/01OJZX.JPG,1
3,healthy_wheat,ICLR/test/test/07OXKK.jfif,0
4,healthy_wheat,ICLR/test/test/085IEC.jpg,0


In [0]:
# Save the pseudo-labels; index=False keeps the DataFrame index out of the CSV.
pseudo_df.to_csv('pseudo_labels1.csv', index=False)