In [1]:
%load_ext autoreload
%autoreload 2

In [50]:
import os, sys
import glob
import warnings
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from collections import Counter
from skmultilearn.model_selection import iterative_train_test_split
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix

from skimage import io

from fastai.vision import *

In [3]:
# Load the training annotations. As the preview shows, each row is one
# "<image>.jpg_<class>" pair with a run-length-encoded mask in EncodedPixels
# (left empty when no mask is given for that class).
data_df = pd.read_csv('./data/train.csv')
data_df

Unnamed: 0,ImageId_ClassId,EncodedPixels
0,0002cc93b.jpg_1,29102 12 29346 24 29602 24 29858 24 30114 24 3...
1,0002cc93b.jpg_2,
2,0002cc93b.jpg_3,
3,0002cc93b.jpg_4,
4,00031f466.jpg_1,
5,00031f466.jpg_2,
6,00031f466.jpg_3,
7,00031f466.jpg_4,
8,000418bfc.jpg_1,
9,000418bfc.jpg_2,


## Split

In [20]:
# Collapse each image's four consecutive rows (classes 1-4) into one record of
# 0/1 flags saying whether an RLE mask is present for that class.
# Relies on data_df having a default integer index with the four class rows of
# an image stored back to back.
defect_records = []
for first_row in range(0, len(data_df), 4):
    # 'xxx.jpg_1' -> 'xxx.jpg': strip the trailing '_<class>' suffix.
    record = {'ImageId_ClassId': data_df.loc[first_row, 'ImageId_ClassId'][:-2]}
    for offset in range(4):
        rle_value = data_df.loc[first_row + offset, 'EncodedPixels']
        record[str(offset + 1)] = int(pd.notnull(rle_value))
    defect_records.append(record)

column_order = ['ImageId_ClassId', '1', '2', '3', '4']
defects_df = pd.DataFrame(defect_records)[column_order]
# defects_df.to_csv('./data/defect_types.csv', index=False)

In [21]:
# Column sums over every column after the first (the image name): with the 0/1
# defect flags this is the number of images showing each defect class.
defects_df[defects_df.columns[1:]].sum()

1     897
2     247
3    5150
4     801
dtype: int64

In [7]:
# Total number of images, followed by how often each pair of defect classes
# (order-2 combinations, as produced by scikit-multilearn) occurs.
print(len(defects_df))
label_matrix = defects_df[['1', '2', '3', '4']].to_numpy()
Counter(
    pair
    for sample_pairs in get_combination_wise_output_matrix(label_matrix, order=2)
    for pair in sample_pairs
)

12568


Counter({(0, 0): 897,
         (2, 2): 5150,
         (3, 3): 801,
         (2, 3): 284,
         (0, 1): 37,
         (1, 1): 247,
         (0, 2): 93,
         (1, 2): 16,
         (1, 3): 1})

In [13]:
# The splitter needs a 2-D "feature" array; feed it the row positions of
# defects_df so the returned X_test maps straight back to rows.
Xd = np.array(range(len(defects_df)))[:, np.newaxis]
X_train, y_train, X_test, y_test = iterative_train_test_split(
    Xd,
    defects_df[['1', '2', '3', '4']].to_numpy(),
    test_size=0.2,
)

# Mark the held-out rows; every other row stays in the training portion.
defects_df['is_valid'] = False
defects_df.loc[X_test.ravel(), 'is_valid'] = True

In [14]:
def _pair_counts(labels):
    """Count order-2 label combinations in a label matrix, keyed by their string form."""
    per_sample = get_combination_wise_output_matrix(labels, order=2)
    return Counter(str(pair) for sample_pairs in per_sample for pair in sample_pairs)


# Side-by-side combination counts for both halves of the split; combinations
# missing from one half show up as 0.0.
pd.DataFrame({
    'train': _pair_counts(y_train),
    'test' : _pair_counts(y_test),
}).T.fillna(0.0)

Unnamed: 0,"(0, 0)","(0, 1)","(0, 2)","(1, 1)","(1, 2)","(1, 3)","(2, 2)","(2, 3)","(3, 3)"
train,718.0,30.0,74.0,198.0,13.0,1.0,4120.0,227.0,641.0
test,179.0,7.0,19.0,49.0,3.0,0.0,1030.0,57.0,160.0


## Mask Generation

In [9]:
def rle2mask(rle, imgshape):
    """Decode a run-length-encoded string into a binary 2-D mask.

    Parameters
    ----------
    rle : str
        Whitespace-separated integers forming ``start length`` pairs. Each pair
        sets ``length`` consecutive pixels beginning at flat offset ``start``.
        Flat offsets run down the first column, then down the next column, and
        so on. An empty string produces an all-zero mask.
    imgshape : sequence of int
        Shape of the target image. Only the first two entries (rows, columns)
        are read, so a trailing channel count is ignored.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(imgshape[0], imgshape[1])`` holding 1 inside
        the encoded runs and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``rle`` holds an odd number of integers (a start without a length).
    """
    n_rows, n_cols = imgshape[0], imgshape[1]
    flat = np.zeros(n_rows * n_cols, dtype=np.uint8)

    values = np.asarray([int(token) for token in rle.split()], dtype=np.int64)
    if values.size % 2:
        raise ValueError('RLE string must contain start/length pairs')

    # NOTE(review): `start` is applied as a 0-based offset. If the RLE source
    # numbers pixels from 1, every run lands one pixel late — confirm against
    # the data specification before changing, since masks already written to
    # disk were produced with this convention.
    for start, length in zip(values[0::2], values[1::2]):
        flat[start:start + length] = 1

    # Runs are column-major: fill a (columns, rows) grid row by row, then
    # transpose to get the usual (rows, columns) image layout.
    return flat.reshape(n_cols, n_rows).T

In [10]:
# Print the starting row of every image with at least three defect classes,
# i.e. fewer than two of its four RLE entries are missing.
rle_column = data_df['EncodedPixels']
for i in range(0, len(data_df), 4):
    missing = sum(pd.isnull(rle_column[i + offset]) for offset in range(4))
    if missing < 2:
        print(i)

43212
47104


In [11]:
def addmask(fullmask, rle, def_id):
    """Paint one defect class onto a combined label mask.

    Parameters
    ----------
    fullmask : numpy.ndarray
        2-D label mask that is modified in place.
    rle : str or NaN
        Run-length encoding of the defect region; a null value means the class
        is absent and leaves ``fullmask`` untouched.
    def_id : int
        Label written into every pixel covered by the decoded region.

    Returns
    -------
    numpy.ndarray
        The same ``fullmask`` object, returned for convenient reassignment.

    Raises
    ------
    AssertionError
        If the decoded region touches pixels that already carry a label.
    """
    rle = '' if pd.isnull(rle) else rle
    # Decode at the resolution of the mask being filled rather than a fixed
    # 256x1600 frame, so the boolean index below always matches `fullmask`.
    mask = rle2mask(rle, fullmask.shape)
    covered = mask != 0
    assert fullmask[covered].sum() == 0, (
        'defect %s overlaps pixels that are already labelled' % (def_id,)
    )
    fullmask[covered] = def_id
    return fullmask

In [12]:
plt.figure(figsize=(16,8))  # canvas for the optional debug overlay below
for i in tqdm(range(0, len(data_df), 4)):
    # 'abc.jpg_1' -> 'abc': drop the extension and the trailing '_<class>' suffix.
    image_name = data_df.loc[i, 'ImageId_ClassId'].replace('.jpg', '')[:-2]
    img = io.imread('./data/train_images/' + image_name + '.jpg')

    # Rebuild the label mask from this image's four per-class RLE rows.
    mask = np.zeros(img.shape[:-1], dtype='uint8')
    for class_id in range(1, 5):
        mask = addmask(mask, data_df.loc[i + class_id - 1, 'EncodedPixels'], class_id)

    # Debug overlay for a single image:
    #     plt.imshow(img)
    #     plt.imshow(mask, alpha=0.2)
    #     break

    mask_path = './data/train_masks/' + image_name + '.png'
    # To (re)generate the PNG instead of verifying it:
    #     with warnings.catch_warnings():
    #         warnings.simplefilter("ignore")
    #         io.imsave(mask_path, mask)

    # imread takes only the path here; the mask array used to be passed as a
    # stray second positional argument left over from the imsave call.
    readmask = io.imread(mask_path)
    assert np.array_equal(mask, readmask), (
        'stored mask differs from decoded RLE for ' + image_name
    )

HBox(children=(IntProgress(value=0, max=12568), HTML(value='')))




<Figure size 1152x576 with 0 Axes>

## Dataset

In [53]:
# Names of the images assigned to the validation set by the stored split.
split_df = pd.read_csv('./data/split.csv')
in_validation = split_df['is_valid'] == True
valid_files = set(split_df.loc[in_validation, 'ImageId_ClassId'])

In [54]:
# Gather every image under train_images and send the files named in
# valid_files to the validation list; the rest form the training list.
src = (SegmentationItemList.from_folder('./data/train_images/')
       .split_by_files(valid_files))
# Labels are not attached yet; the intended next step is kept below.
#        .label_from_func(get_y_fn, classes=codes))

In [55]:
# Show the item lists to check the train/validation sizes.
src

ItemLists;

Train: SegmentationItemList (10054 items)
Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600)
Path: data/train_images;

Valid: SegmentationItemList (2514 items)
Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600),Image (3, 256, 1600)
Path: data/train_images;

Test: None

In [77]:
# Distinct label values in `mask`.
# NOTE(review): `mask` is not defined in this section — presumably the last one
# left behind by the mask-verification loop; confirm on a fresh kernel.
np.unique(mask)

array([0, 1, 2, 3], dtype=uint8)

In [8]:
# Scratch: toy stand-in for an already-labelled mask.
a = np.array([1,1,0,0])

In [9]:
# Scratch: toy stand-in for a newly decoded mask; it shares index 1 with `a`.
b = np.array([0,1,1,0])

In [11]:
# Sum of `a` over the positions where `b` is non-zero; a non-zero result means
# the two overlap. Same expression shape as the overlap assertion in addmask.
a[b!=0].sum()

1

In [11]:
# Check that a global called 'mxresnet' exists in the kernel namespace.
# NOTE(review): nothing in the visible cells imports it by name — presumably it
# arrives via a star import or an earlier session; confirm on a fresh kernel.
globals()['mxresnet']

<module 'models.mxresnet' from '/home/litemax/kaggle/severstal-steel/models/mxresnet.py'>

In [6]:
# Look up a model constructor by its string name.
# NOTE(review): `model_list` is not defined in the visible cells — confirm
# where it comes from before relying on this cell.
Net = getattr(model_list, 'mxresnet18')

In [7]:
# Per the recorded output, calling Net returns a functools.partial wrapping
# mxresnet (expansion / n_layers / name pre-bound), not an instantiated network.
Net()

functools.partial(<function mxresnet at 0x7f7176904378>, expansion=1, n_layers=[2, 2, 2, 2], name='mxresnet18')