In [136]:
from fastbook import *
from fastai.vision import *
from kaggle import api

In [2]:
# Size used to decode the run-length encoded training masks (see rle2mask).
IMG_HEIGHT = 1400
IMG_WIDTH = 2100
# Size predicted masks are resized to before being encoded for submission.
IMG_HEIGHT_TEST = 350
IMG_WIDTH_TEST = 525

# Everything lives under the 'clouds' data directory.
clouds_path = URLs.path('clouds')
train_images_path = clouds_path/'train_images'
submissions_path = clouds_path/'submissions'
test_img_files = get_image_files(clouds_path/'test_images')

# One row per (image, cloud type) label with its encoded mask.
df_train = pd.read_csv(clouds_path/'train.csv')

In [3]:
# Preview the labels: each row pairs an `<image>_<cloud type>` label with its
# run-length encoded mask (empty when that cloud type is absent from the image).
df_train.head()

Unnamed: 0,Image_Label,EncodedPixels
0,0011165.jpg_Fish,264918 937 266318 937 267718 937 269118 937 270518 937 271918 937 273318 937 274718 937 276118 937 277518 937 278918 937 280318 937 281718 937 283118 937 284518 937 285918 937 287318 937 288718 937 290118 937 291518 937 292918 937 294318 937 295718 937 297118 937 298518 937 299918 937 301318 937 302718 937 304118 937 305518 937 306918 937 308318 937 309718 937 311118 937 312518 937 313918 937 315318 937 316718 937 318118 937 319518 937 320918 937 322318 937 323718 937 325118 937 326518 937 327918 937 329318 937 330718 937 332118 937 333518 937 334918 937 336318 937 337718 937 339118 937 34...
1,0011165.jpg_Flower,1355565 1002 1356965 1002 1358365 1002 1359765 1002 1361165 1002 1362565 1002 1363965 1002 1365365 1002 1366765 1002 1368165 1002 1369565 1002 1370965 1002 1372365 1002 1373765 1002 1375165 1002 1376565 1002 1377965 1002 1379365 1002 1380765 1002 1382165 1002 1383565 1002 1384965 1002 1386365 1002 1387765 1002 1389165 1002 1390565 1002 1391965 1002 1393365 1002 1394765 1002 1396165 1002 1397565 1002 1398965 1002 1400365 1002 1401765 1002 1403165 1002 1404565 1002 1405965 1002 1407365 1002 1408765 1002 1410165 1002 1411565 1002 1412965 1002 1414365 1002 1415765 1002 1417165 1002 1418565 100...
2,0011165.jpg_Gravel,
3,0011165.jpg_Sugar,
4,002be4f.jpg_Fish,233813 878 235213 878 236613 878 238010 881 239410 881 240810 881 242210 881 243610 881 245010 881 246410 881 247810 881 249210 881 250610 881 252010 881 253410 881 254810 881 256210 881 257610 881 259010 881 260410 881 261810 881 263210 881 264610 881 266010 881 267410 881 268810 881 270210 881 271610 881 273010 881 274410 881 275810 881 277210 881 278610 881 280010 881 281410 881 282810 881 284210 881 285610 881 287010 881 288410 881 289810 881 291210 881 292610 881 294010 881 295410 881 296810 881 298210 881 299610 881 301010 881 302410 881 303810 881 305210 881 306610 881 308010 881 30...


In [4]:
# The label suffixes found in `Image_Label`; one model is trained per entry.
cloud_types = [
    'Fish',
    'Flower',
    'Gravel',
    'Sugar',
]

### Functions to load data from files

In [5]:
def rle2mask(encoded_pixels, height=None, width=None):
    """Decode a run-length encoded string into a binary mask.

    The encoding is a whitespace-separated sequence of `start length` pairs.
    Starts are one-indexed positions in the image flattened column by column
    (top to bottom, then left to right).

    Args:
        encoded_pixels: the RLE string, e.g. ``"1 2 5 1"``. An empty string
            yields an all-zero mask.
        height: mask height in pixels; defaults to `IMG_HEIGHT`.
        width: mask width in pixels; defaults to `IMG_WIDTH`.

    Returns:
        A `torch.uint8` tensor of shape `(height, width)` holding 1 inside
        the encoded runs and 0 elsewhere.
    """
    # Resolve the defaults lazily so explicit sizes never touch the globals.
    height = IMG_HEIGHT if height is None else height
    width = IMG_WIDTH if width is None else width

    mask = torch.zeros(height * width, dtype=torch.uint8)
    tokens = encoded_pixels.split()

    # Even positions are starts, odd positions are run lengths.
    # The "- 1" converts one-indexed starts to zero-indexed offsets.
    start_pixels = [int(token) - 1 for token in tokens[0::2]]
    lengths = [int(token) for token in tokens[1::2]]

    for start_px, length in zip(start_pixels, lengths):
        mask[start_px:start_px + length] = 1

    # The flat buffer is column-major: view it as (width, height) and transpose.
    return mask.reshape((width, height)).T

In [6]:
def get_x(row):
    """Return the training image path for a label row.

    `Image_Label` has the form `<file name>_<cloud type>`; the text before
    the first underscore is the image file name.
    """
    image_name, _, _ = row['Image_Label'].partition('_')
    return train_images_path/image_name

In [7]:
def get_y(row):
    """Return the target mask for a label row.

    Args:
        row: a row of the training frame with an `EncodedPixels` entry.

    Returns:
        A `torch.uint8` tensor of shape `(IMG_HEIGHT, IMG_WIDTH)`: the decoded
        run-length mask, or all zeros when the row has no encoded pixels.
    """
    encoded_pixels = row['EncodedPixels']
    # Empty CSV cells are read as NaN; test for that directly instead of
    # comparing the stringified value with 'nan'.
    if pd.isna(encoded_pixels):
        return torch.zeros((IMG_HEIGHT, IMG_WIDTH), dtype=torch.uint8)
    return rle2mask(str(encoded_pixels))

### Train model for each cloud type

In [8]:
# Seeded so the train/validation split is reproducible between runs.
splitter = RandomSplitter(seed=42)

In [9]:
def get_learner(cloud_type):
    """Build a binary segmentation learner for a single cloud type.

    Args:
        cloud_type: label suffix to model, e.g. ``'Fish'``.

    Returns:
        A U-Net learner with a resnet34 backbone whose dataloaders contain
        only the rows of `df_train` labelled with `cloud_type`. Masks use two
        codes: ``'no_clouds'`` and `cloud_type`.
    """
    # Match the `_<cloud type>` suffix of the label rather than a substring
    # anywhere in it, so a file name can never cause a false match.
    is_cloud_type = df_train['Image_Label'].str.endswith(f'_{cloud_type}')
    df_train_one_type = df_train[is_cloud_type]
    dblock = DataBlock(
        blocks=(ImageBlock, MaskBlock(codes=['no_clouds', cloud_type])),
        get_x=get_x,
        get_y=get_y,
        splitter=splitter,
        # Train at a reduced resolution bounded by half the submission width.
        item_tfms=RatioResize(IMG_WIDTH_TEST//2),
    )
    dls = dblock.dataloaders(df_train_one_type, bs=16)
    return unet_learner(dls, resnet34)

In [10]:
def train(cloud_type, save=True):
    """Fine-tune a new learner for `cloud_type` for 3 epochs.

    When `save` is true the learner is also saved as
    `first_model_<cloud_type>`. The trained learner is returned either way.
    """
    learner = get_learner(cloud_type)
    learner.fine_tune(3)
    if not save:
        return learner
    learner.save(f'first_model_{cloud_type}')
    return learner

In [11]:
def load(cloud_type):
    """Rebuild the learner for `cloud_type` and load the checkpoint saved as
    `first_model_<cloud_type>` into it."""
    learner = get_learner(cloud_type)
    return learner.load(f'first_model_{cloud_type}')

In [12]:
def load_or_train(cloud_type):
    """Return the saved learner for `cloud_type`, training one if none exists.

    Loading a checkpoint that was never written raises `FileNotFoundError`.
    On a fresh kernel or checkout that would stop the notebook here, so fall
    back to `train`, which also saves the checkpoint for the next run.
    """
    try:
        return load(cloud_type)
    except FileNotFoundError:
        return train(cloud_type)


# One learner per cloud type, reusing saved checkpoints where available.
cloud_type_models = {cloud_type: load_or_train(cloud_type) for cloud_type in cloud_types}

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [12]:
# Trains a model for every cloud type from scratch and rebinds `cloud_type_models`.
# `train` saves under the same `first_model_<cloud type>` names that `load` reads,
# so only run this cell to (re)create the checkpoints; the cell above reuses them.
cloud_type_models = {cloud_type: train(cloud_type) for cloud_type in cloud_types}

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


epoch,train_loss,valid_loss,time
0,0.364374,0.3343,02:53


epoch,train_loss,valid_loss,time
0,0.337661,0.350523,03:00
1,0.28062,0.286822,03:00
2,0.180602,0.330955,02:59


epoch,train_loss,valid_loss,time
0,0.308858,0.277261,02:52


epoch,train_loss,valid_loss,time
0,0.236189,0.240275,02:59
1,0.196628,0.231562,02:59
2,0.132944,0.267327,03:01


epoch,train_loss,valid_loss,time
0,0.481592,0.407555,02:52


epoch,train_loss,valid_loss,time
0,0.328678,0.323166,03:00
1,0.283623,0.297906,02:59
2,0.210927,0.318786,03:00


epoch,train_loss,valid_loss,time
0,0.357592,0.354764,02:53


epoch,train_loss,valid_loss,time
0,0.302644,0.308183,03:00
1,0.279093,0.296636,03:01
2,0.197673,0.324008,03:01


### Get predictions for test images

In [13]:
def mask2rle(mask):
    """Run-length encode a binary 2-D mask.

    Pixels are numbered from 1, column by column (top to bottom, then left to
    right). The result is a space-separated sequence of `start length` pairs,
    one pair per run of ones.

    Args:
        mask: 2-D tensor of zeros and ones, of any integer or bool dtype.

    Returns:
        The encoded string; empty when the mask contains no ones.
    """
    # Work in a signed dtype: on uint8/bool input a falling edge (0 - 1) would
    # otherwise wrap around or fail instead of producing -1.
    flat = mask.T.reshape((-1,)).to(torch.int64)
    # Pad with a zero on each side so runs touching either end still produce
    # both a rising and a falling edge. new_zeros keeps the mask's device.
    pad = flat.new_zeros(1)
    padded = torch.cat((pad, flat, pad))
    diff = padded[1:] - padded[:-1]

    starts = torch.where(diff == 1)[0]
    ends = torch.where(diff == -1)[0]
    lengths = ends - starts
    start_pixels = starts + 1  # convert to one-indexed positions

    pairs = zip(start_pixels.tolist(), lengths.tolist())
    return ' '.join(f'{start} {length}' for start, length in pairs)

In [14]:
def get_test_preds(learn):
    """Predict a class-index mask for every file in `test_img_files`.

    Builds a test dataloader from the learner's own dataloaders, runs
    inference, and takes the argmax over dimension 1 of the predictions.
    """
    test_dl = learn.dls.test_dl(test_img_files)
    class_scores = learn.get_preds(dl=test_dl)[0]
    return class_scores.argmax(dim=1)

In [90]:
def resize_to_submit_size(mask):
    """Resize a predicted mask to the submission resolution.

    The mask is converted to a PIL mask, resized to
    `(IMG_WIDTH_TEST, IMG_HEIGHT_TEST)` and converted back to a `TensorMask`.
    """
    # PIL sizes are given as (width, height).
    submit_size = (IMG_WIDTH_TEST, IMG_HEIGHT_TEST)
    pil_mask = PILMask.create(TensorMask(mask))
    # NOTE(review): no resampling filter is passed, so PIL's default applies;
    # nearest-neighbour is usually preferred for label masks - confirm.
    resized = pil_mask.resize(submit_size)
    return TensorMask(resized)

In [91]:
def pred_mask2rle(pred_mask):
    """Resize a predicted mask to the submission size and run-length encode it."""
    return mask2rle(resize_to_submit_size(pred_mask))

In [52]:
# Column-wise accumulator for the submission: one entry per (test image, cloud type).
submission_dict = {'Image_Label': [], 'EncodedPixels': []}

for cloud_type in cloud_types:
    print(cloud_type)
    learner = cloud_type_models[cloud_type]
    # Predictions come back in the same order as `test_img_files`.
    for test_file, predicted_mask in zip(test_img_files, get_test_preds(learner)):
        # Encode first so a failure leaves both columns the same length.
        encoded = pred_mask2rle(predicted_mask)
        submission_dict['EncodedPixels'].append(encoded)
        submission_dict['Image_Label'].append(f'{test_file.name}_{cloud_type}')

Fish


Flower


Gravel


Sugar


In [55]:
# Turn the accumulated columns into a frame; `columns` pins the column order
# to Image_Label followed by EncodedPixels.
submission_df = pd.DataFrame(submission_dict, columns=['Image_Label', 'EncodedPixels'])

In [65]:
# Sanity-check the first predictions; an empty EncodedPixels means no pixels
# were predicted for that image/cloud-type pair.
submission_df.head()

Unnamed: 0,Image_Label,EncodedPixels
0,719307c.jpg_Fish,57 14 75 12 407 14 425 12 753 43 1103 43 1433 77 1783 77 2129 91 2479 91 2823 107 3173 107 3521 109 3871 109 4219 111 4569 111 4917 115 5267 115 5613 121 5963 121 6313 121 6663 121 7013 123 7363 123 7713 123 8063 123 8413 123 8763 123 9113 123 9463 123 9813 123 10163 123 10513 125 10863 125 11211 127 11561 127 11911 127 12261 127 12611 127 12961 127 13309 129 13659 129 14009 129 14214 4 14238 2 14359 129 14564 4 14588 2 14709 129 14914 4 14938 2 14942 6 15059 129 15264 4 15288 2 15292 6 15409 129 15610 8 15642 6 15658 7 15759 129 15960 8 15992 6 16008 7 16109 129 16310 8 16342 8 16358 7 16...
1,c9ac078.jpg_Fish,
2,39c9ac1.jpg_Fish,55373 6 55385 2 55390 8 55723 6 55735 2 55740 8 56077 37 56427 37 56769 49 57119 49 57445 4 57461 61 57795 4 57811 61 58133 18 58153 2 58157 65 58483 18 58503 2 58507 65 58837 89 59187 89 59537 91 59887 91 60233 97 60583 97 60933 97 61283 97 61629 101 61979 101 62329 101 62679 101 63025 105 63375 105 63725 107 64075 107 64425 105 64775 105 65123 107 65473 107 65821 111 66171 111 66521 111 66871 111 67221 111 67571 111 67921 111 68271 111 68621 111 68971 111 69321 109 69671 109 70021 111 70371 111 70721 107 71071 107 71421 109 71771 109 72121 109 72471 109 72821 109 73171 109 73521 109 7387...
3,881fab1.jpg_Fish,
4,5fa934a.jpg_Fish,40001 3 40351 3 40696 36 41046 36 41394 40 41744 40 42087 53 42437 53 42785 57 43135 57 43481 61 43831 61 44171 6 44181 61 44521 6 44531 61 44869 69 45219 69 45569 67 45919 67 46271 61 46621 61 46973 3 46979 51 47323 3 47329 51 47681 17 47704 4 47710 20 48031 17 48054 4 48060 20 48381 17 48402 28 48731 17 48752 28 49081 17 49110 2 49114 16 49431 17 49460 2 49464 16 49781 17 49806 24 50131 17 50156 24 50475 23 50510 20 50825 23 50860 20 51175 25 51210 20 51525 25 51560 20 51875 25 51910 20 52225 25 52260 20 52575 25 52612 18 52925 25 52962 18 53275 25 53314 16 53625 25 53664 16 53973 29 540...


In [131]:
# `to_csv` does not create missing directories, so make sure the target exists.
submissions_path.mkdir(parents=True, exist_ok=True)
submission_df.to_csv(submissions_path/'submission.csv', index=False)

In [132]:
import os

# Credentials must not be written into the notebook. Read them from the
# environment instead (set KAGGLE_USERNAME / KAGGLE_KEY before starting Jupyter).
kaggle_data = {
    "username": os.environ.get('KAGGLE_USERNAME', ''),
    "key": os.environ.get('KAGGLE_KEY', ''),
}
# Export only values that are actually set, so an empty placeholder can never
# overwrite credentials that are already configured.
os.environ.update({
    env_name: env_value
    for env_name, env_value in (
        ('KAGGLE_USERNAME', kaggle_data["username"]),
        ('KAGGLE_KEY', kaggle_data["key"]),
    )
    if env_value
})

In [134]:
!kaggle competitions submit -c understanding_cloud_organization -f submissions_path/'submission.csv' -m "first submission"

Traceback (most recent call last):
  File "/opt/conda/envs/fastai/bin/kaggle", line 8, in <module>
    sys.exit(main())
  File "/opt/conda/envs/fastai/lib/python3.8/site-packages/kaggle/cli.py", line 67, in main
    out = args.func(**command_args)
  File "/opt/conda/envs/fastai/lib/python3.8/site-packages/kaggle/api/kaggle_api_extended.py", line 561, in competition_submit_cli
    submit_result = self.competition_submit(file_name, message,
  File "/opt/conda/envs/fastai/lib/python3.8/site-packages/kaggle/api/kaggle_api_extended.py", line 513, in competition_submit
    content_length=os.path.getsize(file_name),
  File "/opt/conda/envs/fastai/lib/python3.8/genericpath.py", line 50, in getsize
    return os.stat(filename).st_size
FileNotFoundError: [Errno 2] No such file or directory: 'submissions_path/submission.csv'


In [138]:
# Upload the submission through the Kaggle Python client; the arguments are
# the file to upload, the submission message and the competition name.
api.competition_submit_cli(submissions_path/'submission.csv', 'first submission', 'understanding_cloud_organization')

100%|██████████| 22.2M/22.2M [00:09<00:00, 2.46MB/s]


Successfully submitted to Understanding Clouds from Satellite Images