# HuBMAP + HPA Kaggle baseline submission

The goal of this notebook is to establish a baseline for the accuracy of 
the models that will be used. It creates random segmentation masks in which each pixel is 0 or 1 with equal probability and submits them to the competition for evaluation.
The best scores achieved by this method are:<br>
* Public Score: **0.29014**
* Private Score: **0.32931**

_Note: The private score is computed on 50% of the test set and is not shown to the participants until after the competition has concluded._

In [1]:
from fastai.vision.all import *
import numpy as np
import pandas as pd

import os        

In [2]:
# Root folder of the competition data; every CSV read in this notebook is
# resolved relative to it.
DATASET_FOLDER = "../input/hubmap-organ-segmentation"

In [3]:
# Load the training metadata (one row per image, including its `rle` mask)
# and preview the first rows.
train_csv_path = os.path.join(DATASET_FOLDER, "train.csv")
df_train = pd.read_csv(train_csv_path)
df_train.head()

Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness,rle,age,sex
0,10044,prostate,HPA,3000,3000,0.4,4,1459676 77 1462675 82 1465674 87 1468673 92 1471671 97 1474669 101 1477667 105 1480665 109 1483664 115 1486662 119 1489661 121 1492659 125 1495658 127 1498656 131 1501655 133 1504653 137 1507652 139 1510651 140 1513650 142 1516649 143 1519648 145 1522647 146 1525646 148 1528645 149 1531644 151 1534643 152 1537642 153 1540641 155 1543640 156 1546639 157 1549638 158 1552637 160 1555636 161 1558635 162 1561634 163 1564633 165 1567631 167 1570628 171 1573625 175 1576622 179 1579620 182 1582619 184 1585618 186 1588617 188 1591616 190 1594615 191 1597615 191 1600614 192 1603613 193 1606612 194 1...,37.0,Male
1,10274,prostate,HPA,3000,3000,0.4,4,715707 2 718705 8 721703 11 724701 18 727692 34 730692 35 733679 54 736677 58 739676 65 742675 67 745671 76 748669 79 751669 79 754667 87 757663 93 760662 94 763658 113 766657 114 769654 184 772651 202 775650 204 778649 216 781649 216 784646 228 787646 237 790644 241 793643 246 796641 249 799637 260 802636 267 805636 269 808635 274 811634 277 814632 281 817631 285 820631 285 823630 291 826629 294 829627 298 832626 301 835626 302 838624 310 841623 319 844622 321 847621 326 850621 327 853619 335 856618 341 859617 344 862616 349 865616 350 868615 357 871614 365 874614 367 877613 371 880613 37...,76.0,Male
2,10392,spleen,HPA,3000,3000,0.4,4,1228631 20 1231629 24 1234624 40 1237623 47 1240619 61 1243618 63 1246618 63 1249610 82 1252609 87 1255606 93 1258604 99 1261601 105 1264600 109 1267596 122 1270595 125 1273595 127 1276594 130 1279593 135 1282592 140 1285591 145 1288587 157 1291586 162 1294585 169 1297584 172 1300584 174 1303583 177 1306581 181 1309580 195 1309962 121 1312579 198 1312961 124 1315578 200 1315960 128 1318578 201 1318959 131 1321577 202 1321958 134 1324577 210 1324955 139 1327576 212 1327953 143 1330576 222 1330945 153 1333575 227 1333943 157 1336575 528 1339575 532 1342574 535 1345574 536 1348573 537 1351573...,82.0,Male
3,10488,lung,HPA,3000,3000,0.4,4,3446519 15 3449517 17 3452514 20 3455510 24 3458491 43 3461488 46 3464486 48 3467485 49 3470484 50 3473483 51 3476482 52 3479481 53 3482480 54 3485479 55 3488478 57 3491477 59 3494476 61 3497475 62 3500474 63 3503473 64 3506472 65 3509471 67 3512470 68 3515469 69 3518468 70 3521466 73 3524075 19 3524465 76 3527068 28 3527464 78 3530066 30 3530463 79 3533064 33 3533462 80 3536063 34 3536461 82 3539062 37 3539460 85 3542062 38 3542459 87 3545061 41 3545458 91 3548060 43 3548457 95 3551060 44 3551456 96 3554059 45 3554455 98 3557058 47 3557454 99 3560057 48 3560453 102 3563057 49 3563453 103 ...,78.0,Male
4,10610,spleen,HPA,3000,3000,0.4,4,478925 68 481909 87 484893 105 487863 154 490854 169 493845 182 496836 194 499827 205 502820 216 505814 227 508808 237 511803 246 514797 256 517792 264 520786 274 523780 283 526776 290 529773 296 532769 303 535766 309 538763 314 541760 320 544757 326 547753 332 550750 338 553747 343 556744 354 559740 360 562737 365 565734 369 568731 373 571728 377 574724 382 577721 386 580718 395 583715 399 586712 403 589708 408 592705 413 595702 417 598700 421 601698 424 604695 428 607693 432 610690 436 613688 440 616686 443 619683 448 622681 451 625679 455 628676 459 631673 464 634671 467 637667 472 6406...,21.0,Female


In [4]:
def rle_encode_less_memory(img):
    '''Convert a binary mask to a run-length-encoded (RLE) string.

    The mask is traversed column by column (it is transposed before being
    flattened) and pixel positions are 1-based. The result is a
    space-separated sequence of ``start length`` pairs, one pair per run of
    foreground pixels.

    Input: a 2D numpy array of 0s (background) and 1s (foreground)
    Output: the RLE string; an empty string if the mask is empty or
            contains no foreground pixel
    '''
    # The image is transposed so that flattening walks it column by column.
    # flatten() returns a copy, so the caller's array is never modified.
    pixels = img.T.flatten()
    if pixels.size == 0:
        return ''

    # 1-based positions of every pixel whose value differs from the one
    # right before it (these are alternately run starts and run ends).
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 2

    # Handle foreground touching either end explicitly instead of forcing
    # the first and last pixel to zero, which would silently drop them
    # from the encoding.
    if pixels[0]:
        # A run is already open at position 1.
        runs = np.concatenate(([1], runs))
    if pixels[-1]:
        # The final run is closed one position past the last pixel.
        runs = np.concatenate((runs, [pixels.size + 1]))

    # Turn each (start, end) pair into (start, length).
    runs[1::2] -= runs[::2]

    return ' '.join(str(x) for x in runs)

In [5]:
# Load the test metadata (image ids and dimensions) and preview the first rows.
test_csv_path = os.path.join(DATASET_FOLDER, "test.csv")
df_test = pd.read_csv(test_csv_path)
display(df_test.head())

Unnamed: 0,id,organ,data_source,img_height,img_width,pixel_size,tissue_thickness
0,10078,spleen,Hubmap,2023,2023,0.4945,4


In [7]:
# Create the submission DataFrame
# Fix the RNG seed so that the random baseline masks (and therefore the
# submission file) are reproducible on Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Build one record per test image and create the frame once at the end;
# the columns follow the competition's `id,rle` submission layout.
records = []
for row in df_test.itertuples(index=False):
    # Height and width of the original test image, cast to plain Python
    # ints so they are valid tensor dimensions whatever the CSV dtype is.
    h, w = int(row.img_height), int(row.img_width)
    # Random mask of size (height, width): torch.rand draws from [0, 1),
    # so thresholding at 0.5 sets each pixel to 1 with probability 0.5.
    mask = (torch.rand((h, w)) > 0.5).to(torch.uint8)
    records.append({"id": int(row.id), "rle": rle_encode_less_memory(mask.numpy())})

submission = pd.DataFrame(records, columns=["id", "rle"]).astype({"id": "int64"})
submission.head()


Unnamed: 0,id,rle
0,10078,5 1 7 2 10 1 14 2 17 1 20 2 23 3 30 2 33 2 36 1 42 4 47 1 50 1 52 1 57 1 62 2 65 1 67 4 73 1 76 1 78 1 81 1 84 1 86 1 89 1 91 3 95 1 98 2 101 2 107 2 110 2 114 5 122 1 126 2 130 2 133 3 137 1 141 1 146 1 148 2 152 4 158 6 165 3 173 4 179 1 185 1 187 3 191 1 194 7 203 4 208 2 213 2 216 3 221 1 223 2 227 4 232 1 234 1 236 1 239 1 243 1 246 2 249 1 254 4 260 1 264 2 267 1 270 1 272 2 277 3 281 1 284 1 290 3 295 2 298 8 307 6 314 3 318 1 321 1 326 1 329 3 334 1 338 1 346 1 349 2 352 2 355 2 358 1 360 1 364 3 368 1 370 3 378 2 382 3 386 5 392 2 396 2 400 6 407 3 411 3 415 1 417 1 421 1 423 3 43...


In [8]:
# Write the submission to the working directory without the DataFrame index,
# so the file contains only the submission columns.
submission.to_csv(path_or_buf="submission.csv", index=False)