In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
#Use the fastaiV2 version
from fastai.vision.all import *
from zipfile import ZipFile

In [None]:
#Debug switch: when True, training runs a single epoch instead of 12, the
#test-file existence check is executed, and inference is skipped (the
#placeholder string 'rle' is written for every row instead of a real prediction)
debug = False

In [None]:
#Extract the competition archives (train images, train masks, sample submission
#and test images) relative to the current working directory.
#One loop replaces four copy-pasted cells that differed only in the file name.
for archive_name in ('train.zip', 'train_masks.zip', 'sample_submission.csv.zip', 'test.zip'):
    with ZipFile('../input/carvana-image-masking-challenge/'+archive_name, 'r') as zip_ref:
        zip_ref.extractall('')

In [None]:
#Collect the extracted training images and their mask files
path = Path('')
fnames, lbl_names = (get_image_files(path/folder) for folder in ('train', 'train_masks'))

In [None]:
#Print the first image path and the first mask path to compare how they are named
print (fnames[0],lbl_names[0])
def get_mask(o):
    """Return the relative path of the mask file paired with image path `o`.

    `o` must expose a `.stem` attribute (e.g. a `pathlib.Path`); the result is
    the string 'train_masks/<stem>_mask.gif'.
    """
    return 'train_masks/'+str(o.stem)+'_mask.gif'

In [None]:
#Pick a random training image and display it.
#randrange excludes its upper bound, so the index is always valid
#(randint(0, len(fnames)) could return len(fnames) and raise IndexError).
img_fn = fnames[random.randrange(len(fnames))]
im = PILImage.create(img_fn)
im.show(figsize=(5,5))

In [None]:
#Display the mask that belongs to the image shown above, then report its shape
msk = PILMask.create(get_mask(img_fn))
msk.show(figsize=(5,5), alpha=1)
msk.shape

In [None]:
#Collect every distinct pixel value (category) that occurs in the masks.
#For semantic segmentation there should be only a few distinct values, one per class.
#The first run should scan all masks (is_partial=False) so that no value is missed;
#later runs can use a random sample, because the full scan is time-consuming.
def n_codes(fnames, is_partial=True):
    """Gather the distinct pixel values found in the mask files `fnames`.

    Args:
        fnames: collection of mask file paths readable by `PILMask.create`.
        is_partial: when True, only a random sample of at most 10 masks is
            inspected; when False, every mask is read.

    Returns:
        dict mapping a class index (0, 1, ...) to the raw pixel value it stands
        for, with the pixel values in ascending order.
    """
    #Work on a copy so the caller's list is never reordered
    fnames = list(fnames)
    if is_partial:
        fnames = random.sample(fnames, min(10, len(fnames)))
    vals = set()
    for fname in fnames:
        msk = np.array(PILMask.create(fname))
        vals.update(np.unique(msk).tolist())
    #Sorting makes the index -> value mapping independent of set iteration order
    return dict(enumerate(sorted(vals)))
p2c=n_codes(lbl_names)
p2c

In [None]:
#Remap the raw pixel values of a mask to consecutive class indices 0, 1, ...
def get_mask2(fn,p2c=p2c):
    """Load the mask paired with image path `fn` and relabel it with class indices.

    Args:
        fn: path of a training image; only its `.stem` is used to locate
            'train_masks/<stem>_mask.gif'.
        p2c: dict mapping class index -> raw pixel value. Defaults to the
            module-level `p2c` computed by `n_codes`, so the masks are not
            scanned a second time when this function is defined.

    Returns:
        A `PILMask` in which every pixel equal to `p2c[i]` has been replaced by
        `i`; pixel values not listed in `p2c` are left unchanged.
    """
    mask_path = 'train_masks/'+str(fn.stem)+'_mask.gif'
    raw = np.array(PILMask.create(mask_path))
    #Compare against the untouched `raw` array and write into a copy, so a pixel
    #that has already been relabelled can never be matched again by a later class
    remapped = raw.copy()
    for code,pixel_val in p2c.items():
        remapped[raw == pixel_val] = code
    return PILMask.create(remapped)

In [None]:
#Generate a DataBlock: images as input, two-class masks ('Background', 'car') as target
binary = DataBlock(blocks=(ImageBlock, MaskBlock(['Background', 'car'])),
                   get_items=get_image_files,        #x: the image files found under the source folder
                   splitter=RandomSplitter(seed=42), #random train/valid split, seeded so reruns give the same split
                   get_y=get_mask2,                  #y: the relabelled mask paired with each image
                   item_tfms=Resize((512,512),ResizeMethod.Squish), #"Squish" is important: resize without cropping
                   batch_tfms=[Normalize.from_stats(*imagenet_stats)])

In [None]:
#Build the dataloaders from the training folder (3 items per batch) and preview a batch
train_dir = path/'train'
dls = binary.dataloaders(train_dir, bs=3)
dls.show_batch(vmin=0, vmax=1, cmap='Greens')

In [None]:
#Build a U-Net learner on a resnet34 backbone; Dice is the usual metric for
#semantic segmentation. The metric is passed as an instance, Dice().
#to_fp16() switches to mixed-precision training, which increases training speed
learn = unet_learner(dls,resnet34,metrics=Dice()).to_fp16()

In [None]:
#Train with the one-cycle schedule (no learning rate is passed explicitly):
#a single epoch in debug mode, 12 otherwise. The higher the Dice value, the better
n_epochs = 1 if debug else 12
learn.fit_one_cycle(n_epochs)
learn.recorder.plot_loss()

In [None]:
#Save the trained learner's state under the name 'carvana_stage1'
learn.save('carvana_stage1')

In [None]:
#Read the sample submission; its 'img' column lists the test image names and
#its 'rle_mask' column is overwritten with the predicted encodings further down
submit_mask = pd.read_csv('sample_submission.csv')
submit_mask

In [None]:
#Debug-only sanity check: report every image named in the csv that is missing
#from the test directory (prints the row number and the expected path)
if debug:
    for row_no, img_name in enumerate(submit_mask['img']):
        candidate = 'test/' + str(img_name)
        if os.path.exists(candidate):
            continue
        print (row_no,candidate)
print('done')

In [None]:
#Prefix every image name from the submission file with 'test/' to obtain the
#relative path of each test image; the bare expression displays the result
test_csv = 'test/'+submit_mask['img']
test_csv

In [None]:
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and every non-zero pixel is
    treated as foreground. The input array is not modified.

    Args:
        mask: array-like mask; zero is background, anything else is foreground.

    Returns:
        1-D integer numpy array `[start_1, length_1, start_2, length_2, ...]`
        with 1-based start positions; empty when the mask has no foreground.
    """
    pixels = (np.asarray(mask).flatten() != 0).astype(np.uint8)
    #Pad with a background pixel on both ends instead of overwriting the first
    #and last pixel, so runs touching the image corners keep their full length
    padded = np.concatenate([[0], pixels, [0]])
    #Positions where the value changes: even entries are run starts (1-based),
    #odd entries are the position just past the end of the run
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    #Turn each end position into a run length
    runs[1::2] -= runs[::2]
    return runs

In [None]:
#Predict in chunks of 1000 images so the predictions for the whole test set
#never have to be held in memory at once
for ibatch in range(100):
    batch_start = 1000*ibatch
    test_csv_part = test_csv[batch_start:batch_start+1000]
    if len(test_csv_part) == 0:
        break  #fewer than 100 full chunks available: nothing left to predict
    test_dl = learn.dls.test_dl(test_csv_part)
    preds=[]
    if(not debug):
        preds = learn.get_preds(dl=test_dl)
    print(ibatch,time.time())
    #Iterate over the rows actually present in this chunk rather than a fixed 1000
    for idx in range(len(test_csv_part)):
        if(debug):
            rle = 'rle'
        else:
            #Pixels whose channel-0 prediction is below 0.5 are marked 1
            submit_np = np.array(preds[0][idx][0]<0.5).astype(np.uint8)
            msk = PILMask.create(submit_np)
            #Resize through PIL (size is (width, height)) because np.resize was
            #observed to distort the image. LANCZOS is the same filter as the
            #ANTIALIAS alias, which newer Pillow releases no longer provide
            msk = msk.resize((1918,1280),Image.LANCZOS)
            submit_np2 = np.array(msk)
            rle = rle_encode(submit_np2)
            rle = ' '.join(str(x) for x in rle)
        #.at writes straight into the frame; chained indexing
        #(submit_mask['rle_mask'][i] = ...) may assign to a temporary copy
        submit_mask.at[batch_start+idx,'rle_mask'] = rle


In [None]:
#Write an intermediate submission file once the 100 chunks of 1000 images are done
submit_mask.to_csv('submission0.csv',index=False, header=True)
print('end 0')

In [None]:
#Predict the images left over after the 100 chunks of 1000 (rows 100000 to 100063)
tail_start = 100000
test_csv_part = test_csv[tail_start:100064]
test_dl = learn.dls.test_dl(test_csv_part)
preds=[]
if(not debug):
    preds = learn.get_preds(dl=test_dl)
#Log the chunk number derived from the offset instead of relying on the loop
#variable left behind by the previous cell
print(tail_start//1000,time.time())
#Iterate over the rows actually present in the slice
for idx in range(len(test_csv_part)):
    if(debug):
        rle = 'rle'
    else:
        #Pixels whose channel-0 prediction is below 0.5 are marked 1
        submit_np = np.array(preds[0][idx][0]<0.5).astype(np.uint8)
        msk = PILMask.create(submit_np)
        #Resize through PIL (size is (width, height)) because np.resize was
        #observed to distort the image. LANCZOS is the same filter as the
        #ANTIALIAS alias, which newer Pillow releases no longer provide
        msk = msk.resize((1918,1280),Image.LANCZOS)
        submit_np2 = np.array(msk)
        rle = rle_encode(submit_np2)
        rle = ' '.join(str(x) for x in rle)
    #.at writes straight into the frame; chained indexing
    #(submit_mask['rle_mask'][i] = ...) may assign to a temporary copy
    submit_mask.at[tail_start+idx,'rle_mask'] = rle

In [None]:
#Write the final submission file (without the index column, with a header row)
submit_mask.to_csv('submission.csv',index=False, header=True)
print('end')