## Import

In [1]:
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import tifffile as tiff 
import cv2
import pandas as pd
from tqdm import tqdm
import shutil

## Read data files

In [13]:
# Root of the competition data; the per-split directories sit directly beneath it.
datapath = '/workspace/Competition/landmap/01_DATA'
trainpath, testpath = (os.path.join(datapath, split) for split in ('train', 'test'))

In [14]:
# Both splits share one layout: label/meta_converted, label/polygon and raw.
train_meta_path, train_poly_path, train_raw_path = (
    os.path.join(trainpath, *parts)
    for parts in (('label', 'meta_converted'), ('label', 'polygon'), ('raw',))
)
test_meta_path, test_poly_path, test_raw_path = (
    os.path.join(testpath, *parts)
    for parts in (('label', 'meta_converted'), ('label', 'polygon'), ('raw',))
)

# Directory listings with dot-prefixed entries dropped.
train_meta_files = [name for name in os.listdir(train_meta_path) if not name.startswith('.')]
train_poly_files = [name for name in os.listdir(train_poly_path) if not name.startswith('.')]
train_raw_files = [name for name in os.listdir(train_raw_path) if not name.startswith('.')]

test_meta_files = [name for name in os.listdir(test_meta_path) if not name.startswith('.')]
test_poly_files = [name for name in os.listdir(test_poly_path) if not name.startswith('.')]
test_raw_files = [name for name in os.listdir(test_raw_path) if not name.startswith('.')]

## Make mask files

In [15]:
def coord_to_points(coords, origin, resolution):
    """Convert coordinates into integer pixel indices measured from ``origin``.

    For every point the horizontal offset is ``point[0] - origin[0]`` and the
    vertical offset is ``origin[1] - point[1]`` (the y axis is flipped). Each
    offset is clamped at 0, divided by ``resolution`` and rounded with
    ``np.round`` (ties go to the nearest even integer).

    Args:
        coords: iterable of points; only index 0 (x) and index 1 (y) are read.
        origin: sequence whose first two items are the reference x and y
            (presumably the top-left corner of the tile -- TODO confirm).
        resolution: size of one pixel in the same units as the coordinates.

    Returns:
        Tuple ``(xes, yes)`` of two equally long lists of ints.
    """
    col_idx, row_idx = [], []
    for point in coords:
        # Offsets that would fall before the origin are clamped to 0.
        offset_x = max(0, point[0] - origin[0])
        offset_y = max(0, origin[1] - point[1])
        col_idx.append(int(np.round(offset_x / resolution)))
        row_idx.append(int(np.round(offset_y / resolution)))
    return col_idx, row_idx
    

In [19]:
# Building tiles of the test split; dot-prefixed directory entries are skipped.
building_raw_path = os.path.join(testpath, 'raw_buildings')
building_raw_files = [
    entry for entry in os.listdir(building_raw_path) if not entry.startswith('.')
]

In [20]:
# Output directory for the generated building masks. Built from testpath (like
# building_raw_path) so it follows the configured data root instead of repeating it.
outpath = os.path.join(testpath, 'mask_buildings')

In [22]:
# Rasterise the building polygons of every test tile into a binary PNG mask
# (255 inside any building polygon, 0 elsewhere).
BUILDING_ANN_CD = 10      # ANN_CD value of the features selected as buildings
PIXEL_RESOLUTION = 0.51   # coordinate units per pixel, passed to coord_to_points
MASK_SHAPE = (512, 512)   # (rows, cols) of every output mask

# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(outpath, exist_ok=True)

for imfile in tqdm(building_raw_files):
    # Label files are named after the image id (text before the first '.').
    fid = imfile.split('.')[0]
    metapath = os.path.join(test_meta_path, fid + '_META.json')
    polypath = os.path.join(test_poly_path, fid + '_FGT.json')

    # Context managers close the JSON handles instead of leaking one per file.
    with open(polypath) as poly_fp:
        poly_dict = json.load(poly_fp)
    with open(metapath) as meta_fp:
        meta_dict = json.load(meta_fp)

    # First coordinate ring of every building feature
    # (presumably the exterior ring of a GeoJSON polygon -- TODO confirm).
    building_coordinates = [
        feat['geometry']['coordinates'][0]
        for feat in poly_dict['features']
        if feat['properties']['ANN_CD'] == BUILDING_ANN_CD
    ]

    # Tile origin: the first two ', '-separated numbers of the meta string.
    meta_fields = meta_dict[0]['coordinates'].split(', ')
    origin = [float(meta_fields[0]), float(meta_fields[1])]

    # Draw each polygon separately onto one uint8 mask. Overlapping buildings
    # stay at 255 (a union) rather than summing past the 8-bit range, and
    # separate calls avoid the hole-punching that a single multi-contour
    # fillPoly call can produce where contours overlap.
    finmask = np.zeros(MASK_SHAPE, dtype=np.uint8)
    for sub_coord in building_coordinates:
        xs, ys = coord_to_points(sub_coord, origin, PIXEL_RESOLUTION)
        points = np.array(list(zip(xs, ys)), dtype=np.int32).reshape(-1, 2)
        if len(points) == 0:
            # Nothing to draw for an empty ring.
            continue
        cv2.fillPoly(finmask, pts=[points], color=255)

    finpath = os.path.join(outpath, fid + '.png')
    # imwrite signals failure through its return value; do not ignore it.
    if not cv2.imwrite(finpath, finmask):
        raise IOError('Failed to write mask image: ' + finpath)
    
    

100%|██████████| 1303/1303 [00:15<00:00, 81.73it/s] 


## Convert raw tiff files to png

In [46]:
# Same raw building tiles as above, located via testpath rather than a second
# hard-coded copy of the data root. Dot-prefixed entries are skipped.
raw_path = os.path.join(testpath, 'raw_buildings')
raw_tiffs = [entry for entry in os.listdir(raw_path) if not entry.startswith('.')]

In [47]:
# Destination for the PNG copies of the raw building tiles.
# NOTE(review): this rebinds `outpath`, which the mask-generation loop also
# reads; re-running that loop after this cell would write masks into png_raw.
outpath = os.path.join(testpath, 'png_raw')
# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(outpath, exist_ok=True)

for raw_tiff in tqdm(raw_tiffs):
    # Output keeps the image id (text before the first '.') with a .png suffix.
    imname = raw_tiff.split('.')[0]
    impath = os.path.join(raw_path, raw_tiff)
    im = tiff.imread(impath)
    svpath = os.path.join(outpath, imname + '.png')
    # OpenCV writes channels as BGR; the tiles are assumed to be RGB -- TODO confirm.
    # imwrite signals failure through its return value; do not ignore it.
    if not cv2.imwrite(svpath, cv2.cvtColor(im, cv2.COLOR_RGB2BGR)):
        raise IOError('Failed to write PNG image: ' + svpath)

100%|██████████| 1303/1303 [00:34<00:00, 38.12it/s]


Check that the raw and mask directories contain the same file names.

In [48]:
# Directories compared below, located via testpath instead of repeating the data root.
rawpath = os.path.join(testpath, 'png_raw')
# NOTE(review): no cell visible in this notebook writes to png_mask (the masks
# above are saved under mask_buildings) -- confirm where this directory is produced.
maskpath = os.path.join(testpath, 'png_mask')

# Directory listings with dot-prefixed entries dropped.
rawfiles = [entry for entry in os.listdir(rawpath) if not entry.startswith('.')]
maskfiles = [entry for entry in os.listdir(maskpath) if not entry.startswith('.')]

In [49]:
# True when both listings hold exactly the same set of file names.
not set(rawfiles).symmetric_difference(maskfiles)

True

## Encode file names (and sample)

In [51]:
# Copy every raw/mask pair under a sequential name (test_0.png, test_1.png, ...)
# and remember the original -> new name mapping.
mask_outpath = os.path.join(testpath, 'png_mask_encoded')
raw_outpath = os.path.join(testpath, 'png_raw_encoded')
# shutil.copy fails when the destination directory is missing.
os.makedirs(mask_outpath, exist_ok=True)
os.makedirs(raw_outpath, exist_ok=True)

oldnames = []
newnames = []

# os.listdir returns entries in arbitrary order; sorting makes the assigned
# ids reproducible from one run (or machine) to the next.
for idx, rawimg in enumerate(tqdm(sorted(rawfiles))):
    newname = 'test_' + str(idx) + '.png'
    # The mask shares its file name with the raw image.
    shutil.copy(os.path.join(rawpath, rawimg), os.path.join(raw_outpath, newname))
    shutil.copy(os.path.join(maskpath, rawimg), os.path.join(mask_outpath, newname))
    oldnames.append(rawimg)
    newnames.append(newname)


100%|██████████| 1303/1303 [00:00<00:00, 1877.10it/s]


In [52]:
# Lookup table pairing each original file name with its encoded name.
keydf = pd.DataFrame(
    [(old, new) for old, new in zip(oldnames, newnames)],
    columns=['original', 'newid'],
)

In [53]:
# Persist the name mapping (without the index column) in the working directory.
keydf.to_csv(path_or_buf='test_keydf.csv', index=False)

## Make img-mask dataframe

In [4]:
# Encoded training images; dot-prefixed directory entries are skipped.
rawpath = '/workspace/Competition/landmap/01_DATA/train/png_raw_encoded'
rawfiles = [entry for entry in os.listdir(rawpath) if not entry.startswith('.')]

In [7]:
# Both columns carry the same file name for every row.
traindf = pd.DataFrame(
    [(fname, fname) for fname in rawfiles],
    columns=['img', 'mask'],
)

In [8]:
# Preview the first five rows.
traindf.head(5)

Unnamed: 0,img,mask
0,map_122.png,map_122.png
1,map_0.png,map_0.png
2,map_1.png,map_1.png
3,map_2.png,map_2.png
4,map_3.png,map_3.png


In [9]:
# Persist the image/mask table (without the index column) in the working directory.
traindf.to_csv(path_or_buf='traindf.csv', index=False)