In [2]:
import pandas as pd

# Load the train and test tables from their JSON files, in that order.
train, test = (
    pd.read_json(json_path)
    for json_path in ('data/train.json', 'data/test.json')
)

# Leave the training table as the last expression so the cell displays it.
train

Unnamed: 0,filename,label
0003d2eb,{'S1': {'September': 'data/train_features/0003...,data/train_agbm/0003d2eb_agbm.tif
000aa810,{'S1': {'September': 'data/train_features/000a...,data/train_agbm/000aa810_agbm.tif
000d7e33,{'S1': {'September': 'data/train_features/000d...,data/train_agbm/000d7e33_agbm.tif
00184691,{'S1': {'September': 'data/train_features/0018...,data/train_agbm/00184691_agbm.tif
001b0634,{'S1': {'September': 'data/train_features/001b...,data/train_agbm/001b0634_agbm.tif
...,...,...
ffc563f4,{'S1': {'September': 'data/train_features/ffc5...,data/train_agbm/ffc563f4_agbm.tif
ffc730b9,{'S1': {'September': 'data/train_features/ffc7...,data/train_agbm/ffc730b9_agbm.tif
ffc7d4f2,{'S1': {'September': 'data/train_features/ffc7...,data/train_agbm/ffc7d4f2_agbm.tif
ffd8db7a,{'S1': {'September': 'data/train_features/ffd8...,data/train_agbm/ffd8db7a_agbm.tif


In [4]:
from skimage import io
import numpy as np
from tqdm import tqdm
import multiprocessing
from concurrent.futures import ProcessPoolExecutor

# Sensor keys looked up in each record of the `filename` column.
sensors = ['S1', 'S2']

# Month keys, listed in the order in which the monthly images are stacked.
months = [
    'September', 'October', 'November', 'December',
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
]

# Table to convert and the folder that receives the generated .npy files.
df = test
dest_path = 'data/test_features_npy'

# One record per row, taken from the `filename` column.
images = df['filename'].values

def _prepare_s1(img):
    """Keep bands 0 and 1, clip to [-30, 0] and rescale to [0, 252/255]."""
    img = img[..., (0, 1)]
    return np.clip(img, -30, 0) * (-8.4) / 255.


def _prepare_s2(img):
    """Build the 6-channel S2 stack: bands (2, 1, 0), NDVI, NDWI and clouds.

    Band roles follow the variable names (band 2 = red, 6 = nir, 8 = swir,
    10 = clouds); NOTE(review): confirm against the dataset's band order.
    """
    red = img[..., 2].astype(np.float32)
    nir = img[..., 6].astype(np.float32)
    swir = img[..., 8].astype(np.float32)

    # Work on a copy so the loaded image is not modified through a view.
    clouds = img[..., 10].copy()
    clouds[clouds == 255] = 100  # 255 is remapped to 100 before scaling
    clouds = clouds / 100

    # Fancy indexing reorders the first three bands; values are scaled by
    # 4000 and clipped to [0, 1].
    rgb = np.clip(img[..., (2, 1, 0)] / 4000, 0, 1)

    # Normalised differences, shifted from [-1, 1] to [0, 1]; the epsilon
    # avoids division by zero.
    ndvi = (nir - red) / (nir + red + 1e-8)
    ndvi = (ndvi + 1.) / 2.
    ndwi = (nir - swir) / (nir + swir + 1e-8)
    ndwi = (ndwi + 1.) / 2.

    return np.concatenate(
        [
            rgb.astype(np.float32),
            ndvi[..., None],
            ndwi[..., None],
            clouds[..., None].astype(np.float32),
        ],
        axis=-1,
    )


def _chip_id_for(filename, sensor):
    """Derive the chip id from a non-missing path, preferring `sensor`'s own files.

    Months are scanned from last to first, so the last month's path is used
    whenever it exists. If the sensor has no file at all, a path from another
    sensor of the same record is used (assumes all paths in one record share
    the chip id prefix -- TODO confirm).
    """
    own = [filename[sensor][month] for month in reversed(months)]
    others = [
        filename[other][month]
        for other in sensors
        if other != sensor
        for month in reversed(months)
    ]
    for candidate in own + others:
        if candidate is not None:
            return candidate.split('/')[-1].split('_')[0]
    raise ValueError('record has no image path for any sensor/month; cannot derive chip_id')


def generate_np(filename):
    """Convert one chip's monthly images into one .npy time series per sensor.

    Relies on the module-level `sensors`, `months` and `dest_path`.

    Args:
        filename: nested mapping ``filename[sensor][month]`` giving an image
            path, or ``None`` when that month is missing.

    Side effects:
        For every sensor, writes ``{dest_path}/{chip_id}_{sensor}.npy`` holding
        the float32 monthly images stacked along a new leading axis in
        `months` order. Missing months are zero-filled with shape
        (256, 256, 6) for 'S2' and (256, 256, 2) otherwise. ``np.save`` does
        not create directories, so `dest_path` must already exist.

    Raises:
        ValueError: if the record has no path at all, so no chip id can be derived.
    """
    for sensor in sensors:
        # Resolve the id up front instead of reusing the loop's last `path`,
        # which is None (and crashed on `.split`) when the final month is missing.
        chip_id = _chip_id_for(filename, sensor)

        ts = []
        for month in months:
            path = filename[sensor][month]
            if path is None:
                img = np.zeros((256, 256, 6 if sensor == 'S2' else 2), dtype=np.float32)
            else:
                img = io.imread(path)
                if sensor == 'S1':
                    img = _prepare_s1(img)
                elif sensor == 'S2':
                    img = _prepare_s2(img)
            ts.append(img.astype(np.float32))

        ts = np.stack(ts, axis=0)
        np.save(f'{dest_path}/{chip_id}_{sensor}.npy', ts.astype(np.float32))

# Use as many worker processes as cpu_count() reports.
num_cores = multiprocessing.cpu_count()
with ProcessPoolExecutor(max_workers=num_cores) as pool, tqdm(total=len(images)) as progress:
    futures = []
    for img in images:
        future = pool.submit(generate_np, img)
        # Advance the progress bar each time a submitted job completes.
        future.add_done_callback(lambda _finished: progress.update())
        futures.append(future)

    # Collect the results; .result() also re-raises any exception from a worker.
    results = [pending.result() for pending in futures]


100%|██████████| 2773/2773 [03:25<00:00, 13.51it/s]


In [51]:
def generate_np(path):
    """Read one label image, divide it by 12905.3 and save it as float32 .npy.

    NOTE: this reuses the name of the feature-conversion function defined
    earlier in the notebook, so running this cell replaces that definition.

    Args:
        path: path of the image to read; the text before the first '_' of its
            last '/'-separated component is used as the chip id.

    Side effects:
        Writes ``data/train_agbm_npy/{chip_id}.npy``.
    """
    # 12905.3 is the scaling constant (presumably a dataset-wide maximum --
    # TODO confirm).
    scaled = io.imread(path) / 12905.3
    base_name = path.rsplit('/', 1)[-1]
    chip_id = base_name.partition('_')[0]
    np.save(f'data/train_agbm_npy/{chip_id}.npy', scaled.astype(np.float32))

# Use as many worker processes as cpu_count() reports.
num_cores = multiprocessing.cpu_count()
with ProcessPoolExecutor(max_workers=num_cores) as pool, tqdm(total=len(train['label'].values)) as progress:
    futures = []
    for img in train['label'].values:
        future = pool.submit(generate_np, img)
        # Advance the progress bar each time a submitted job completes.
        future.add_done_callback(lambda _finished: progress.update())
        futures.append(future)

    # Collect the results; .result() also re-raises any exception from a worker.
    results = [pending.result() for pending in futures]

100%|██████████| 8689/8689 [00:02<00:00, 4070.22it/s]


In [45]:
# Train split: the table index holds the chip ids; write them as a
# one-column CSV without the positional index.
train_chip_ids = train.index.values
train_npy = pd.DataFrame({'chip_id': train_chip_ids})
train_npy.to_csv('data/train_chip_ids.csv', index=False)

# Test split: same export.
test_chip_ids = test.index.values
test_npy = pd.DataFrame({'chip_id': test_chip_ids})
test_npy.to_csv('data/test_chip_ids.csv', index=False)

In [46]:
# Reload the chip-id lists written to CSV.
# Chip ids are identifiers, not numbers: pin the column to `str` so that type
# inference can never turn digits-only ids (e.g. '00184691') into integers
# and drop their leading zeros.
train_npy = pd.read_csv('data/train_chip_ids.csv', dtype={'chip_id': str})
test_npy = pd.read_csv('data/test_chip_ids.csv', dtype={'chip_id': str})

# Display the reloaded training ids as the cell output.
train_npy

Unnamed: 0,chip_id
0,0003d2eb
1,000aa810
2,000d7e33
3,00184691
4,001b0634
...,...
8684,ffc563f4
8685,ffc730b9
8686,ffc7d4f2
8687,ffd8db7a


In [47]:
# Display the `test_npy` chip-id table as the cell output.
test_npy

Unnamed: 0,chip_id
0,00a28320
1,01047f47
2,0128a38e
3,0169bbe3
4,0189741b
...,...
2768,ff9dfc68
2769,ffda6cc9
2770,ffde1ed8
2771,ffdfa26a
