In [1]:
import numpy as np
from src.data.bridge_site import get_dataloaders

In [2]:
data_order_large = ['population', 'osm_img', 'elevation', 'slope', 'roads', 'waterways', 'admin_bounds_qgis']
data_order_small = ['osm_img', 'slope', 'waterways', 'admin_bounds_qgis']

In [3]:
dataloaders = get_dataloaders(batch_size=64, tile_size=300, data_version="v1", data_order=data_order_large, 
                              transform=False, test_batch_size=64, use_augment=False, 
                              use_rnd_center_point=False, use_several_test_samples=False)

train sampler: 1630 samples in total (815 pos, 815 neg) ([Rwanda] 815 pos 815 neg [Uganda] 0 pos 0 neg)
val sampler: 484 samples in total (242 pos, 242 neg) ([Rwanda] 242 pos 242 neg [Uganda] 0 pos 0 neg)
test sampler: 1240 samples in total (620 pos, 620 neg) ([Rwanda] 370 pos 310 neg [Uganda] 250 pos 310 neg)
test sampler: 740 samples in total (370 pos, 370 neg) ([Rwanda] 370 pos 370 neg [Uganda] 0 pos 0 neg)
test sampler: 500 samples in total (250 pos, 250 neg) ([Rwanda] 0 pos 0 neg [Uganda] 250 pos 250 neg)


In [8]:
for data_order, data_order_name in [[data_order_small, "small"], [data_order_large, "large"]]:
    for version in ["v1", "v2"]:
        for tile_size in [300, 600, 1200]:
            numpy_save = {}
            dataloaders = get_dataloaders(
                batch_size=64, tile_size=tile_size, data_version=version, data_order=data_order, 
                transform=False, test_batch_size=64, use_augment=False,
                use_rnd_center_point=False, use_several_test_samples=False)
            for i, name in [[0, "train"], [1, "val"], [2, "test_rwanda"], [3, "test_uganda"]]:
                X = []
                Y = []
                dataset = dataloaders[i].dataset
                indices = list(dataloaders[i].sampler)
                for j in indices:
                    if dataset.train_gdf.iloc[j].pos_neg not in ["pos", "neg"]:
                        continue
                    x, y = dataset[j]
                    num_channels = x.shape[0]
                    X.append(x.view(1, num_channels, -1).numpy())
                    Y.append(y)
                X = np.concatenate(X, 0)
                Y = np.array(Y)
                numpy_save["{}_X".format(name)] = X
                numpy_save["{}_Y".format(name)] = Y
            np.savez(
                "data_{}_{}_{}.npz".format(data_order_name, tile_size, version),
                **numpy_save)

train sampler: 1630 samples in total (815 pos, 815 neg) ([Rwanda] 815 pos 815 neg [Uganda] 0 pos 0 neg)
val sampler: 484 samples in total (242 pos, 242 neg) ([Rwanda] 242 pos 242 neg [Uganda] 0 pos 0 neg)
test sampler: 1240 samples in total (620 pos, 620 neg) ([Rwanda] 370 pos 310 neg [Uganda] 250 pos 310 neg)
test sampler: 740 samples in total (370 pos, 370 neg) ([Rwanda] 370 pos 370 neg [Uganda] 0 pos 0 neg)
test sampler: 500 samples in total (250 pos, 250 neg) ([Rwanda] 0 pos 0 neg [Uganda] 250 pos 250 neg)
train sampler: 1630 samples in total (815 pos, 815 neg) ([Rwanda] 815 pos 815 neg [Uganda] 0 pos 0 neg)
val sampler: 484 samples in total (242 pos, 242 neg) ([Rwanda] 242 pos 242 neg [Uganda] 0 pos 0 neg)
test sampler: 1240 samples in total (620 pos, 620 neg) ([Rwanda] 370 pos 310 neg [Uganda] 250 pos 310 neg)
test sampler: 740 samples in total (370 pos, 370 neg) ([Rwanda] 370 pos 370 neg [Uganda] 0 pos 0 neg)
test sampler: 500 samples in total (250 pos, 250 neg) ([Rwanda] 0 pos 

In [10]:
import numpy

data = np.load("data_large_1200_v1.npz")
print(data["train_X"].shape, data["train_Y"].shape)
print(data["val_X"].shape, data["val_Y"].shape)
print(data["test_rwanda_X"].shape, data["test_rwanda_Y"].shape)
print(data["test_uganda_X"].shape, data["test_uganda_Y"].shape)

print()

data = np.load("data_small_1200_v1.npz")
print(data["train_X"].shape, data["train_Y"].shape)
print(data["val_X"].shape, data["val_Y"].shape)
print(data["test_rwanda_X"].shape, data["test_rwanda_Y"].shape)
print(data["test_uganda_X"].shape, data["test_uganda_Y"].shape)

print()

data = np.load("data_large_1200_v2.npz")
print(data["train_X"].shape, data["train_Y"].shape)
print(data["val_X"].shape, data["val_Y"].shape)
print(data["test_rwanda_X"].shape, data["test_rwanda_Y"].shape)
print(data["test_uganda_X"].shape, data["test_uganda_Y"].shape)

print()

data = np.load("data_small_1200_v2.npz")
print(data["train_X"].shape, data["train_Y"].shape)
print(data["val_X"].shape, data["val_Y"].shape)
print(data["test_rwanda_X"].shape, data["test_rwanda_Y"].shape)
print(data["test_uganda_X"].shape, data["test_uganda_Y"].shape)

(1630, 9, 2304) (1630,)
(484, 9, 2304) (484,)
(1240, 9, 2304) (1240,)
(740, 9, 2304) (740,)

(1630, 6, 2304) (1630,)
(484, 6, 2304) (484,)
(1240, 6, 2304) (1240,)
(740, 6, 2304) (740,)

(1964, 9, 2304) (1964,)
(522, 9, 2304) (522,)
(868, 9, 2304) (868,)
(740, 9, 2304) (740,)

(1964, 6, 2304) (1964,)
(522, 6, 2304) (522,)
(868, 6, 2304) (868,)
(740, 6, 2304) (740,)
