In [1]:
from torch.utils.data import Dataset
from PIL import Image
import os
import pandas as pd

In [5]:
# The provided metadata file is narrowed to the 'train' rows, because the valid
# and test sets are provided without labels.
# An existing 'split_file.csv' signals that this has already been done.

if os.path.isfile('split_file.csv'):
    # Reuse the earlier result so that `metadata` is always defined for the
    # print below (previously this path raised NameError).
    # NOTE(review): nothing in this cell writes 'split_file.csv' — confirm
    # where it is produced and that it holds the shuffled train rows.
    metadata = pd.read_csv('split_file.csv')
else:
    metadata = pd.read_csv('data/archive/metadata.csv')
    metadata = metadata[metadata['split'] == 'train']

    # Shuffle the rows
    metadata = metadata.sample(frac=1).reset_index(drop=True)

print(len(metadata))

803


In [2]:
class LandDataset(Dataset):
    """Dataset of (satellite image, mask) pairs listed in ``<root>/metadata.csv``.

    Only rows whose ``split`` column equals ``'train'`` are considered, and a
    row is kept only when both its image file and its mask file exist on disk.

    Args:
        root: Directory that contains ``metadata.csv``. The ``sat_image_path``
            and ``mask_path`` column values are joined onto this directory.
        splitfile: Accepted for interface compatibility but not used by this
            class. It defaults to ``None`` so the dataset can be constructed
            as ``LandDataset(root, transform=...)``.
        transform: Optional callable applied to the image and, in a separate
            call, to the mask inside ``__getitem__``.

    NOTE(review): because the same ``transform`` is called once for the image
    and once for the mask, a transform with random behaviour would not be
    synchronised between the two — confirm this is acceptable.
    """

    def __init__(self, root, splitfile=None, transform=None):
        self.transform = transform

        with open(os.path.join(root, 'metadata.csv')) as f:
            metadata = pd.read_csv(f)
        metadata = metadata[metadata['split'] == 'train']

        self.images = []
        self.masks = []
        for sat_rel, mask_rel in zip(metadata['sat_image_path'], metadata['mask_path']):
            img_path = os.path.join(root, sat_rel)
            mask_path = os.path.join(root, mask_rel)
            # Skip rows whose files are missing so indexing never hits a dead path.
            if os.path.isfile(img_path) and os.path.isfile(mask_path):
                self.images.append(img_path)
                self.masks.append(mask_path)

    def __len__(self):
        """Return the number of (image, mask) pairs that were found on disk."""
        return len(self.images)

    def __getitem__(self, idx):
        """Open and return the ``(image, mask)`` pair at position ``idx``.

        Both files are opened with ``PIL.Image.open``; when a transform was
        given it is applied to each of them before returning.
        """
        image = Image.open(self.images[idx])
        mask = Image.open(self.masks[idx])

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask


In [3]:
import torchvision.transforms as T

# Resize every sample to 512x512, then convert it to a tensor.
transform = T.Compose([
    T.Resize((512, 512)),
    T.ToTensor()
])

split_file_path = 'data/archive/data_split.csv'

# `splitfile` is passed explicitly so the call matches LandDataset's signature
# (the previous call omitted it).
dataset = LandDataset('data/archive', splitfile=split_file_path, transform=transform)

# Load the split table only when it exists; `split_file` is left undefined otherwise.
if os.path.isfile(split_file_path):
    split_file = pd.read_csv(split_file_path)
    

In [1]:
import torchvision.transforms as T
import torch

class ToFloatTensor:
    """Callable transform: numpy array in, float32 PyTorch tensor out."""

    def __call__(self, tile):
        # Wrap the array as a tensor first, then cast it to 32-bit float.
        as_tensor = torch.from_numpy(tile)
        return as_tensor.to(torch.float32)

def cdl_to_binary(cdl):
    """Return an elementwise boolean mask over ``cdl`` codes.

    An element is True when its value is <= 60, >= 196, or within 66..77
    (inclusive); every other value maps to False.

    NOTE(review): ``cdl`` is presumably a Cropland Data Layer class code and
    True presumably marks cropland — confirm against the data source.
    """
    low_or_high = (cdl <= 60) | (cdl >= 196)
    mid_band = (cdl >= 66) & (cdl <= 77)
    return low_or_high | mid_band

# Pipeline with a single step: convert the numpy tile to a float tensor.
transf = T.Compose([ToFloatTensor()])

In [2]:
import numpy as np
# Load one image patch, replace NaNs, and move its last axis to the front.
tile = np.moveaxis(
    np.nan_to_num(np.load('data/weakly_supervised_cropland/images/patch0.npy')),
    -1,
    0,
)
print(tile.shape)

# Load the matching single-pixel mask, give it a leading axis, and stack it
# after the existing planes of the tile.
mask = np.load('data/weakly_supervised_cropland/single_pixel_masks/mask_patch0.npy')[np.newaxis, ...]
tile = np.concatenate((tile, mask), axis=0)
print(tile.shape)
print(mask.shape)

# Run the stacked array through the transform pipeline.
tile = transf(tile)
print(tile.shape)


(18, 50, 50)
(19, 50, 50)
(1, 50, 50)
torch.Size([19, 50, 50])


In [3]:
# The first seven planes of the stacked tile are used as input features.
features = tile[:7]
print(features.shape)

# The second-to-last plane is rescaled and thresholded into a 0/1 float label.
# NOTE(review): the 10000 factor presumably undoes a scaling applied upstream — confirm.
label = cdl_to_binary(tile[-2] * 10000).float()

# The last plane is the mask that was concatenated onto the tile; stored as uint8.
mask = tile[-1].byte()

torch.Size([7, 50, 50])


In [4]:
# Re-check the shape of the feature tensor sliced from the tile.
print(features.shape)

torch.Size([7, 50, 50])


In [1]:
import numpy as np
# Load the '0anchor' tile from the land-cover representation data (rebinds `tile`).
tile = np.load('data/land_cover_representation/tiles/0anchor.npy')

In [3]:
# Display the dimensions of the loaded array.
tile.shape

(100, 100, 5)

In [11]:
# Display index 4 along the last axis of the tile.
# NOTE(review): presumably this plane holds per-pixel class codes — confirm.
tile[:, :, 4]

array([[61, 61, 61, ..., 61, 61, 61],
       [61, 61, 61, ..., 61, 61, 61],
       [61, 61, 61, ..., 61, 61, 61],
       ...,
       [69, 69, 69, ..., 69, 69, 69],
       [69, 69, 69, ..., 69, 69, 69],
       [69, 69, 69, ..., 69, 69, 69]], dtype=uint8)