In [46]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

import rasterio

import warnings
warnings.filterwarnings("ignore")

In [69]:
# Define torch dataset Class
class Dataset(Dataset):
    def __init__(self,folder_path,dataset_file,sen2_amount=1,sen2_tile="all"):
        
        # define filepaths
        self.folder_path = folder_path
        # read file
        self.df = pd.read_pickle(dataset_file)
        # set amount of sen2 pictures that should be returned
        self.sen2_amount = sen2_amount
        
        # filter for sen2 tile
        if sen2_tile!="all":
            self.df = self.df[self.df["sen2_tile"]==sen2_tile]
            
        # clear up DF
        self.df = self.df[self.df["sen2_no"]>2]
        try:
            self.df = self.df.drop(labels=["level_0"], axis=1)
        except KeyError:
            pass
        self.df = self.df.reset_index()
        
    def __len__(self):
        """
        Returns length of data
        """
        return(len(self.df))
    
 
    def __getitem__(self,idx):
        
        current = self.df.iloc[idx]
        spot6_file = current["spot6_filenames"]
        sen2_files = current["sen2_filenames"]
        other_valid_acq = current["other_valid_acq"]        
        
        """ORDER SEN2 DATES"""
        _ = [x in ]
        sorted_sen2 = []
        for i in sorted(other_valid_acq):
            if other_valid_acq[i][1] in sen2_files:
                sorted_sen2.append(other_valid_acq[i])
            
        
        """READ SPOT6"""
        spot6 = rasterio.open(self.folder_path+"y/"+spot6_file).read()

    
        """READ SEN2 SERIES"""
        # read first file
        sen2 = rasterio.open(self.folder_path+"x/"+XXX).read()
        
        if self.sen2_amount>1:
            # read following sen2 and stack
            count=1
            for key in sen2_files_keys[1:]:
                # read file as array
                sen2_following = rasterio.open(self.folder_path+"x/"+XXX).read()
                # stack to previous images
                sen2 = np.concatenate([sen2, sen2_following])

                # break if all wanted files loaded
                count=count+1
                if count==self.sen2_amount:
                    break
            # if final count not yet reached, repeat last chip until enough are there
            while count<self.sen2_amount:
                sen2 = np.concatenate([sen2, sen2_following])
                count=count+1
        
        # transform to tensor
        sen2  = torch.from_numpy(sen2)
        spot6 = torch.from_numpy(spot6)
        sen2 = sen2.float()
        spot6 = spot6.float()
        
        #print(len(sen2_files),sen2.size())
        
        # define transformer
        transform_spot = transforms.Compose([transforms.Normalize(mean=[479.0, 537.0, 344.0], std=[430.0, 290.0, 229.0]) ])
        # dynamically define transform to reflect shape of tensor
        trans_mean,trans_std = [78.0, 91.0, 62.0]*self.sen2_amount,[36.0, 28.0, 30.0]*self.sen2_amount
        transform_sen = transforms.Compose([transforms.Normalize(mean=trans_mean, std= trans_std)])
        # perform transform
        sen2  = transform_sen(sen2)
        spot6 = transform_spot(spot6)
        
        # return result
        return(spot6,sen2)


In [70]:
dataset = Dataset("data_f4/","data_f4_pkls/df_saved_images.pkl",sen2_amount=4,sen2_tile="T30UXU")
loader  = DataLoader(dataset,batch_size=64, shuffle=True, num_workers=0,pin_memory=True,
                    drop_last=True,prefetch_factor=2)

In [71]:
next(iter(loader))

['SENTINEL2A_20180802-105938-888_L2A_T30UXU_C_V2-2_FRE_RGB_2154_360750.0_6831750.0_79days.tif', 'SENTINEL2A_20180805-111514-408_L2A_T30UXU_C_V2-2_FRE_RGB_2154_360750.0_6831750.0_82days.tif', 'SENTINEL2A_20180623-110520-630_L2A_T30UXU_C_V2-2_FRE_RGB_2154_360750.0_6831750.0_39days.tif', 'SENTINEL2A_20180626-111413-074_L2A_T30UXU_C_V2-2_FRE_RGB_2154_360750.0_6831750.0_42days.tif', 'SENTINEL2B_20180708-110215-631_L2A_T30UXU_C_V2-2_FRE_RGB_2154_360750.0_6831750.0_54days.tif', 'SENTINEL2A_20180716-111023-182_L2A_T30UXU_D_V1-8_FRE_RGB_2154_360750.0_6831750.0_62days.tif']
SENTINEL2B_20180519-110334-001_L2A_T30UXU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2B_20180505-112304-300_L2A_T30UWU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2A_20180504-110230-455_L2A_T30UXU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2A_20180420-112611-293_L2A_T30UWU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2B_20180419-110127-730_L2A_T30UXU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2A_20180623-110520-630_L2A_T30UXU_C_V2-2_FRE_RGB_2154.tif
SENTINEL2A_20180626-111413-074_L2

NameError: name 'XXX' is not defined

In [59]:
dataset.df

Unnamed: 0,level_0,index,x,y,geometry,index_right,name,min,max,other_acq,geom,type,other_valid_acq,spot6_validity,sen2_no,sen2_filenames,sen2_tile,spot6_filenames
0,57210,71611,351000.0,6750750.0,POINT (351000.000 6750750.000),2023,ORT_2018_0351_6753_LA93_8Bits.jp2,0,96,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...","POLYGON ((351000.000 6753000.000, 351000.000 6...",train,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...",True,8,[SENTINEL2A_20180802-105938-888_L2A_T30TXT_C_V...,T30UXU,ORT_2018_0351_6753_LA93_8Bits_351000.0_6750750...
1,57211,71612,351000.0,6751500.0,POINT (351000.000 6751500.000),2023,ORT_2018_0351_6753_LA93_8Bits.jp2,0,96,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...","POLYGON ((351000.000 6753000.000, 351000.000 6...",train,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...",True,8,[SENTINEL2A_20180802-105938-888_L2A_T30TXT_C_V...,T30UXU,ORT_2018_0351_6753_LA93_8Bits_351000.0_6751500...
2,57212,71613,351000.0,6752250.0,POINT (351000.000 6752250.000),2023,ORT_2018_0351_6753_LA93_8Bits.jp2,0,96,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...","POLYGON ((351000.000 6753000.000, 351000.000 6...",train,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...",True,8,[SENTINEL2A_20180802-105938-888_L2A_T30TXT_C_V...,T30UXU,ORT_2018_0351_6753_LA93_8Bits_351000.0_6752250...
3,57213,71614,351000.0,6753000.0,POINT (351000.000 6753000.000),2023,ORT_2018_0351_6753_LA93_8Bits.jp2,0,96,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...","POLYGON ((351000.000 6753000.000, 351000.000 6...",train,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...",True,8,[SENTINEL2A_20180802-105938-888_L2A_T30TXT_C_V...,T30UXU,ORT_2018_0351_6753_LA93_8Bits_351000.0_6753000...
4,57215,71616,351750.0,6750750.0,POINT (351750.000 6750750.000),2023,ORT_2018_0351_6753_LA93_8Bits.jp2,0,96,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...","POLYGON ((351000.000 6753000.000, 351000.000 6...",train,"{40: [2018-08-02 00:00:00, 'SENTINEL2A_2018080...",True,7,[SENTINEL2A_20180802-105938-888_L2A_T30TXT_C_V...,T30UXU,ORT_2018_0351_6753_LA93_8Bits_351750.0_6750750...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9185,68116,82642,401250.0,6828000.0,POINT (401250.000 6828000.000),3334,ORT_2018_0399_6831_LA93_8Bits.jp2,1,159,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...","POLYGON ((399000.000 6831000.000, 399000.000 6...",train,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...",True,9,[SENTINEL2B_20180926-110028-459_L2A_T30UXU_C_V...,T30UXU,ORT_2018_0399_6831_LA93_8Bits_401250.0_6828000...
9186,68117,82643,401250.0,6828750.0,POINT (401250.000 6828750.000),3334,ORT_2018_0399_6831_LA93_8Bits.jp2,1,159,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...","POLYGON ((399000.000 6831000.000, 399000.000 6...",train,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...",True,7,[SENTINEL2B_20180926-110028-459_L2A_T30UXU_C_V...,T30UXU,ORT_2018_0399_6831_LA93_8Bits_401250.0_6828750...
9187,68118,82644,401250.0,6829500.0,POINT (401250.000 6829500.000),3334,ORT_2018_0399_6831_LA93_8Bits.jp2,1,159,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...","POLYGON ((399000.000 6831000.000, 399000.000 6...",train,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...",True,7,[SENTINEL2B_20180926-110028-459_L2A_T30UXU_C_V...,T30UXU,ORT_2018_0399_6831_LA93_8Bits_401250.0_6829500...
9188,68119,82645,401250.0,6830250.0,POINT (401250.000 6830250.000),3334,ORT_2018_0399_6831_LA93_8Bits.jp2,1,159,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...","POLYGON ((399000.000 6831000.000, 399000.000 6...",train,"{159: [2018-09-26 00:00:00, 'SENTINEL2B_201809...",True,9,[SENTINEL2B_20180926-110028-459_L2A_T30UXU_C_V...,T30UXU,ORT_2018_0399_6831_LA93_8Bits_401250.0_6830250...


In [79]:
len("SENTINEL2A_20180802-105938-888_L2A_T30UXU_C_V2-2_FRE_RGB_2154")

61