In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import Dataset as TorchDataset
from torchvision.transforms import transforms

warnings.filterwarnings("ignore")

In [20]:
# Define torch dataset class (paired "spot6" / "sen2" rasters)
class Dataset(TorchDataset):
    """Map-style torch dataset pairing one "spot6" raster with "sen2" rasters.

    The metadata table is loaded from a pickled DataFrame that must provide
    the columns ``sen2_no``, ``spot6_filenames`` and ``sen2_filenames``.
    Raster files are looked up under ``folder_path + "y/"`` (spot6) and
    ``folder_path + "x/"`` (sen2), so ``folder_path`` has to end with a path
    separator.

    The base class is referenced through the ``TorchDataset`` alias so that
    re-running this cell does not make the class inherit from its own
    previous definition.

    Parameters
    ----------
    folder_path : str
        Root folder containing the ``x/`` and ``y/`` sub-folders.
    dataset_file : str
        Path of the pickled pandas DataFrame describing the samples.
    sen2_amount : int, default 1
        Maximum number of sen2 files stacked per sample. At least one file
        is always read, even for values below 1.
    """

    def __init__(self, folder_path, dataset_file, sen2_amount=1):
        # define filepaths
        self.folder_path = folder_path
        # read file
        # NOTE(security): unpickling can execute arbitrary code; only load
        # metadata files that come from a trusted source.
        self.df = pd.read_pickle(dataset_file)
        # set amount of sen2 pictures that should be returned
        self.sen2_amount = sen2_amount

        # Drop rows that have no sen2 image at all.
        self.df = self.df[self.df["sen2_no"] != 0]
        # reset_index() keeps the previous index as an extra column; this is
        # left unchanged because consumers of `df` may rely on that column.
        self.df = self.df.reset_index()

        # Per-channel normalisation transforms. They are only stored here;
        # __getitem__ does not apply them (see the TODO there).
        # NOTE(review): the mean/std constants look like dataset statistics
        # for three channels -- confirm their origin.
        self.transform_sen = transforms.Compose([
            transforms.Normalize(mean=[479.0, 537.0, 344.0], std=[430.0, 290.0, 229.0]),
        ])
        self.transform_spot = transforms.Compose([
            transforms.Normalize(mean=[78.0, 91.0, 62.0], std=[36.0, 28.0, 30.0]),
        ])

    def __len__(self):
        """Return the number of samples left after filtering."""
        return len(self.df)

    def _read_raster(self, subfolder, filename):
        """Read all bands of one raster file and close its file handle."""
        # The context manager releases the handle even if read() raises;
        # without it every sample leaks open files.
        with rasterio.open(self.folder_path + subfolder + filename) as src:
            return src.read()

    def __getitem__(self, idx):
        """Load the rasters of sample ``idx``.

        Parameters
        ----------
        idx : int
            Positional row index into the filtered metadata table.

        Returns
        -------
        tuple
            ``(spot6, sen2)`` as arrays returned by rasterio's ``read()``.
            ``sen2`` holds up to ``sen2_amount`` files concatenated along the
            first axis, in the order listed in ``sen2_filenames``.

        Raises
        ------
        IndexError
            If the sample lists no sen2 files.
        """
        row = self.df.iloc[idx]

        # READ SPOT6
        spot6 = self._read_raster("y/", row["spot6_filenames"])

        # READ SEN2 SERIES
        # The first file is always read, so the effective amount is >= 1.
        wanted = max(1, self.sen2_amount)
        sen2_files = row["sen2_filenames"][:wanted]
        if len(sen2_files) == 0:
            raise IndexError("sample %r has no sen2 files" % (idx,))
        # Stack once instead of re-concatenating inside the loop.
        sen2 = np.concatenate(
            [self._read_raster("x/", sen2_file) for sen2_file in sen2_files]
        )

        # TODO: convert both arrays to float tensors and apply
        # self.transform_sen / self.transform_spot; this was sketched in
        # unreachable code after the return and never enabled.
        return (spot6, sen2)

    

In [21]:
# Build the dataset over the local tile folder, one sen2 image per sample.
dataset = Dataset(
    folder_path="data_f4/",
    dataset_file="df_saved_images.pkl",
    sen2_amount=1,
)
# Shuffled single-sample batches, loaded by one worker process.
loader = DataLoader(dataset=dataset, batch_size=1, num_workers=1, shuffle=True)

In [26]:
%time
for i in loader:
    a,b = i
print("done!")

CPU times: user 6 µs, sys: 1e+03 ns, total: 7 µs
Wall time: 16 µs
done!


In [None]:
# Write the filtered metadata table, row index included, to a CSV file.
dataset.df.to_csv(path_or_buf="export.csv", index=True)

In [23]:
# Summary statistics of the numeric metadata columns (default quartiles).
dataset.df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,level_0,index,x,y,index_right,min,max,sen2_no
count,66499.0,66499.0,66499.0,66499.0,66499.0,66499.0,66499.0,66499.0
mean,33674.706672,85684.865457,271049.053369,6801503.0,1668.955608,2.139175,111.1691,3.392653
std,19710.62209,46037.806905,68479.331806,33918.45,958.077492,3.962137,22.864399,0.908871
min,0.0,1028.0,126000.0,6709000.0,0.0,0.0,82.0,1.0
25%,16624.5,45416.5,217500.0,6776500.0,880.5,0.0,95.0,3.0
50%,33304.0,85823.0,267500.0,6800500.0,1659.0,1.0,97.0,4.0
75%,50625.5,126414.5,327000.0,6828000.0,2485.0,3.0,135.0,4.0
max,68199.0,162527.0,401000.0,6879000.0,3351.0,25.0,159.0,4.0
