In [5]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
%matplotlib inline

In [None]:
# Where the EuroSAT RGB archive lives and the local file name wget saves it
# under (the URL's last path component); the extraction cell reads this name.
EUROSAT_URL = "http://madm.dfki.de/files/sentinel/EuroSAT.zip"
EUROSAT_ZIP = "EuroSAT.zip"

# Skip the download when the archive is already on disk: re-running the cell
# would otherwise fetch it again and leave numbered duplicates behind.
if not os.path.exists(EUROSAT_ZIP):
    exit_status = os.system(f"wget {EUROSAT_URL}")
    if exit_status != 0:
        # Drop a partial download so the next run does not mistake it for a
        # complete archive, then fail loudly instead of continuing silently.
        if os.path.exists(EUROSAT_ZIP):
            os.remove(EUROSAT_ZIP)
        raise RuntimeError(
            f"wget exited with status {exit_status} while downloading {EUROSAT_URL}"
        )



In [None]:
import zipfile

zip_file = "EuroSAT.zip"

# Unpack the archive into the current working directory.
try:
    with zipfile.ZipFile(zip_file) as z:
        z.extractall()
        print("Extracted all")
except (zipfile.BadZipFile, OSError) as err:
    # Only archive/file-system problems are reported here (corrupt zip,
    # missing file, permissions, ...). Anything else - including a manual
    # kernel interrupt - propagates instead of being silently swallowed.
    print(f"Invalid file: {err}")

In [2]:
def load_eurosat_dataset(root="2750", max_per_class=250, augment=True):
    """Load the extracted EuroSAT images into in-memory tensors.

    Every sub-directory of ``root`` is treated as one class; classes are
    numbered in sorted directory order. Up to ``max_per_class`` ``*.jpg``
    files (in sorted order) are read from each class directory, converted to
    tensors, optionally flipped at random, and normalized.

    Based on https://github.com/thegomeslab/dsces/blob/2e7f0a9e1b5761b78857d8ee709e6ec09421bef7/lectures/18b_Convolutional_Neural_Networks_EuroSAT.ipynb

    Args:
        root: Folder that contains one sub-directory per class.
        max_per_class: Maximum number of images read per class; ``None``
            reads every image.
        augment: When True (the default, matching the original behaviour) a
            random horizontal and vertical flip is applied to each image.
            NOTE(review): the flips run once, at load time, so they are baked
            into the returned tensor and also affect samples that later end
            up in the test split. Pass ``augment=False`` to load clean data.

    Returns:
        X: float tensor with the images stacked along dim 0.
        y: 1-D long tensor holding the class index of each image.
        label_dict: mapping from class index to class (directory) name.

    Raises:
        FileNotFoundError: if ``root`` has no class directories or no image
            was found in any of them.
    """
    data_folders = sorted(
        folder
        for folder in glob.glob(os.path.join(root, "*"))
        if os.path.isdir(folder)
    )
    if not data_folders:
        raise FileNotFoundError(
            f"No class folders found under {root!r}; extract the EuroSAT archive first."
        )

    # preprocessing steps for image
    steps = [transforms.ToTensor()]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
        steps.append(transforms.RandomVerticalFlip())
    # normalization used on training resnet-50 data - TODO confirm same for resnet 18
    steps.append(transforms.Normalize(mean=[0.7137, 0.6628, 0.6519],
                                      std=[0.2970, 0.3017, 0.2979]))
    preprocess = transforms.Compose(steps)

    images = []
    labels = []
    label_dict = {}
    for idx, folder in enumerate(data_folders):
        # basename instead of string replacement: independent of the path
        # separator and of the value of ``root``.
        label_dict[idx] = os.path.basename(folder)
        img_paths = sorted(glob.glob(os.path.join(folder, "*.jpg")))
        for path in tqdm.tqdm(img_paths[:max_per_class], desc=label_dict[idx]):
            # Context manager closes the file handle as soon as the pixels
            # are decoded; convert("RGB") guarantees the three channels that
            # the Normalize step expects.
            with Image.open(path) as img:
                images.append(preprocess(img.convert("RGB")))
            labels.append(idx)

    if not images:
        raise FileNotFoundError(f"No .jpg images found in the class folders under {root!r}.")

    X = torch.stack(images, dim=0).float()
    y = torch.tensor(labels, dtype=torch.long)
    return X, y, label_dict

# One seed for everything stochastic in this cell, so a Restart & Run All
# reproduces the same data and the same split.
SEED = 42

# The random flips applied while loading draw from torch's global RNG.
torch.manual_seed(SEED)
X, y, label_dict = load_eurosat_dataset()

# Hold out 10% for testing. Stratifying on the labels keeps the class
# proportions identical in both splits; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=SEED, stratify=y.numpy()
)

100%|██████████| 250/250 [00:00<00:00, 270.42it/s]
100%|██████████| 250/250 [00:00<00:00, 309.39it/s]
100%|██████████| 250/250 [00:00<00:00, 294.77it/s]
100%|██████████| 250/250 [00:00<00:00, 301.31it/s]
100%|██████████| 250/250 [00:00<00:00, 288.55it/s]
100%|██████████| 250/250 [00:00<00:00, 313.88it/s]
100%|██████████| 250/250 [00:00<00:00, 296.68it/s]
100%|██████████| 250/250 [00:00<00:00, 268.29it/s]
100%|██████████| 250/250 [00:00<00:00, 294.68it/s]
100%|██████████| 250/250 [00:00<00:00, 314.56it/s]


In [6]:
# ImageNet-pretrained ResNet-18. Newer torchvision releases select pretrained
# weights through the ``weights=`` argument and deprecate ``pretrained=True``;
# fall back to the legacy keyword when the weights enum is not available.
_resnet18_weights = getattr(torchvision.models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    model = torchvision.models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    model = torchvision.models.resnet18(pretrained=True)


In [33]:
class EuroSATDataset(Dataset):
    """Map-style dataset over pre-loaded image and label collections.

    Args:
        X: indexable collection of samples (e.g. a stacked image tensor).
        y: indexable collection of class indices, one per sample in ``X``.
        label_dict: mapping from class index to human-readable class name.

    Raises:
        ValueError: if ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y, label_dict):
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.label_dict = label_dict

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        # The class name is deliberately not looked up here: it is not part
        # of the returned pair, so resolving it on every access would only
        # add cost and a possible KeyError. Use ``category`` when needed.
        return self.X[idx], self.y[idx]

    def category(self, idx):
        """Return the class name of the sample at position ``idx``.

        Raises:
            KeyError: if the sample's label has no entry in ``label_dict``.
        """
        return self.label_dict[int(self.y[idx])]

In [34]:
# Wrap the training split so it can be handed to a DataLoader.
train_dataset = EuroSATDataset(
    X=X_train,
    y=y_train,
    label_dict=label_dict,
)