## Import troch

In [1]:
import torch
from torch import nn

torch.__version__

'2.0.0+cpu'

### Device selection

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

### Path for dataset

In [3]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import rasterio
import shutil
import random

#before continueing make sure you have the EuroSAT_MS dataset downloaded
# the dataset paths might be diiferent here but e.g. instead of data/EuroSAT/EuroSAT_MS we have considered Eurosat/EuroSAt
# for the outfut consider data/<outfut_dir_name> as the standard output instead of the way it is mentioned here




  Walks through dir_path returning its contents.
  Args:
    dir_path (str or pathlib.Path): target directory
  
  Returns:
    A print out of:
      number of subdiretories in dir_path
      number of images (files) in each subdirectory
      name of each subdirectory

In [4]:
def walk_through_dir(dir_path):
  
  for dirpath, dirnames, filenames in os.walk(dir_path):
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [5]:
# here the Dir should be the folder which containe the image files sorted in class folders
walk_through_dir('./EuroSAT/EuroSAT_MS')

# OUTPUT EXAMPLE for the mentioned dir, normally it would be   data/EuroSAT_MS
"""
There are 10 directories and 0 images in './EuroSAT/EuroSAT_MS'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\AnnualCrop'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\Forest'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\HerbaceousVegetation'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\Highway'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\Industrial'.
There are 0 directories and 2000 images in './EuroSAT/EuroSAT_MS\Pasture'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\PermanentCrop'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\Residential'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\River'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\SeaLake'.
"""

There are 10 directories and 0 images in './EuroSAT/EuroSAT_MS'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\AnnualCrop'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\Forest'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\HerbaceousVegetation'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\Highway'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\Industrial'.
There are 0 directories and 2000 images in './EuroSAT/EuroSAT_MS\Pasture'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\PermanentCrop'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\Residential'.
There are 0 directories and 2500 images in './EuroSAT/EuroSAT_MS\River'.
There are 0 directories and 3000 images in './EuroSAT/EuroSAT_MS\SeaLake'.


In [6]:

# will divide the dataset into test and train dataset
# e.g for 80/20 split set split ratio to 0.8

def create_train_test_split(dataset_dir, output_dir, split_ratio=0.8):

    # Define train and test directory paths
    train_dir = os.path.join(output_dir, 'train')
    test_dir = os.path.join(output_dir, 'test')

    # Create output directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # List all class subdirectories (i.e., categories)
    class_dirs = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]

    for class_dir in class_dirs:
        class_path = os.path.join(dataset_dir, class_dir)
        images = os.listdir(class_path)

        # Shuffle the images
        random.shuffle(images)

        # Split images into train and test sets
        split_idx = int(len(images) * split_ratio)
        train_images = images[:split_idx]
        test_images = images[split_idx:]

        # Create class directories in train and test directories
        train_class_dir = os.path.join(train_dir, class_dir)
        test_class_dir = os.path.join(test_dir, class_dir)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Move or copy images to respective directories
        for img_name in train_images:
            img_src = os.path.join(class_path, img_name)
            img_dest = os.path.join(train_class_dir, img_name)
            shutil.copy(img_src, img_dest)  # Use shutil.move if you want to move instead of copy

        for img_name in test_images:
            img_src = os.path.join(class_path, img_name)
            img_dest = os.path.join(test_class_dir, img_name)
            shutil.copy(img_src, img_dest)

    print(f"Dataset split into train and test sets. Train set size: {split_idx}, Test set size: {len(images) - split_idx}")


# Example usage
dataset2_dir = "EuroSAT/EuroSAT_MS"                   
output_dir = "Euro_MS"                               # it should be the dir where you want the output, eg data/<output_dir_name>
create_train_test_split(dataset2_dir, output_dir, split_ratio=0.8)



Dataset split into train and test sets. Train set size: 2400, Test set size: 600


In [7]:
walk_through_dir('./Euro_MS')

There are 2 directories and 0 images in './Euro_MS'.
There are 10 directories and 0 images in './Euro_MS\test'.
There are 0 directories and 1070 images in './Euro_MS\test\AnnualCrop'.
There are 0 directories and 1069 images in './Euro_MS\test\Forest'.
There are 0 directories and 1070 images in './Euro_MS\test\HerbaceousVegetation'.
There are 0 directories and 892 images in './Euro_MS\test\Highway'.
There are 0 directories and 894 images in './Euro_MS\test\Industrial'.
There are 0 directories and 732 images in './Euro_MS\test\Pasture'.
There are 0 directories and 903 images in './Euro_MS\test\PermanentCrop'.
There are 0 directories and 1093 images in './Euro_MS\test\Residential'.
There are 0 directories and 911 images in './Euro_MS\test\River'.
There are 0 directories and 1093 images in './Euro_MS\test\SeaLake'.
There are 10 directories and 0 images in './Euro_MS\train'.
There are 0 directories and 2870 images in './Euro_MS\train\AnnualCrop'.
There are 0 directories and 2869 images in '


will divide the dataset into test and train dataset with the final dataset being reduced by the mentioned size reduction
 e.g for 80/20 split of 10% the size of database set split ratio to 0.8 and dataset_size_reduction to 0.1

In [8]:

# will divide the dataset into test and train dataset with the final dataset being reduced by the mentioned size reduction
# e.g for 80/20 split of 10% the size of database set split ratio to 0.8 and dataset_size_reduction to 0.1

def create_train_test_split_2(dataset_dir, output_dir, split_ratio=0.8,dataset_size_reduction = 1):

    # Define train and test directory paths
    train_dir = os.path.join(output_dir, 'train')
    test_dir = os.path.join(output_dir, 'test')

    # Create output directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # List all class subdirectories (i.e., categories)
    class_dirs = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]

    for class_dir in class_dirs:
        class_path = os.path.join(dataset_dir, class_dir)
        images = os.listdir(class_path)

        # Shuffle the images
        random.shuffle(images)

        # Split images into train and test sets
        idx = int(len(images)*dataset_size_reduction)
        split_idx = int(idx * split_ratio)
        train_images = images[:split_idx]
        test_images = images[split_idx:idx]

        # Create class directories in train and test directories
        train_class_dir = os.path.join(train_dir, class_dir)
        test_class_dir = os.path.join(test_dir, class_dir)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Move or copy images to respective directories
        for img_name in train_images:
            img_src = os.path.join(class_path, img_name)
            img_dest = os.path.join(train_class_dir, img_name)
            shutil.copy(img_src, img_dest)  # Use shutil.move if you want to move instead of copy

        for img_name in test_images:
            img_src = os.path.join(class_path, img_name)
            img_dest = os.path.join(test_class_dir, img_name)
            shutil.copy(img_src, img_dest)

    print(f"Dataset split into train and test sets. Train set size: {split_idx}, Test set size: {idx - split_idx}")


In [9]:
dataset2_dir = "EuroSAT/EuroSAT_MS"
output_dir = "Euro_MS_Reduced"                              #same here as mentioned above
create_train_test_split_2(dataset2_dir, output_dir, split_ratio=0.8,dataset_size_reduction=0.1)

Dataset split into train and test sets. Train set size: 240, Test set size: 60


In [10]:
walk_through_dir('./Euro_MS_Reduced')

There are 2 directories and 0 images in './Euro_MS_Reduced'.
There are 10 directories and 0 images in './Euro_MS_Reduced\test'.
There are 0 directories and 120 images in './Euro_MS_Reduced\test\AnnualCrop'.
There are 0 directories and 115 images in './Euro_MS_Reduced\test\Forest'.
There are 0 directories and 119 images in './Euro_MS_Reduced\test\HerbaceousVegetation'.
There are 0 directories and 100 images in './Euro_MS_Reduced\test\Highway'.
There are 0 directories and 98 images in './Euro_MS_Reduced\test\Industrial'.
There are 0 directories and 80 images in './Euro_MS_Reduced\test\Pasture'.
There are 0 directories and 98 images in './Euro_MS_Reduced\test\PermanentCrop'.
There are 0 directories and 119 images in './Euro_MS_Reduced\test\Residential'.
There are 0 directories and 100 images in './Euro_MS_Reduced\test\River'.
There are 0 directories and 120 images in './Euro_MS_Reduced\test\SeaLake'.
There are 10 directories and 0 images in './Euro_MS_Reduced\train'.
There are 0 directori

In [11]:
dataset2_dir = "EuroSAT/EuroSAT_MS"
output_dir = "Euro_MS_Test_red"
create_train_test_split_2(dataset2_dir, output_dir, split_ratio=0.8,dataset_size_reduction=0.01)

Dataset split into train and test sets. Train set size: 24, Test set size: 6


In [12]:
walk_through_dir('./Euro_MS_Test_red')

There are 2 directories and 0 images in './Euro_MS_Test_red'.
There are 10 directories and 0 images in './Euro_MS_Test_red\test'.
There are 0 directories and 12 images in './Euro_MS_Test_red\test\AnnualCrop'.
There are 0 directories and 12 images in './Euro_MS_Test_red\test\Forest'.
There are 0 directories and 12 images in './Euro_MS_Test_red\test\HerbaceousVegetation'.
There are 0 directories and 10 images in './Euro_MS_Test_red\test\Highway'.
There are 0 directories and 10 images in './Euro_MS_Test_red\test\Industrial'.
There are 0 directories and 8 images in './Euro_MS_Test_red\test\Pasture'.
There are 0 directories and 10 images in './Euro_MS_Test_red\test\PermanentCrop'.
There are 0 directories and 12 images in './Euro_MS_Test_red\test\Residential'.
There are 0 directories and 10 images in './Euro_MS_Test_red\test\River'.
There are 0 directories and 12 images in './Euro_MS_Test_red\test\SeaLake'.
There are 10 directories and 0 images in './Euro_MS_Test_red\train'.
There are 0 dire