In [198]:
#Import the necessary packages
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.models import resnet50
from torchvision import transforms
from tqdm import tqdm
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import shutil
import torch
import time
import os

In [199]:
# ImageNet per-channel mean and standard deviation, and the image size
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = 224

# determine the device to be used for training and evaluation
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# DATA_PATH: raw dataset, expected to hold one sub-folder per class
# (e.g. EuroSAT_RGB/AnnualCrop); BASE_PATH: root of the generated splits
DATA_PATH = "../EuroSAT_RGB"
BASE_PATH = "../dataset"

# fractions of the dataset assigned to the validation / test / train splits
VAL_SPLIT = 0.2
TEST_SPLIT = 0.1
TRAIN_SPLIT = 0.7

# the three fractions must partition the dataset; fail fast if one of them
# is edited without the others (the tolerance absorbs float rounding)
assert abs(TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT - 1.0) < 1e-9, \
    "TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT must sum to 1"

# directories that receive the train / test / validation images
TRAIN = os.path.join(BASE_PATH, "train")
TEST = os.path.join(BASE_PATH, "test")
VAL = os.path.join(BASE_PATH, "val")

# specify training hyperparameters
BATCH_SIZE = 64
PRED_BATCH_SIZE = 4
EPOCHS = 1
LR = 0.001
#LR_FINETUNE = 0.0005

In [200]:
# file extensions (lower-case, leading dot included) that count as images
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


def list_images(basePath, contains=None):
    """Return an iterator over the image files found under ``basePath``.

    Thin wrapper around ``list_files`` that restricts the result to the
    extensions listed in ``image_types``.

    Args:
        basePath: directory to search; forwarded to ``list_files``.
        contains: optional substring filter on the file name; forwarded
            unchanged to ``list_files``.

    Returns:
        Whatever ``list_files`` returns for these arguments.
    """
    # delegate to the generic walker, keeping only image extensions
    return list_files(basePath, contains=contains, validExts=image_types)


def list_files(basePath, validExts=None, contains=None):
    """Recursively yield the paths of the files found under ``basePath``.

    Args:
        basePath: directory to walk (passed to ``os.walk``).
        validExts: extension(s) to keep, e.g. ``".png"`` or
            ``(".jpg", ".png")``. May be a single string or any iterable of
            strings; matching is case-insensitive. ``None`` keeps every file.
        contains: if not ``None``, only files whose name contains this
            substring are kept.

    Yields:
        ``os.path.join(rootDir, filename)`` for every matching file.
    """
    # str.endswith only accepts a str or a tuple of str, and the file
    # extension is lower-cased below, so normalise the filter once up front
    if validExts is not None:
        if isinstance(validExts, str):
            validExts = (validExts,)
        validExts = tuple(validExt.lower() for validExt in validExts)

    # loop over the directory structure
    for (rootDir, dirNames, filenames) in os.walk(basePath):
        # loop over the filenames in the current directory
        for filename in filenames:
            # if the contains string is not none and the filename does not
            # contain the supplied string, then ignore the file
            if contains is not None and contains not in filename:
                continue

            # the extension is everything from the last dot onwards; a name
            # without any dot has no extension (rfind() returns -1 there,
            # which would otherwise select the last character of the name)
            dotIndex = filename.rfind(".")
            ext = filename[dotIndex:].lower() if dotIndex != -1 else ""

            # check to see if the file has a requested extension
            if validExts is None or ext.endswith(validExts):
                # construct the path to the file and yield it
                yield os.path.join(rootDir, filename)

In [203]:
def copy_images(imagePaths, folder):
    """Copy images into ``folder``, grouped in one sub-folder per label.

    The label of an image is the name of the directory that directly
    contains it, so ``.../AnnualCrop/img.jpg`` is copied to
    ``folder/AnnualCrop/img.jpg``.

    Args:
        imagePaths: iterable of source image paths.
        folder: destination root directory; created if it does not exist.

    Raises:
        ValueError: if a path has no usable parent directory name to serve
            as its label (bare file name, or a parent of ``.`` / ``..``).
    """
    # exist_ok avoids the check-then-create race and makes re-runs harmless
    os.makedirs(folder, exist_ok=True)

    # label folders already created during this call, so each one is only
    # created once instead of being re-checked for every image
    createdLabelFolders = set()

    # loop over the image paths
    for path in imagePaths:
        # split with os.path helpers rather than on os.path.sep: they cope
        # with repeated separators and with every separator the platform
        # accepts
        parentDir, imageName = os.path.split(path)
        label = os.path.basename(parentDir)

        # without a real directory name there is no label; "." or ".." would
        # also place the copy outside of a label folder
        if label in ("", os.curdir, os.pardir):
            raise ValueError(
                "cannot derive a label from the parent directory of %r" % (path,))

        # make sure the label folder exists
        labelFolder = os.path.join(folder, label)
        if labelFolder not in createdLabelFolders:
            os.makedirs(labelFolder, exist_ok=True)
            createdLabelFolders.add(labelFolder)

        # construct the destination image path and copy the current
        # image to it
        destination = os.path.join(labelFolder, imageName)
        shutil.copy(path, destination)

In [204]:
print("[INFO] loading image paths...")
# os.walk returns directory entries in an arbitrary, filesystem-dependent
# order, so sort the paths first: only then does the seeded shuffle below
# give the same ordering (and therefore the same split) on every machine.
# NOTE: copy_images adds to folders that already exist, so remove a split
# directory generated from a different ordering before copying again.
imagePaths = sorted(list_images(DATA_PATH))
np.random.seed(999)
np.random.shuffle(imagePaths)

[INFO] loading image paths...


In [205]:
# sanity check: number of image paths collected from DATA_PATH
len(imagePaths)

27000

In [206]:
# size of each split: validation and test take their configured fraction of
# the images (rounded down), training takes whatever is left over
valPathsLen = int(VAL_SPLIT * len(imagePaths))
testPathsLen = int(TEST_SPLIT * len(imagePaths))
trainPathsLen = len(imagePaths) - valPathsLen - testPathsLen
trainValPathsLen = trainPathsLen + valPathsLen

# cut the shuffled list into three consecutive slices: train | val | test
trainPaths = imagePaths[:trainPathsLen]
valPaths = imagePaths[trainPathsLen:trainValPathsLen]
testPaths = imagePaths[trainValPathsLen:]

# copy the images of every split (train, then val, then test) into its
# own directory
print("[INFO] copying training and validation images...")
for splitPaths, splitFolder in ((trainPaths, TRAIN), (valPaths, VAL), (testPaths, TEST)):
    copy_images(splitPaths, splitFolder)

[INFO] copying training and validation images...
