In [33]:
import os
import os.path as op
import json
from pathlib import Path
import shutil
import logging
import numpy as np
from tqdm import tqdm
from skimage import io

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
# Logging configuration
logging.basicConfig(level=logging.INFO,
                    datefmt='%H:%M:%S',
                    format='%(asctime)s | %(levelname)-5s | %(module)-15s | %(message)s')

IMAGE_SIZE = (299, 299)  # All images contained in this dataset are 299x299 (originally, to match Inception v3 input size)
SEED = 17

# Head directory containing all image subframes. Update with the relative path of your data directory
data_head_dir = Path('/content/drive/MyDrive/Colab Notebooks/data')

# Find all subframe directories
subdirs = [Path(subdir.stem) for subdir in data_head_dir.iterdir() if subdir.is_dir()]
src_image_ids = ['_'.join(a_path.name.split('_')[:3]) for a_path in subdirs]

In [45]:
# Load train/val/test subframe IDs
def load_text_ids(file_path):
    """Simple helper to load all lines from a text file"""
    with open(file_path, 'r') as f:
        lines = [line.strip() for line in f.readlines()]

    return lines

# Load the subframe names for the three data subsets
train_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/train_source_images.txt')
validate_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/val_source_images.txt')
test_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/test_source_images.txt')

# Generate a list containing the dataset split for the matching subdirectory names
subdir_splits = []
for src_id in src_image_ids:
    if src_id in train_ids:
        subdir_splits.append('train')
    elif src_id in validate_ids:
        subdir_splits.append('validate')
    elif(src_id in test_ids):
        subdir_splits.append('test')
    else:
        logging.warning(f'{src_id}: Did not find designated split in train/validate/test list.')
        subdir_splits.append(None)

# Loading and pre processing the data
### Note that there are multiple ways to preprocess and load your data in order to train your model in tensorflow. We have provided one way to do it in the following cell. Feel free to use your own method and get better results.

In [56]:
import random
import tensorflow as tf
from PIL import Image
import os

def load_and_preprocess(img_loc, label):

    def _inner_function(img_loc, label):

        # Convert tensor to native type
        img_loc_str = img_loc.numpy().decode('utf-8')
        label_str = label.numpy().decode('utf-8')

        img = Image.open(img_loc_str).convert('RGB')


        return img, 1 if label_str=='frost' else 0

    # Wrap the Python function
    X, y = tf.py_function(_inner_function, [img_loc, label], [tf.float32, tf.int64])

    return X, y

def load_subdir_data(dir_path, image_size, seed=None):

    """Helper to create a TF dataset from each image subdirectory"""

    # Grab only the classes that (1) we want to keep and (2) exist in this directory
    tile_dir = dir_path / Path('tiles')
    #print(tile_dir)
    label_dir = dir_path /Path('labels')
    #print(label_dir)
    loc_list = []

    for folder in os.listdir(tile_dir):
        print("folder:", folder)
        print("is dir?",os.path.isdir(os.path.join(tile_dir, folder)))
        if os.path.isdir(os.path.join(tile_dir, folder)):
            for file in os.listdir(os.path.join(tile_dir, folder)):
                if file.endswith(".png"):
                    loc_list.append((os.path.join(os.path.join(tile_dir, folder), file), folder))

    return loc_list

# Loop over all subframes, loading each into a list
tf_data_train, tf_data_test, tf_data_val = [], [], []
tf_dataset_train, tf_dataset_test, tf_dataset_val = [], [], []

# Update the batch and buffer size as per your model requirements
buffer_size = 64
batch_size = 32

for subdir, split in zip(subdirs, subdir_splits):
    full_path = data_head_dir / subdir
    #print(full_path)
    if split=='validate':
        tf_data_val.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))
    elif split=='train':
        tf_data_train.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))
    elif split=='test':
        tf_data_test.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))

random.shuffle(tf_data_train)
img_list, label_list = zip(*tf_data_train)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_train = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_train = tf_dataset_train.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_train = tf_dataset_train.shuffle(buffer_size=buffer_size).batch(batch_size)

random.shuffle(tf_data_val)
img_list, label_list = zip(*tf_data_val)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_val = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_val = tf_dataset_val.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_val = tf_dataset_val.shuffle(buffer_size=buffer_size).batch(batch_size)

random.shuffle(tf_data_test)
img_list, label_list = zip(*tf_data_test)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_test = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_test = tf_dataset_test.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_test = tf_dataset_test.shuffle(buffer_size=buffer_size).batch(batch_size)

/content/drive/MyDrive/Colab Notebooks/data/ESP_038058_1205_10240_15360_5120_10240/tiles
/content/drive/MyDrive/Colab Notebooks/data/ESP_038058_1205_10240_15360_5120_10240/labels
folder: frost
is dir? True
/content/drive/MyDrive/Colab Notebooks/data/ESP_047596_1845_0_5120_10240_15360/tiles
/content/drive/MyDrive/Colab Notebooks/data/ESP_047596_1845_0_5120_10240_15360/labels
folder: background
is dir? True
/content/drive/MyDrive/Colab Notebooks/data/ESP_035667_2295_30720_35840_5120_10240/tiles
/content/drive/MyDrive/Colab Notebooks/data/ESP_035667_2295_30720_35840_5120_10240/labels
folder: background
is dir? True
/content/drive/MyDrive/Colab Notebooks/data/PSP_002033_1325_10240_15360_5120_10240/tiles
/content/drive/MyDrive/Colab Notebooks/data/PSP_002033_1325_10240_15360_5120_10240/labels
folder: frost
is dir? True
/content/drive/MyDrive/Colab Notebooks/data/ESP_026885_2295_25600_30720_5120_10240/tiles
/content/drive/MyDrive/Colab Notebooks/data/ESP_026885_2295_25600_30720_5120_10240/la

ValueError: ignored

In [62]:
dir_path = '../content/drive/MyDrive/Colab Notebooks/data/ESP_011605_1170_10240_15360_0_5120'
#load_subdir_data(path1, IMAGE_SIZE, SEED)


tile_dir = dir_path / Path('tiles')
print("tile_dir:", tile_dir)
label_dir = dir_path /Path('labels')
print("label dir:",label_dir)
loc_list = []

for folder in os.listdir(tile_dir):
    print("folder:", folder)
    print("is dir?",os.path.isdir(os.path.join(tile_dir, folder)))
    if os.path.isdir(os.path.join(tile_dir, folder)):
        for file in os.listdir(os.path.join(tile_dir, folder)):
            if file.endswith(".png"):
                loc_list.append((os.path.join(os.path.join(tile_dir, folder), file), folder))

print(loc_list)

tile_dir: ../content/drive/MyDrive/Colab Notebooks/data/ESP_011605_1170_10240_15360_0_5120/tiles
label dir: ../content/drive/MyDrive/Colab Notebooks/data/ESP_011605_1170_10240_15360_0_5120/labels
[]


In [63]:
print("tile_dir:", tile_dir)
os.listdir(tile_dir)


tile_dir: ../content/drive/MyDrive/Colab Notebooks/data/ESP_011605_1170_10240_15360_0_5120/tiles


[]