In [1]:
import os
import os.path as op
import json
from pathlib import Path
import shutil
import logging
import numpy as np
from tqdm import tqdm
from skimage import io
import keras
import math

import tensorflow as tf

from matplotlib import pyplot as plt
import random
from PIL import Image
from tensorflow.keras import layers, models, regularizers, callbacks
import warnings
warnings.filterwarnings('ignore')

from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
# Mount drive to Colab notebook
from google.colab import drive
#drive.flush_and_unmount()
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Uploaded data.zip to my google drive. Now, unzip it and store in 552_project_data folder
# zip_file_path = '/content/drive/MyDrive/Colab Notebooks/data.zip'
# destination_folder = '/content/drive/MyDrive/552_project_data/'
# !unzip "$zip_file_path" -d "$destination_folder"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_16257_16556_8708_9007.png  
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_16556_16855_6914_7213.png  
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_16257_16556_6914_7213.png  
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_16556_16855_8708_9007.png  
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_18948_19247_8110_8409.png  
  inflating: /content/drive/MyDrive/552_project_data/data/ESP_020415_1195_15360_20480_5120_10240/tiles/frost/ESP_020415_1195_18350_18649_9605_9904.png  
  inflating: /con

In [3]:
# Logging configuration
logging.basicConfig(level=logging.INFO,
                    datefmt='%H:%M:%S',
                    format='%(asctime)s | %(levelname)-5s | %(module)-15s | %(message)s')

IMAGE_SIZE = (299, 299)  # All images contained in this dataset are 299x299 (originally, to match Inception v3 input size)
SEED = 17

# Head directory containing all image subframes. Update with the relative path of your data directory
data_head_dir = Path('/content/drive/MyDrive/552_project_data/data')

# Find all subframe directories
subdirs = [Path(subdir.stem) for subdir in data_head_dir.iterdir() if subdir.is_dir()]
src_image_ids = ['_'.join(a_path.name.split('_')[:3]) for a_path in subdirs]

In [4]:
# Load train/val/test subframe IDs
def load_text_ids(file_path):
    """Simple helper to load all lines from a text file"""
    with open(file_path, 'r') as f:
        lines = [line.strip() for line in f.readlines()]
    return lines

# Load the subframe names for the three data subsets
train_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/train_source_images.txt')
validate_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/val_source_images.txt')
test_ids = load_text_ids('/content/drive/MyDrive/Colab Notebooks/test_source_images.txt')

# Generate a list containing the dataset split for the matching subdirectory names
subdir_splits = []
for src_id in src_image_ids:
    if src_id in train_ids:
        subdir_splits.append('train')
    elif src_id in validate_ids:
        subdir_splits.append('validate')
    elif(src_id in test_ids):
        subdir_splits.append('test')
    else:
        logging.warning(f'{src_id}: Did not find designated split in train/validate/test list.')
        subdir_splits.append(None)

# Loading and pre processing the data
### Note that there are multiple ways to preprocess and load your data in order to train your model in tensorflow. We have provided one way to do it in the following cell. Feel free to use your own method and get better results.

In [5]:
import random
import tensorflow as tf
from PIL import Image

def load_and_preprocess(img_loc, label):

    def _inner_function(img_loc, label):

        # Convert tensor to native type
        img_loc_str = img_loc.numpy().decode('utf-8')
        label_str = label.numpy().decode('utf-8')

        img = Image.open(img_loc_str).convert('RGB')


        return img, 1 if label_str=='frost' else 0

    # Wrap the Python function
    X, y = tf.py_function(_inner_function, [img_loc, label], [tf.float32, tf.int64])

    return X, y


def load_subdir_data(dir_path, image_size, seed=None):

    """Helper to create a TF dataset from each image subdirectory"""

    # Grab only the classes that (1) we want to keep and (2) exist in this directory
    tile_dir = dir_path / Path('tiles')
    label_dir = dir_path /Path('labels')

    loc_list = []

    for folder in os.listdir(tile_dir):
        if os.path.isdir(os.path.join(tile_dir, folder)):
            for file in os.listdir(os.path.join(tile_dir, folder)):
                if file.endswith(".png"):
                    loc_list.append((os.path.join(os.path.join(tile_dir, folder), file), folder))

    return loc_list

# Loop over all subframes, loading each into a list
tf_data_train, tf_data_test, tf_data_val = [], [], []
tf_dataset_train, tf_dataset_test, tf_dataset_val = [], [], []

# Update the batch and buffer size as per your model requirements
buffer_size = 64
batch_size = 32

for subdir, split in zip(subdirs, subdir_splits):
    full_path = data_head_dir / subdir
    if split=='validate':
        tf_data_val.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))
    elif split=='train':
        tf_data_train.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))
    elif split=='test':
        tf_data_test.extend(load_subdir_data(full_path, IMAGE_SIZE, SEED))

random.shuffle(tf_data_train)
img_list, label_list = zip(*tf_data_train)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_train = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_train = tf_dataset_train.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_train = tf_dataset_train.shuffle(buffer_size=buffer_size).batch(batch_size)

random.shuffle(tf_data_val)
img_list, label_list = zip(*tf_data_val)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_val = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_val = tf_dataset_val.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_val = tf_dataset_val.shuffle(buffer_size=buffer_size).batch(batch_size)

random.shuffle(tf_data_test)
img_list, label_list = zip(*tf_data_test)
img_list_t = tf.convert_to_tensor(img_list)
lb_list_t = tf.convert_to_tensor(label_list)

tf_dataset_test = tf.data.Dataset.from_tensor_slices((img_list_t, lb_list_t))
tf_dataset_test = tf_dataset_test.map(load_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
tf_dataset_test = tf_dataset_test.shuffle(buffer_size=buffer_size).batch(batch_size)

In [6]:
print("\n\nTraining Set Length")
print(len(tf_dataset_train))



Training Set Length
442


In [7]:
print("\n\nTesting Set Length")
print(len(tf_dataset_test))



Testing Set Length
250


In [8]:
print("\n\nValidation Set Length")
print(len(tf_dataset_val))



Validation Set Length
247


## Training CNN + MLP
###### i. To perform empirical regularization, crop, randomly zoom, rotate, flip, con- trast, and translate images in your training set for image augmentation. You can use various tools to do this, including OpenCV.

In [16]:
!pip install --upgrade tensorflow-addons
!pip install --upgrade tensorflow


Collecting keras<2.16,>=2.15.0 (from tensorflow)
  Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Installing collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 3.0.1
    Uninstalling keras-3.0.1:
      Successfully uninstalled keras-3.0.1
Successfully installed keras-2.15.0


In [9]:
# https://github.com/zonna19/DSCI552-HWs/blob/main/Final%20Project/Hong_Seungil_Final_Project.ipynb
import tensorflow_addons as tfa

def augment(image, label):
    # Randomly change brightness
    image = tf.image.random_brightness(image, max_delta = 0.1)
    # Randomly change contrast
    image = tf.image.random_contrast(image, lower = 0.8, upper = 1.2)
    # Randomly rotate the image
    angle = tf.random.uniform([], minval = -math.pi/4, maxval = math.pi/4)  # Random angle in [-45, 45] degrees
    image = tfa.image.rotate(image, angle)

    # We do not need to flip images since those NASA generated images can be very rarely fliped.
    # Means assuming there are no artificial ones.
    # And Mars cannot be flimed with fliped images

    return image, label

# Apply the augment function to each element in the dataset
tf_dataset_train = tf_dataset_train.map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)

c) ii) Train a three-layer CNN followed by a dense layer on the data. Choose the size of the kernels and depth of the layers and the number of neurons in the dense layer (MLP) on your own. Use ReLU’s in all of the layers. Use the softmax function, batch normalization3 and a dropout rate of 30%, L2 regularization, as well as ADAM optimizer. Use cross entropy loss. Train for at least 20 epochs and perform early stopping using the validation set. Keep the network parameters that have the lowest validation error. Plot the training and validation errors vs. epochs.

In [10]:
# Define the model
model = Sequential([
    # 1
    Conv2D(32, (3, 3), padding='same', input_shape=(299, 299, 3), kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    ReLU(),
    MaxPooling2D(pool_size=(2, 2)),

    # 2
    Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    ReLU(),
    MaxPooling2D(pool_size=(2, 2)),

    # 3
    Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    ReLU(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(2, activation='softmax')  # for binary classification with softmax
])

In [11]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping and model checkpointing
early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

history = model.fit(
    tf_dataset_train,
    epochs=20,
    validation_data=tf_dataset_val,
    callbacks=[early_stopping, model_checkpoint]
)

Epoch 1/20


ValueError: ignored

In [51]:
!pip install --upgrade tensorflow
!pip install --upgrade keras


Collecting tensorflow
  Downloading tensorflow-2.15.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.16,>=2.15 (from tensorflow)
  Downloading tensorboard-2.15.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m65.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow)
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.0/442.0 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras<2.16,>=2.15.0 (from tensorflow)
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
Installing collected pack

Collecting keras
  Downloading keras-3.0.1-py3-none-any.whl (999 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m999.1/999.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting namex (from keras)
  Downloading namex-0.0.7-py3-none-any.whl (5.8 kB)
Installing collected packages: namex, keras
  Attempting uninstall: keras
    Found existing installation: keras 2.15.0
    Uninstalling keras-2.15.0:
      Successfully uninstalled keras-2.15.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.15.0.post1 requires keras<2.16,>=2.15.0, but you have keras 3.0.1 which is incompatible.[0m[31m
[0mSuccessfully installed keras-3.0.1 namex-0.0.7
