In [None]:
# Mount Google Drive at /content/drive; later cells read the project code and
# data from, and write checkpoints and logs to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import os
import random
from pathlib import Path
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# from IPython.display import clear_output
from tqdm.auto import tqdm

import tensorflow as tf
import keras
import keras.backend as K
import keras.layers as KL
# import keras.engine as KE
import keras.models as KM
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
!pip install wandb
import wandb
!wandb login

In [None]:
# Put the project directory on the import path so the local modules below
# (presumably stored in that Drive folder) can be imported.
import sys
sys.path.append('/content/drive/My Drive/kaggle/contrail_detector/')
import utils
import models
# NOTE: `Path` imported here rebinds the name `Path` that the first cell imported
# from pathlib; from this point on `Path` refers to the object from `configs`.
# NOTE(review): later cells read `Config.n_times_before`, `Config.buffer_size`,
# `Config.batch_size` and `Config.n_epochs`, but no `Config` name is imported or
# defined in the visible notebook — confirm where it should come from.
from configs import binary_classifier_config, Path

In [None]:
# Read the per-split metadata tables; record ids are forced to strings so they
# compare equal to the ids decoded from the tf.data pipeline.
train_metadata, valid_metadata = [
    pd.read_json(metadata_file, dtype={'record_id': 'str'})
    for metadata_file in (
        "/content/drive/MyDrive/kaggle/contrail_detector/data/train_metadata.json",
        "/content/drive/MyDrive/kaggle/contrail_detector/data/valid_metadata.json",
    )
]

In [None]:
# Augmentation policy applied to training images: random rotation and
# horizontal mirroring only; every other geometric jitter is set to zero.
datagen = ImageDataGenerator(
    horizontal_flip=True,     # randomly mirror images left/right
    rotation_range=90,        # random rotations, range given in degrees
    width_shift_range=0.0,    # horizontal shift disabled
    height_shift_range=0.0,   # vertical shift disabled
    shear_range=0.0,          # shear disabled
    zoom_range=0.0,           # zoom disabled
    fill_mode='nearest',      # how pixels outside the input boundaries are filled
)

# The first two entries are used as the resize target by the loader functions.
input_shape = (256, 256, 3)

def _load_record(record_id, path, metadata):
    """Load one image and its label; shared by the train and valid loaders.

    Args:
        record_id: eager scalar string tensor holding the record id.
        path: eager scalar string tensor holding the directory prefix; the array
            is read from ``path + "image/" + record_id + ".npy"``.
        metadata: DataFrame with ``record_id`` and ``contrail_exists`` columns.

    Returns:
        ``(image, label)`` where ``image`` is the resized float32 tensor and
        ``label`` is the ``contrail_exists`` value of the matching row.

    Raises:
        ValueError: raised by ``Series.item()`` when the id does not match
            exactly one metadata row.
    """
    # tf.py_function passes eager tensors; turn them into plain Python strings.
    record_id = record_id.numpy().decode('utf-8')
    path = path.numpy().decode('utf-8')
    image_path = path + "image/" + str(record_id) + ".npy"
    # Pick index Config.n_times_before on the second-to-last axis, cast to
    # float32 and divide by 255.
    image = np.load(image_path)[..., Config.n_times_before, :].astype(np.float32) / 255.0
    image = tf.image.resize(image, input_shape[:2])
    label = metadata.loc[metadata.record_id == record_id, 'contrail_exists'].item()
    return image, label


def load_data_train(record_id, path):
    """Load the image/label pair for ``record_id`` using ``train_metadata``."""
    return _load_record(record_id, path, train_metadata)


def load_data_valid(record_id, path):
    """Load the image/label pair for ``record_id`` using ``valid_metadata``."""
    return _load_record(record_id, path, valid_metadata)

def augment_data(image, label):
    """Apply a random ``datagen`` transform to ``image``; ``label`` is returned unchanged."""
    def _random_transform(img):
        # py_function supplies an eager tensor; the generator receives its NumPy value.
        return datagen.random_transform(img.numpy())

    augmented = tf.py_function(func=_random_transform, inp=[image], Tout=tf.float32)
    return augmented, label

def id_to_data_train(record_id, path):
    """Dataset.map adapter: run ``load_data_train`` through ``tf.py_function``."""
    return tf.py_function(func=load_data_train, inp=[record_id, path], Tout=(tf.float32, tf.float32))


def id_to_data_valid(record_id, path):
    """Dataset.map adapter: run ``load_data_valid`` through ``tf.py_function``."""
    return tf.py_function(func = load_data_valid, inp = [record_id, path], Tout = (tf.float32, tf.float32))


def augment_data_map(image, label):
    """Dataset.map adapter: run ``augment_data`` through ``tf.py_function``."""
    return tf.py_function(func = augment_data, inp=[image,label], Tout = (tf.float32,tf.float32))

# Record ids of each split, in metadata order.
train_id = train_metadata.record_id.to_list()
valid_id = valid_metadata.record_id.to_list()

# One directory prefix per record so ids and paths can be zipped element-wise.
train_path = len(train_id)*[Path.train]
valid_path = len(valid_id)*[Path.valid]

train_id_dataset = tf.data.Dataset.from_tensor_slices(train_id)
train_path_dataset = tf.data.Dataset.from_tensor_slices(train_path)

valid_id_dataset = tf.data.Dataset.from_tensor_slices(valid_id)
valid_path_dataset = tf.data.Dataset.from_tensor_slices(valid_path)

# Training pipeline. Shuffle the cheap (id, path) pairs *before* loading so the
# shuffle buffer holds short strings instead of decoded images; the resulting
# sample order is randomised the same way, without the memory and warm-up cost.
train_dataset = (
    tf.data.Dataset.zip((train_id_dataset, train_path_dataset))
    .shuffle(Config.buffer_size)
    .map(id_to_data_train)
    .map(augment_data_map)
    .batch(Config.batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline. No shuffling and no augmentation: the data is only
# evaluated, so a fixed order keeps each epoch's validation pass repeatable.
valid_dataset = (
    tf.data.Dataset.zip((valid_id_dataset, valid_path_dataset))
    .map(id_to_data_valid)
    .batch(Config.batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# Training/validation step functions and the training loop (with checkpointing and logging)

@tf.function
def train_step(inputs, labels, model, loss_fn, optimizer):
  """Run one optimisation step on a single batch.

  Args:
    inputs: batch of model inputs.
    labels: batch of target values for ``inputs``.
    model: callable model, invoked with ``training=True``.
    loss_fn: loss callable following the Keras ``loss(y_true, y_pred)`` convention.
    optimizer: optimizer whose ``apply_gradients`` updates ``model.trainable_variables``.

  Returns:
    The loss value computed for this batch (before the weight update).
  """
  with tf.GradientTape() as tape:
    predictions = model(inputs, training = True)
    # Keras losses take the targets first: loss(y_true, y_pred).
    loss_value = loss_fn(labels, predictions)
  gradients = tape.gradient(loss_value, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  return loss_value

def valid_step(inputs, labels, model, loss_fn, metric_fn):
  """Evaluate a single batch without updating the model.

  Args:
    inputs: batch of model inputs.
    labels: batch of target values for ``inputs``.
    model: callable model, invoked with ``training=False``.
    loss_fn: loss callable following the Keras ``loss(y_true, y_pred)`` convention.
    metric_fn: metric callable following the Keras ``metric(y_true, y_pred)`` convention.

  Returns:
    Tuple ``(loss_value, metric_value)`` for this batch.
  """
  predictions = model(inputs, training = False)
  # Keras losses and metrics take the targets first: fn(y_true, y_pred).
  loss_value = loss_fn(labels, predictions)
  metric_value = metric_fn(labels, predictions)
  return loss_value, metric_value

def train_model(train_dataset, valid_dataset, model, loss_fn, metric_fn, optimizer, epochs, checkpoint_directory, log_directory, max_to_keep=3):
  """Train ``model`` for ``epochs`` epochs with checkpointing and logging.

  Each epoch runs ``train_step`` over ``train_dataset`` and ``valid_step`` over
  ``valid_dataset``, writes the epoch results to ``tf.summary`` and W&B, saves a
  checkpoint through a ``tf.train.CheckpointManager`` and uploads the checkpoint
  files to W&B as an artifact.

  Args:
    train_dataset: iterable yielding ``(inputs, labels)`` training batches.
    valid_dataset: iterable yielding ``(inputs, labels)`` validation batches.
    model: model to train.
    loss_fn: loss callable, forwarded to ``train_step`` / ``valid_step``.
    metric_fn: metric callable, forwarded to ``valid_step``. If it exposes
      ``reset_state``/``result`` (stateful Keras metric) it is reset before each
      validation pass and its ``result()`` is used as the epoch metric;
      otherwise the per-batch values are averaged.
    optimizer: optimizer used by ``train_step``; also stored in the checkpoint.
    epochs: number of epochs to run in this call.
    checkpoint_directory: directory managed by the checkpoint manager; the
      latest checkpoint found there is restored before training.
    log_directory: directory for the ``tf.summary`` event files.
    max_to_keep: number of checkpoints the manager retains.

  Returns:
    Tuple ``(train_losses, valid_losses, valid_metrics)`` of per-epoch values.
  """

  # printout which device the model is on; guarded because the first layer
  # (or a model that is not built yet) may not own any weights.
  model_weights = model.weights
  if model_weights:
    print(model_weights[0].device)
  else:
    print("Model has no weights yet; device unknown.")

  train_losses = []
  valid_losses = []
  valid_metrics = []

  # Create a checkpoint object and checkpoint manager
  checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                   optimizer=optimizer,
                                   model=model)
  manager = tf.train.CheckpointManager(checkpoint,
                                       checkpoint_directory,
                                       max_to_keep)

  # restore the last checkpoint if there is one
  checkpoint.restore(manager.latest_checkpoint)
  if manager.latest_checkpoint:
    print("Restored from {}".format(manager.latest_checkpoint))
  else:
    print("Initializing from scratch.")

  train_summary_writer = tf.summary.create_file_writer(log_directory)
  valid_summary_writer = tf.summary.create_file_writer(log_directory)

  # Initialize W&B run
  # NOTE(review): '0' does not look like one of wandb's documented `resume`
  # values ("allow", "must", "never", "auto") — confirm the intended behaviour.
  wandb.init(project='your_project_name', name='your_experiment_name', resume = '0')

  # Stateful Keras metrics accumulate across calls; look up their reset/result
  # hooks once. Plain metric functions have neither and are averaged per batch.
  reset_metric = getattr(metric_fn, "reset_state", None) or getattr(metric_fn, "reset_states", None)
  metric_result = getattr(metric_fn, "result", None)

  # training loop starts
  for _ in range(epochs):
    start_time = timer()
    checkpoint.step.assign_add(1)
    # The checkpointed step counter doubles as the (resumable) epoch number.
    epoch_number = checkpoint.step.numpy()
    print(f"\n Epoch {epoch_number} : \n")

    # initialize losses and metrics
    train_total_loss = 0
    valid_total_loss = 0
    valid_total_metric = 0
    train_n_batches = 0
    valid_n_batches = 0

    # train loop
    for X_batch, y_batch in tqdm(train_dataset, desc='Training', position=0):
      loss_value = train_step(X_batch, y_batch, model, loss_fn, optimizer)

      train_total_loss += loss_value
      train_n_batches += 1

    # valid loop; start from a clean metric state so epochs do not bleed into each other
    if callable(reset_metric):
      reset_metric()
    for X_batch, y_batch in tqdm(valid_dataset, desc='Validation', position=0):
      loss_value, metric_value = valid_step(X_batch, y_batch, model, loss_fn, metric_fn)
      valid_total_loss += loss_value
      valid_total_metric += metric_value
      valid_n_batches += 1

    train_epoch_loss = train_total_loss / train_n_batches
    valid_epoch_loss = valid_total_loss / valid_n_batches
    if callable(metric_result):
      # A stateful metric already holds the value accumulated over the whole pass.
      valid_epoch_metric = metric_result()
    else:
      valid_epoch_metric = valid_total_metric / valid_n_batches

    train_losses.append(train_epoch_loss)
    valid_losses.append(valid_epoch_loss)
    valid_metrics.append(valid_epoch_metric)

    # Log training metrics for the epoch
    with train_summary_writer.as_default():
      tf.summary.scalar('train_loss', train_epoch_loss, step=epoch_number)

    # Log validation metrics for the epoch
    with valid_summary_writer.as_default():
      tf.summary.scalar('valid_loss', valid_epoch_loss, step=epoch_number)
      tf.summary.scalar('valid_metric', valid_epoch_metric, step=epoch_number)

    # Log metrics to W&B
    wandb.log({
              'train_loss': train_epoch_loss.numpy(),
              'valid_loss': valid_epoch_loss.numpy(),
              'valid_metric': valid_epoch_metric.numpy(),
              'epoch': epoch_number
          })
    #save checkpoint
    save_path = manager.save()

    # Save checkpoint to W&B as an artifact
    artifact = wandb.Artifact(f'epoch-{epoch_number}-checkpoint', type='model')
    # Add the index file
    artifact.add_file(save_path + ".index")

    # Add the data file
    # NOTE(review): assumes the checkpoint is written as a single data shard.
    artifact.add_file(save_path + ".data-00000-of-00001")

    wandb.log_artifact(artifact)
    end_time = timer()

    # print out results
    print("\n")
    print("  (Results)")
    print(f"  train_loss: {train_epoch_loss:.4f}")
    print(f"  valid_loss: {valid_epoch_loss:.4f}")
    print(f"  valid_metric: {valid_epoch_metric:.4f}")
    print("\n")
    print("  (Time)")
    print("  start_time : ", start_time)
    print("  end_time : ", end_time)
    print("-----------------------------------------------------------------")

  # Finish the W&B run
  wandb.finish()

  return train_losses, valid_losses, valid_metrics

In [None]:
# Model built by the project's `models` module.
model = models.binary_classifier()

# Adam optimizer whose learning rate follows a cosine-decay schedule.
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=binary_classifier_config.initial_learning_rate,
    decay_steps=4000,
    alpha=0.0001,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Binary cross-entropy on model outputs that are not logits (from_logits=False),
# with binary accuracy as the validation metric.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
metric_fn = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Where checkpoints and tf.summary logs for this model are written on Drive.
checkpoint_directory = "/content/drive/MyDrive/kaggle/contrail_detector/checkpoint/binary_classifier"
log_directory = "/content/drive/MyDrive/kaggle/contrail_detector/log/binary_classifier"

# Launch training; the three returned lists hold one value per epoch.
train_losses, valid_losses, valid_metrics = train_model(
    train_dataset=train_dataset,
    valid_dataset=valid_dataset,
    model=model,
    loss_fn=loss_fn,
    metric_fn=metric_fn,
    optimizer=optimizer,
    epochs=Config.n_epochs,
    checkpoint_directory=checkpoint_directory,
    log_directory=log_directory,
)