# TensorFlow modeling experiments
---

Notebook for initial experiments on modeling deforestation with TensorFlow, using the [Planet: Understanding the Amazon from Space](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/) dataset.

## Setup

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow_addons.metrics import FBetaScore
from tensorflow.keras import losses, optimizers, metrics
from tqdm.keras import TqdmCallback
from ipywidgets import interact
from functools import partial

In [None]:
# `sys` is not imported by the first cell, so bring it into scope here;
# without it the path setup below fails with a NameError.
import sys

# Make the project's local data and modeling directories importable
sys.path.append('../data/')
sys.path.append('../modeling/')
import data_utils
from models import ResNet

In [None]:
# Default experiment configuration. The interactive selector defined in the
# next cell reassigns these three globals whenever it is invoked.
model_type, task, pretrain_dataset = 'resnet50', 'orig_labels', 'bigearthnet'

In [None]:
@interact
def choose_model_and_task(chosen_model_type=['resnet50', 'pretrained_resnet50'], chosen_task=['orig_labels', 'deforestation']):
    """Store the chosen model type and task in the notebook's globals.

    The list defaults are the option sets handed to `interact`; they are
    part of the widget declaration and are not mutated here.

    Args:
        chosen_model_type: Selected model variant
            ('resnet50' or 'pretrained_resnet50').
        chosen_task: Selected task ('orig_labels' or 'deforestation').
    """
    global model_type, task, pretrain_dataset
    model_type = chosen_model_type
    task = chosen_task
    # Only the pretrained variant is tied to a pretraining dataset
    pretrain_dataset = 'bigearthnet' if chosen_model_type == 'pretrained_resnet50' else None

## Load the data

### Create a dataset

In [None]:
# Read the labels CSV and encode its tags through data_utils.encode_tags
labels_path = data_utils.DATA_PATH + data_utils.LABELS_PATH
labels_df = data_utils.encode_tags(pd.read_csv(labels_path), drop_tags_col=True)
if task == 'deforestation':
    # Keep only the image identifier and the `deforestation` column
    labels_df = data_utils.add_deforestation_label(labels_df)[['image_name', 'deforestation']]
labels_df

In [None]:
# Show element 1 of the first generated sample (presumably the labels, given
# the (image, labels) signature declared for the dataset below — confirm)
next(data_utils.get_amazon_sample(labels_df))[1]

In [None]:
# Zero-argument wrapper around the sample generator, bound to `labels_df`
def data_gen():
    """Yield every sample produced by data_utils.get_amazon_sample for labels_df."""
    yield from data_utils.get_amazon_sample(labels_df)

In [None]:
# Shape of the label tensor declared in the dataset signature.
# The per-tag label vector belongs to the original-labels task; the
# deforestation task keeps only the single `deforestation` column, so a
# len(TAGS)-sized vector cannot describe it (the condition was inverted).
# NOTE(review): assumes the encoded labels hold exactly one entry per item of
# data_utils.TAGS — confirm against data_utils.get_amazon_sample.
if task == 'orig_labels':
    labels_shape = (len(data_utils.TAGS),)
else:
    # Left unspecified so that whatever shape the generator yields is accepted
    labels_shape = None
dataset = tf.data.Dataset.from_generator(
    data_gen,
    output_signature=(
        # Images are declared as 256x256 with 3 channels
        tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float16),
        tf.TensorSpec(shape=labels_shape, dtype=tf.uint8),
    ),
)

In [None]:
next(iter(dataset))

### Split into train, validation and test sets

In [None]:
# Total number of labelled samples, used to size the dataset splits
n_samples = labels_df.shape[0]
n_samples

In [None]:
# Sequential split: the first 10% of the samples form the validation set, the
# following samples up to the 90% mark the training set, and the rest the test set.
n_train_val = int(0.9 * n_samples)
n_val = int(0.1 * n_samples)
train_val_set = dataset.take(n_train_val)
test_set = dataset.skip(n_train_val)
val_set = train_val_set.take(n_val)
train_set = train_val_set.skip(n_val)

In [None]:
# Batch every split and attach an autotuned prefetch to each of them
batch_size = 32
train_set, val_set, test_set = [
    split.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    for split in (train_set, val_set, test_set)
]

In [None]:
# Keep a single training batch for the overfitting sanity check further down
single_batch = train_set.take(1)

In [None]:
# Grab the first test batch: the loop binds it to `i` and stops immediately
for i in test_set.take(1):
    break

In [None]:
# Display the batch bound to `i` by the loop in the previous cell
i

In [None]:
# Print the contents of the single training batch
for i in single_batch:
    print(i)

## Train models

### Set the modeling configuration

In [None]:
# Gather the constructor arguments first, then instantiate the network
resnet_config = dict(pretrain_dataset=pretrain_dataset, pooling='max', task=task)
model = ResNet(**resnet_config)

In [None]:
# Optimizer
lr = 0.003
opt = optimizers.Adam(learning_rate=lr)

# Loss: both variants consume raw logits (from_logits=True)
if task == 'orig_labels':
    # NOTE(review): if several tags can be active for the same image,
    # BinaryCrossentropy would be the matching loss here as well — confirm.
    loss = losses.CategoricalCrossentropy(from_logits=True)
else:
    loss = losses.BinaryCrossentropy(from_logits=True)

# Metrics.
# * The classes are referenced through `tf.keras.metrics` because this list is
#   bound to the name `metrics`, which shadows the imported `metrics` module;
#   going through the module name would fail when the cell is run a second time.
# * `Accuracy` checks predictions and labels for exact equality, which is
#   meaningless for raw logits. `BinaryAccuracy` thresholds the logits at 0
#   (i.e. a probability of 0.5) before comparing; the metric keeps the
#   'accuracy' name so the reported key does not change.
metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0),
    FBetaScore(num_classes=model.n_outputs, average='macro', beta=2.0),
]

### Test a model

Overfit the model on a single batch of the classification task to confirm that the training setup works.

In [None]:
# Attach the optimizer, loss and metrics configured above to the model
model.compile(optimizer=opt, loss=loss, metrics=metrics)

In [None]:
# Train repeatedly on the single batch; Keras' own logging is disabled
# (verbose=0) and progress is reported through the tqdm callback instead
model.fit(single_batch, epochs=100, verbose=0, callbacks=[TqdmCallback()])