# Main ...

 
``config, load, preprocess, train, eval  for  Tick tick bloom``

**Do NOT rerun this notebook unless you want to overwrite past models. Always fork it, do your work in the fork, and 
DON'T forget to change the name.**

**``Mission: NNs on landsat8-500x500m-v1``**


# Load imports and dependencies

In [2]:
import warnings
import sys
import os
import time
import joblib
import random
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import OrdinalEncoder

import tensorflow as tf
from tensorflow.keras import layers, activations, losses, metrics, models, optimizers, callbacks
from category_encoders.target_encoder import TargetEncoder

warnings.filterwarnings('ignore')

In [3]:
# local utilities imports
from tick_tick_bloom_utils import comp_metric, den2sev_map

In [4]:
# wandb stuff for tracking
# Fetch the W&B API key from Kaggle's secret store and log in so runs can be tracked.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# The secret is looked up under the label "wandb_bloom_tracker".
wandb_login = user_secrets.get_secret("wandb_bloom_tracker")

import wandb
wandb.login(key=wandb_login)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Config

In [5]:
# dot dictionary
class dotdict(dict):
    """Dictionary whose items can also be read, written and deleted as attributes.

    Reading a missing attribute returns ``None`` (same as ``dict.get``);
    deleting a missing attribute raises ``KeyError``.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]


# Config
config = dotdict()
config.RANDOM_SEED = 18952

# Timestamp-based id so every run gets a distinct name.
config.unique_id = int(time.time())
print(f'unique_id: {config.unique_id}')
config.name = f'conv2d64_d128-{config.unique_id}'

config.PROJECT_NAME = 'tick-tick-bloom'
# config['DATA_DIR'] = '../data/'
# config['MODEL_DIR'] = '../models/'
config.SAVE_MODEL = True

# Image configuration: spatial size each band is resized to, and number of bands used.
config.IMG_SIZE = (36, 36)
config.CHANNELS = 5

# Training configuration
config.train = dotdict(
    epochs=1000,
    batch_size=32,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
    lr=1e-5,
)

config.desc = """simple nns on landsat8-500x500m-v1 --> Not expecting much since data looks soo noisy..."""

unique_id: 1673603823


In [6]:
# seed everything
def seed_everything(seed=config.RANDOM_SEED):
    """Seed Python's `random`, NumPy and TensorFlow, and export PYTHONHASHSEED.

    The default seed is read from `config.RANDOM_SEED` once, when the function is defined.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Optional TF determinism switches, left disabled:
    #     os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    #     os.environ['TF_DETERMINISTIC_OPS'] = '1'
    for set_seed in (random.seed, np.random.seed, tf.random.set_seed):
        set_seed(seed)

# Alternative: tf.keras.utils.set_random_seed(config.RANDOM_SEED)  # supposedly sets seed for python, numpy, tf

seed_everything()

# Utils

In [7]:
def keras_rmse_clf(y_true, y_pred):
    """
    Competition metric (RMSE over severity classes) for classifier outputs.

    y_true: integer class labels in [0-4], shape (batch,) or (batch, 1).
    y_pred: per-class scores of shape (batch, n_classes); the predicted class is the argmax.

    Returns the RMSE of the current batch as a scalar tensor.
    """
    # Flatten both sides before subtracting: Keras may hand y_true over as
    # (batch, 1) while argmax yields (batch,), and subtracting those would
    # silently broadcast to a (batch, batch) matrix and give a wrong value.
    y_pred = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
    y_true = tf.reshape(y_true, [-1])
    # float32 instead of float16 so the accumulation does not lose precision.
    y_pred = tf.cast(y_pred, tf.float32)
    y_true = tf.cast(y_true, tf.float32)
    squared_difference = tf.square(y_true - y_pred)
    return tf.sqrt(tf.reduce_mean(squared_difference))

def keras_rmse_reg(y_true, y_pred):
    """
    Competition metric (RMSE over severity levels) for regression outputs.

    y_true: labels in [0-4], shape (batch,) or (batch, 1).
    y_pred: real-valued predictions, shape (batch,) or (batch, 1); rounded to
        the nearest integer before scoring.

    Returns the RMSE of the current batch as a scalar tensor.
    """
    # Flatten both sides so the mean runs over the batch. Reducing over the
    # last axis of a (batch, 1) tensor would take sqrt per sample (i.e. MAE),
    # and mismatched (batch,) / (batch, 1) shapes would broadcast.
    y_pred = tf.reshape(tf.cast(tf.math.round(y_pred), tf.float32), [-1])
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    squared_difference = tf.square(y_true - y_pred)
    return tf.sqrt(tf.reduce_mean(squared_difference))


def rmse_loss(y_true, y_pred):
    """Root-mean-squared-error loss over the batch, for regression-type models.

    y_true and y_pred may be (batch,) or (batch, 1); both are flattened first.
    Returns a scalar tensor.
    """
    y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
    # Take the mean over the whole batch *before* the square root. Taking
    # sqrt of a per-sample squared error is just |error| (MAE), not RMSE.
    mean_squared = tf.reduce_mean(tf.square(y_true - y_pred))
    # Small epsilon keeps the sqrt gradient finite when the error is exactly 0.
    return tf.sqrt(mean_squared + tf.keras.backend.epsilon())

# Load data

In [8]:
INPUT_METADATA_DIR = '/kaggle/input/ticktickbloomdataset'

# Read the three competition CSVs from the metadata directory, in this order.
metadata, sub_format, train_labels = (
    pd.read_csv(os.path.join(INPUT_METADATA_DIR, csv_name))
    for csv_name in ('metadata.csv', 'submission_format.csv', 'train_labels.csv')
)

In [9]:
# The downloaded imagery is split across two input datasets with the same sub-folder name.
part1 = '/kaggle/input/pull-landsat-sr-v1-part-1'
part2 = '/kaggle/input/pull-landsat-sr-v1-part-2'

part1_imgs, part2_imgs = (
    os.path.join(root, "landsat8_sr_500m_v1") for root in (part1, part2)
)

# A file's uid is everything before the first '.' in its name.
p1imgs = [fname.partition('.')[0] for fname in os.listdir(part1_imgs)]
p2imgs = [fname.partition('.')[0] for fname in os.listdir(part2_imgs)]

img_uids = [*p1imgs, *p2imgs]
len(img_uids)                           # 1754 imgs missing!

21816

In [10]:
# Keep only metadata rows for which an image file was found.
metadata_subset = metadata.loc[metadata['uid'].isin(img_uids)]

# Training rows (with images) joined to their labels on uid.
data = metadata_subset.loc[metadata_subset['split'] == 'train'].merge(train_labels, on='uid')

# Test rows are taken from the full metadata, not from the image-filtered subset.
test_data = metadata.loc[metadata['split'] == 'test']

data.shape, test_data.shape

((15724, 8), (6510, 5))

In [11]:
# Metadata rows whose uid has no downloaded image.
missing_data = metadata[~metadata.uid.isin(metadata_subset.uid)]
missing_data.split.value_counts()  # per the output below: 1336 train and 418 test samples are missing.

train    1336
test      418
Name: split, dtype: int64

In [12]:
# load img from img paths
def get_img(uid: str) -> np.ndarray:
    """Load the stored array for `uid`, looking in part 1 first and then part 2.

    Only pass uids that are present in the downloaded data; a uid found in
    neither directory raises FileNotFoundError.

    NOTE: joblib.load unpickles the file, so only use it on trusted data.
    """
    filename = f'{uid}.npy'
    try:
        return joblib.load(os.path.join(part1_imgs, filename))
    except FileNotFoundError:
        # Fall back only when the file is absent from part 1; any other
        # failure (e.g. a corrupt file) is a real error and should surface.
        return joblib.load(os.path.join(part2_imgs, filename))

def normalize_band(img_band):
    """Scale a band by its maximum value (temporary normalisation towards [0, 1]).

    A band whose maximum is 0 is divided by 1 instead, so an all-zero band
    stays all-zero rather than turning into NaN from 0/0.
    """
    m = img_band.max()
    if m == 0:
        m = 1
    return img_band / m

def resize_band(norm_img_band):
    """Resize a single band to `config.IMG_SIZE` using cv2.resize's default settings."""
    target_size = config.IMG_SIZE
    resized = cv2.resize(norm_img_band, target_size)
    return resized


# seperate img_arr from data and resize all
def get_img_arr(arr: np.ndarray, start: int = 0, end: int = 5) -> np.ndarray:
    """Return entries `start` (inclusive) to `end` (exclusive) along the first axis of `arr`.

    With the defaults this keeps just the first five bands.
    """
    band_range = slice(start, end)
    return arr[band_range]

def norm_resize_bands(arr_bands: np.ndarray):
    """Normalise and then resize every band in `arr_bands`; return them stacked in one array."""
    return np.array([resize_band(normalize_band(band)) for band in arr_bands])

#  do that for all samples in metadata_subset (and test_data)
def get_all_imgs(uid_list: list) -> np.ndarray:
    """Load, band-select, normalise and resize the image of every uid in `uid_list`.

    Shows a tqdm progress bar and returns all processed images stacked in one array.
    """
    processed = [
        norm_resize_bands(get_img_arr(get_img(uid)))
        for uid in tqdm(uid_list)
    ]
    return np.array(processed)


#  make into tf or np datasets

In [13]:
def get_np_data(split: float = 0.2, task='train'):
    """Return image arrays (and labels) as numpy/pandas data.

    task='train': stratified split of `data` by severity; returns
        (x_train, y_train, x_val, y_val), where `split` is the validation fraction.
    task='test': returns x_test, the images for every uid in `test_data`.
        NOTE(review): get_img only works for uids that were downloaded, so
        test uids without an image will raise there - filter first if needed.

    Raises:
        ValueError: if `task` is neither 'train' nor 'test'.
    """
    if task == 'train':
        print("Loading train and validation data...")
        x_train_uids, x_val_uids, y_train, y_val = train_test_split(
            data['uid'],
            data.severity,
            test_size=split,
            random_state=config.RANDOM_SEED,
            stratify=data.severity
        )

        x_train = get_all_imgs(x_train_uids)
        x_val = get_all_imgs(x_val_uids)

        return x_train, y_train, x_val, y_val

    if task == 'test':
        # The uid column is named 'uid'; the images have to actually be loaded.
        test_ids = test_data['uid']
        x_test = get_all_imgs(test_ids)
        return x_test

    raise ValueError(f"Unknown task {task!r}; expected 'train' or 'test'")

In [14]:
%%time
# Load train/validation images and labels, then print the class proportions of each split.
x_train_, y_train_, x_val_, y_val_ = get_np_data()
print(y_train_.value_counts(normalize=True))
print(y_val_.value_counts(normalize=True))
print('Done')

Loading train and validation data...
1    0.434454
4    0.213769
2    0.189363
3    0.158916
5    0.003498
Name: severity, dtype: float64
1    0.434340
4    0.213990
2    0.189189
3    0.158983
5    0.003498
Name: severity, dtype: float64
Done
CPU times: user 10.6 s, sys: 3.01 s, total: 13.6 s
Wall time: 2min 23s


# Preprocess

In [15]:
# Shift labels from the given severity range 1-5 down to 0-4,
# so they look like sparse-encoded class labels for the model.

y_train = y_train_-1
y_val = y_val_-1


# Move the band axis last: (N, bands, H, W) -> (N, H, W, bands).
X_train = x_train_.transpose([0, 2, 3, 1])
X_val = x_val_.transpose([0, 2, 3, 1])

X_train.shape, X_val.shape, y_train.shape, y_val.shape

((12579, 36, 36, 5), (3145, 36, 36, 5), (12579,), (3145,))

# Model

In [16]:
def get_model(mdtype='clf', n_classes=5):
    """Build and compile the small conv net.

    Args:
        mdtype: 'clf' for a softmax classifier trained with sparse categorical
            cross-entropy, or 'reg' for a single-output regressor trained with
            `rmse_loss`.
        n_classes: number of softmax outputs in 'clf' mode.

    Returns:
        A compiled Keras model named `config.name`.

    Raises:
        ValueError: if `mdtype` is neither 'clf' nor 'reg'.
    """
    print(f'Loading {mdtype} type model...')
    input_shape = (*config.IMG_SIZE, config.CHANNELS)

    # The output layer is created before the hidden layers on purpose: this
    # keeps the auto-generated layer names the same as in earlier runs.
    # `metric_fn` (rather than `comp_metric`) avoids shadowing the helper
    # imported from tick_tick_bloom_utils.
    if mdtype == 'clf':
        loss = losses.SparseCategoricalCrossentropy()
        metric_fn = keras_rmse_clf
        last_layer = layers.Dense(n_classes, activation='softmax')
    elif mdtype == 'reg':
        loss = rmse_loss
        metric_fn = keras_rmse_reg
        last_layer = layers.Dense(1)
    else:
        raise ValueError(f"Unknown mdtype {mdtype!r}; expected 'clf' or 'reg'")

    input_imgs = layers.Input(shape=input_shape)
    x = layers.Conv2D(64, (3, 3), activation='relu')(input_imgs)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dense(128, activation='relu')(x)
    output = last_layer(x)

    model = models.Model(inputs=input_imgs, outputs=output, name=config.name)

    # NOTE(review): the 'acc' metric is kept in both modes because train_
    # monitors 'val_acc'; it is presumably not meaningful for 'reg' - confirm.
    model.compile(optimizer=optimizers.Adam(learning_rate=config.train.lr),
                  loss=loss,
                  metrics=[
                      metric_fn,
                      metrics.SparseCategoricalAccuracy(name='acc')
                  ])

    return model

In [17]:
# Build the classifier variant and print its layer summary.
model = get_model('clf')
model.summary()

Loading clf type model...


2023-01-13 09:59:28.138682: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.139698: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.376837: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.377870: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.378761: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

Model: "conv2d64_d128-1673603823"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 36, 36, 5)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 34, 34, 64)        2944      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 17, 17, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 18496)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               4735232   
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense (Dense)                (None, 5)    

2023-01-13 09:59:28.617868: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.618840: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.619627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.620395: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 09:59:28.621186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

In [18]:
# Class counts of the shifted (0-4) training labels.
y_train.value_counts()

0    5465
3    2689
1    2382
2    1999
4      44
Name: severity, dtype: int64

In [19]:
# model.fit(X_train, y_train, epochs=5)

# Train and eval

In [20]:
def train_(model, config=config, x_train=X_train, y_train=y_train, debug=None):
    """Fit `model` on x_train / y_train and return (model, history).

    Args:
        model: compiled Keras model.
        config: run configuration; `config['train']` supplies epochs,
            batch_size, validation_split, shuffle and verbose.
        x_train, y_train: training data; part of it is held out for
            validation according to `validation_split`.
        debug: when truthy, train for 1000 epochs regardless of the config.

    Callbacks: early stopping on val_loss (patience 15, best weights restored),
    learning-rate halving on a val_acc plateau (patience 5), and a wandb
    callback when one can be created.
    """
    train_config = config['train']
    my_callbacks = []

    earlystopping = callbacks.EarlyStopping(patience=15, monitor='val_loss', restore_best_weights=True)
    my_callbacks.append(earlystopping)
    reduce_lr_on_plateau = callbacks.ReduceLROnPlateau(
                                            monitor="val_acc",
                                            factor=0.5,
                                            patience=5,
                                            verbose=1,
                                            mode="auto",
                                            min_delta=0.01,
                                            cooldown=0,
                                            min_lr=0)
    my_callbacks.append(reduce_lr_on_plateau)

    # Tracking is best-effort: training continues without wandb, but the
    # reason is printed instead of being swallowed by a bare except.
    try:
        wandb_callback = wandb.keras.WandbCallback(
            monitor='val_loss',
            log_weights=True,
            log_gradients=True,
            save_model=False,
            training_data=(x_train, y_train),
            log_batch_frequency=None,
        )
    except Exception as exc:
        print('wandb not tracking')
        print(f'  reason: {exc!r}')
    else:
        my_callbacks.append(wandb_callback)

    print(f'Training model... {config.name}')
    epochs = 1000 if debug else train_config.epochs

    # Read the fit options from the config instead of hard-coding them; the
    # fallbacks are the previously hard-coded values.
    history = model.fit(
                x_train, y_train,
                epochs=epochs,
                batch_size=train_config.batch_size,
                callbacks=my_callbacks,
                validation_split=train_config.get('validation_split', 0.2),
                shuffle=train_config.get('shuffle', True),
                verbose=train_config.get('verbose', 1)
            )

    return model, history


def eval_(model, x_val=X_val, y_val=y_val):
    """Evaluate `model` on the validation data and return the metrics dict.

    The default x_val / y_val are the module-level X_val / y_val as they were
    when this function was defined.
    """
    print('Evaluating model....')
    # Return the results instead of discarding them, so callers can use them.
    return model.evaluate(x_val, y_val, return_dict=True)


In [21]:
def train_eval(model=None, mdtype='clf', X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, debug=False):
    """Train a model, evaluate it on the validation data and print a report.

    Args:
        model: compiled Keras model; a new one of type `mdtype` is built when None.
        mdtype: 'clf' (argmax of class scores) or 'reg' (rounded outputs).
        X_train, y_train: training data passed to `train_`.
        X_val, y_val: validation data used for evaluation and the report.
        debug: forwarded to `train_`.

    Returns:
        (model, history)

    Raises:
        ValueError: if `mdtype` is neither 'clf' nor 'reg'.
    """
    # Fail before training rather than after it, when predictions are decoded.
    if mdtype not in ('clf', 'reg'):
        raise ValueError(f"Unknown mdtype {mdtype!r}; expected 'clf' or 'reg'")

    if model is None:
        print('Getting New model')
        model = get_model(mdtype)

    # train
    model, history = train_(model, config, X_train, y_train=y_train, debug=debug)
    # eval on the validation data given to this call, not eval_'s defaults
    eval_(model, X_val, y_val)

    # hard predictions on the same 0-4 scale as y_val
    y_pred = model.predict(X_val)
    if mdtype == 'clf':
        y_pred_hard = np.argmax(y_pred, axis=1)
    else:
        # model output is (N, 1): flatten so it lines up with y_val
        y_pred_hard = np.round(y_pred).ravel().astype(int)

    print(y_pred_hard)
    # sqrt of the MSE is the RMSE; avoids relying on the `squared=` keyword
    error = np.sqrt(mse(y_val, y_pred_hard))
    print("Comp Metric: ", error)
    cr = classification_report(y_val, y_pred_hard)     # classes are reported as 0-4, not the original 1-5
    print(cr)

    return model, history


In [22]:
# Override the training settings for this run, then display them.
config.train.epochs = 100
config.train.lr = 1e-5
config.train

{'epochs': 100,
 'batch_size': 32,
 'validation_split': 0.2,
 'shuffle': True,
 'verbose': 1,
 'lr': 1e-05}

In [23]:
# Quick fit with Keras defaults; the model is rebuilt with get_model('clf') before the tracked run below.
model.fit(X_train, y_train)

2023-01-13 09:59:34.410669: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2023-01-13 09:59:37.071560: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005




<keras.callbacks.History at 0x7f997c2cef90>

In [None]:
# Tracked run: build a fresh classifier, then train and evaluate it inside a wandb run.
with wandb.init(project=config.PROJECT_NAME, config=config, name=config.name):
    model = get_model('clf')
    model, history = train_eval(model, 'clf', X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)

[34m[1mwandb[0m: Currently logged in as: [33mk_loki[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loading clf type model...
Training model... conv2d64_d128-1673603823


2023-01-13 10:00:08.551620: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 10:00:08.552523: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 10:00:08.553480: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-01-13 10:00:08.553669: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2023-01-13 10:00:08.554428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-01-13 10:00:08.555148: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] su

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.
Epoch 7/100
Epoch 8/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-06.
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100

Epoch 00018: ReduceLROnPlateau reducing learning rate to 1.249999968422344e-06.
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 28/100

Epoch 00028: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-07.
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100

Epoch 00033: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-07.
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100

Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-07.
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch

In [None]:
# List the GPUs TensorFlow can see.
tf.config.list_physical_devices('GPU')

In [None]:
# Same GPU check as the previous cell.
tf.config.list_physical_devices('GPU')

In [None]:
# Hard class predictions on the validation set and their RMSE against y_val.
preds = np.argmax(model.predict(X_val), axis=1)
mse(y_val, preds.ravel(), squared=False)

In [None]:
# Share of each predicted class.
pd.Series(preds).value_counts(normalize=True)

In [None]:
# Share of each true class in the validation labels, for comparison with the predictions.
y_val.value_counts(normalize=True)

In [None]:
# Per-epoch values recorded during training.
history.history

In [None]:
# model = models.load_model('/kaggle/working/d128_rmse_lndsat8_raw_v1-1673283452.h5', custom_objects={'comp_loss': comp_loss})
# preds = model.predict(X_val)
# int_preds = np.round(preds)
# mse(y_val, int_preds, squared=False)

# Save something..

In [None]:
# save model
# Persist the trained model under the run name when saving is enabled.
if config.SAVE_MODEL:
    model_path = config.name + '.h5'
    model.save(model_path)
    print("Model saved as ", model_path)

# Make submission

In [None]:
# Check which model is about to be used for the submission.
model.summary()

In [None]:
# NOTE(review): X_test is not defined anywhere in the visible cells - build it first (e.g. via get_np_data(task='test') plus the same transpose as X_train).
X_test.shape

In [None]:
# test_preds = np.round(model.predict(X_test)).ravel()
test_preds = np.argmax(model.predict(X_test), axis=1)
test_preds = test_preds + 1
sub_format.severity = test_preds
sub_format.severity = sub_format.severity.astype(int) 
sub_format.severity.value_counts()

In [None]:
# Write the submission CSV, named after the run.
save_file_to = f'{config.name}_preds.csv'
print(f'saving file to {save_file_to}')
sub_format.to_csv(save_file_to, index=False) # expect @ 0.979 0.98

# So...

- NNs with log_loss are not improving at all, mostly because of the loss function! --> or so I thought, but
- NNs with log loss are better compared to rmse-loss??
- 0.9898369849328295; the prev best of 0.97777 is with leaked metadata and a failure!!

# ToDos:

- Get a bigger network to overfit, train it to the max level, and see how far it can go??
- **Try to beat the expanding avg_severity_by_region with the help of imgs; otherwise there is no use for img data**