In [1]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

import plotly.graph_objects as go

import datetime
from pathlib import Path

import intake

from sklearn.model_selection import train_test_split

from keras.regularizers import l2
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv3D, Activation, BatchNormalization, \
                                     Dropout, Flatten, Dense

2024-04-15 12:52:14.683660: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
catalog = intake.open_catalog(Path('../catalog.yml'))
source = getattr(catalog, 'treesat')
df = source.read()

In [3]:
selected_bands = [f'B{x}' for x in range(2, 9)] + ['B8A', 'B11', 'B12', 'TCI_R', 'TCI_G', 'TCI_B']

In [4]:
class Conv3DModelCreator:
    def normalise(self, a, p=1):
        upper = np.percentile(a, 100-p)
        lower = np.percentile(a, p)
    
        bounded_a = np.where(a > upper, np.median(a), a)
        bounded_a = np.where(a < lower, np.median(a), bounded_a)
        
        scaled_a = (bounded_a - lower)/(upper - lower)
        return scaled_a
        
    def split_and_preprocess(self, y, X, random_state=42):
        """Split and max scale."""
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.1, random_state=random_state)
        
        X_train = self.normalise(X_train)

        X_test = self.normalise(X_test)
        
        return X_train, X_test, y_train, y_test

    def build_model_3d(self, num_classes, input_size):
        m = tf.keras.Sequential()
        m.add(Input(input_size))
        for i in range(3): 
            m.add(Conv3D(
                32, kernel_size=(3, 3, 3), 
                strides=1, padding='same',
                kernel_initializer='he_normal',
                kernel_regularizer=l2(1e-6)
            ))
            m.add(BatchNormalization(axis=-1))
            m.add(Activation('relu'))
            m.add(Dropout(0.25))
            
        m.add(Flatten())

        m.add(Dense(64, kernel_initializer='he_normal', 
                  kernel_regularizer=l2(1e-6)))
        m.add(BatchNormalization(axis=-1))
        m.add(Activation('relu'))
        m.add(Dropout(0.25))
        m.add(Dense(
            num_classes, activation='softmax', 
            kernel_initializer='glorot_uniform',
            kernel_regularizer=l2(1.e-6)
        ))
        
        m.compile(
            optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            metrics=['accuracy']
        )
        return m
        
    def run(self, y, X, model_name, overwrite=False):
        X_train, X_test, y_train, y_test = self.split_and_preprocess(y, X)
        model_path = Path('models').joinpath(model_name)

        if model_path.is_file() and not overwrite:
            model = tf.keras.models.load_model(model_path)
        else:
            model = self.build_model_3d(np.unique(y).size, X_train.shape[1:])
            model.fit(X_train, y_train, epochs=10, verbose=1, batch_size=4)
            model.save(model_path)
              
        return model.evaluate(X_test, y_test, verbose=0) 

In [5]:
target = source.metadata['categories']['multi'] # multi / trinary
labels = df[target].astype('category').cat.codes
all_data = []

filepaths = sorted(list(Path('seasonal_median').glob(f'processed*3.npy')))
for filepath in filepaths:
    with open(filepath, 'rb') as f:
        data = np.load(f)
    all_data.append(data)
    
model_name = f'conv_all_springs_multi.keras'

features = np.stack(all_data, axis=3)
score = Conv3DModelCreator().run(labels, features, model_name, overwrite=False)
score


[0.9244375228881836, 0.7074816226959229]

In [6]:
# target = source.metadata['categories']['multi'] # multi / trinary
# labels = df[target].astype('category').cat.codes
# seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
# all_data = []
# for season in seasons:
#     filepath = sorted(list(Path('seasonal_median').glob(f'{season}.npy')))
#     with open(filepath[0], 'rb') as f:
#         all_data.append(np.load(f))
    
# model_name = f'conv_all_mean_seasons_multi.keras'

# features = np.stack(all_data, axis=3)
# score = ConvModelCreator().run(labels, features, model_name)
# score

In [7]:
# traces = [go.Scatter(x=seasons, y=seasonal_scores)]
# go.Figure(
#     data=traces,
#     layout={
#         "xaxis": {"title": "Season"},
#         "yaxis": {"title": "Accuracy"},
#         "title": "Conv2d accuracies"}
# )

In [8]:
# target = source.metadata['categories']['trinary'] # multi / trinary
# labels = df[target].astype('category').cat.codes
# seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
# all_data = []
# for season in tqdm(seasons):
#     filepath = sorted(list(Path('seasonal_median').glob(f'{season}.npy')))
#     with open(filepath[0], 'rb') as f:
#         all_data.append(np.load(f))
    
# model_name = f'conv_all_mean_seasons_trinary.keras'
# score = ConvModelCreator().run(labels, np.stack(all_data, axis=3), model_name)
# score

In [9]:
# target = source.metadata['categories']['trinary'] # multi / trinary

# mask = df[target] != 'cleared'

# labels = df[target][mask].astype('category').cat.codes

# seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
# all_data = []
# for season in tqdm(seasons):
#     filepath = sorted(list(Path('seasonal_median').glob(f'{season}.npy')))
#     with open(filepath[0], 'rb') as f:
#         data = np.load(f)
#         all_data.append(data[mask])
        
# model_name = f'conv_all_mean_seasons_binary.keras'
# score = ConvModelCreator().run(labels, np.stack(all_data, axis=3), model_name)
# score

In [10]:
# filepaths = sorted(list(Path('london').glob('*.npy')), key=lambda x: int(x.stem.split('_')[-1]))

# n_chunks = 50000
# chunks = [gdf[i: i + n_chunks] for i in range(0, gdf.shape[0], n_chunks)]
# scores = []

# chunk[target].cat.codes

# for chunk, filepath in tqdm(zip(chunks, filepaths), total=len(filepaths)
#                            ):
#     score = LightModelCreator().run_and_eval(chunk[target].cat.codes, [filepath])
#     scores.append(score)