In [1]:
from IPython.display import display
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd

import datetime
from pathlib import Path

import intake

from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import classification_report, accuracy_score

import pickle

import tensorflow as tf
from tensorflow.keras import layers

2024-04-11 12:56:16.668061: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Open the project catalog and pick the 'treesat' entry.
catalog = intake.open_catalog(Path('../catalog.yml'))
source = catalog.treesat

# Read the source and keep only the columns listed in its metadata.
# NOTE(review): presumably `read()` returns a GeoDataFrame -- confirm.
gdf = source.read()
gdf = gdf[source.metadata['usecols']]

# Declare the CRS as EPSG:25832, then reproject to EPSG:4326.
gdf.crs = 25832
gdf = gdf.to_crs(epsg=4326)

In [3]:
# Band names used downstream: B2..B8, three further bands, and the TCI channels.
# NOTE(review): these look like Sentinel-2 band identifiers -- confirm.
selected_bands = [
    *(f'B{band_number}' for band_number in range(2, 9)),
    'B8A', 'B11', 'B12',
    'TCI_R', 'TCI_G', 'TCI_B',
]

In [18]:
class Conv2DModelCreator:
    """Train and evaluate a small Conv2D classifier, one model per input array.

    Typical use is ``Conv2DModelCreator().run(labels, filepaths)``, which loads
    each ``.npy`` array, splits and scales it, fits a freshly built model and
    collects the evaluation result of every array.
    """

    def split_and_preprocess(self, y, X, random_state=42):
        """Split into train/test sets and scale every band by its training maximum.

        Parameters
        ----------
        y : array-like
            Labels, one per sample in ``X``.
        X : numpy.ndarray
            Samples; the last axis is treated as the band axis.
        random_state : int, default 42
            Seed forwarded to ``train_test_split``.

        Returns
        -------
        tuple
            ``(X_train, X_test, y_train, y_test)`` with 10 % of the samples
            held out as the test set.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.1, random_state=random_state)

        # Per-band maximum: reduce over every axis except the last one.
        reduce_axes = tuple(range(X_train.ndim - 1))
        band_max = X_train.max(axis=reduce_axes)
        # A band that is zero everywhere would cause a division by zero;
        # dividing by 1 leaves such a band unchanged instead of producing NaN.
        band_max = np.where(band_max == 0, 1, band_max)

        # The test set is scaled with the *training* maxima so both sets share
        # one scale and no test-set statistic enters the preprocessing.
        return X_train / band_max, X_test / band_max, y_train, y_test

    def build_model(self, num_classes):
        """Build and compile the convolutional classifier.

        Architecture: three Conv2D (16, 32, 64 filters, 3x3) + MaxPooling2D
        stages, then Flatten, Dense(128) and a final Dense(num_classes) layer
        that outputs logits (no softmax).

        Parameters
        ----------
        num_classes : int
            Number of output units of the last layer.

        Returns
        -------
        A compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy on logits, accuracy metric).
        """
        padding = 'same'
        activation = 'relu'
        model = tf.keras.Sequential([
            layers.Conv2D(16, (3, 3), padding=padding, activation=activation),
            layers.MaxPooling2D(padding=padding),
            layers.Conv2D(32, (3, 3), padding=padding, activation=activation),
            layers.MaxPooling2D(padding=padding),
            layers.Conv2D(64, (3, 3), padding=padding, activation=activation),
            layers.MaxPooling2D(pool_size=2),
            layers.Flatten(),
            layers.Dense(128, activation=activation),
            layers.Dense(num_classes),
        ])

        model.compile(
            optimizer='adam',
            # The last layer has no activation, so the loss must expect logits.
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )

        return model

    def fit_eval(self, y, X, array_key, model, epochs=10):
        """Split/scale the data, fit ``model`` and evaluate it on the test split.

        Parameters
        ----------
        y : array-like
            Labels, one per sample in ``X``.
        X : numpy.ndarray
            Samples; the last axis is treated as the band axis.
        array_key : str
            Name printed in the progress message.
        model
            Object exposing Keras-style ``fit`` and ``evaluate`` methods.
        epochs : int, default 10
            Number of training epochs.

        Returns
        -------
        Whatever ``model.evaluate`` returns for the test split.
        NOTE(review): with the settings in ``build_model`` this should be the
        loss followed by the accuracy -- confirm against the Keras version used.
        """
        X_train, X_test, y_train, y_test = self.split_and_preprocess(y, X)

        print(f'Evaluating {array_key}...', end=' ')

        model.fit(X_train, y_train, epochs=epochs, verbose=1)
        return model.evaluate(X_test, y_test, verbose=0)

    def run(self, labels, filepaths, combined=False, epochs=10):
        """Train and evaluate one model per ``.npy`` file.

        Parameters
        ----------
        labels
            Integer class labels exposing ``unique()`` (e.g. a pandas Series);
            the same labels are used for every file.
        filepaths : iterable of pathlib.Path
            Array files to load with ``np.load``; the file stem is the result key.
        combined : bool, default False
            Accepted for backward compatibility; currently has no effect.
        epochs : int, default 10
            Number of training epochs per model.

        Returns
        -------
        dict
            Maps each file stem to the evaluation result of its model. Empty
            when ``filepaths`` is empty.
        """
        # Loop-invariant: the number of output units is the same for every file.
        num_classes = labels.unique().size

        results = {}
        for filepath in filepaths:
            X = np.load(filepath)
            # A fresh model per file so no weights carry over between arrays.
            model_single = self.build_model(num_classes)
            results[filepath.stem] = self.fit_eval(
                labels, X, filepath.stem, model_single, epochs=epochs)

        return results

In [19]:
# Choose which label column to classify on.
# NOTE(review): the original comment lists 'multi' and 'trinary', presumably
# the available keys under source.metadata['categories'] -- confirm.
target_key = 'multi'
target = source.metadata['categories'][target_key]

# Store the target as a pandas categorical column.
gdf[target] = gdf[target].astype('category')

In [20]:
%%time
# Collect the array files in a deterministic (sorted) order.
data_dir = Path('processed_data')
filepaths = sorted(data_dir.glob('treesat_*.npy'))

# Integer codes of the categorical target column serve as class labels.
label_codes = gdf[target].cat.codes
zeros = Conv2DModelCreator().run(label_codes, filepaths, combined=True)

Evaluating treesat_201901_median... Epoch 1/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.2385 - loss: 2.0157
Epoch 2/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.3645 - loss: 1.7313
Epoch 3/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.3940 - loss: 1.6561
Epoch 4/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.4057 - loss: 1.6205
Epoch 5/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.4227 - loss: 1.5735
Epoch 6/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.4264 - loss: 1.5549
Epoch 7/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.4322 - loss: 1.5370
Epoch 8/10
[1m1417/1417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.4451 - l