In [1]:
from IPython.display import display
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd

import plotly.graph_objects as go

import datetime
from pathlib import Path

import intake

from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import classification_report, accuracy_score

import tensorflow as tf
from tensorflow.keras import layers

2024-04-13 19:56:23.427110: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Open the project data catalog and pick the 'treesat' entry.
catalog = intake.open_catalog(Path('../catalog.yml'))
source = catalog.treesat

# Keep only the columns the catalog metadata lists under 'usecols'.
wanted_columns = source.metadata['usecols']
gdf = source.read()[wanted_columns]

# Declare the frame's CRS as 25832, then reproject to EPSG:4326.
gdf.crs = 25832
gdf = gdf.to_crs(epsg=4326)

In [3]:
# The catalog metadata offers several label columns ('multi' / 'trinary');
# the 'multi' one is used as the classification target here.
label_columns = source.metadata['categories']
target = label_columns['multi']

# Categorical dtype so that `.cat.codes` yields integer class ids later on.
gdf[target] = gdf[target].astype('category')

In [4]:
# Band names: B2..B8 generated, followed by the remaining bands spelled out.
selected_bands = [
    *(f'B{band_number}' for band_number in range(2, 9)),
    'B8A', 'B11', 'B12',
    'TCI_R', 'TCI_G', 'TCI_B',
]

In [10]:
class Conv2DModelCreator:
    """Train, cache and score small 2-D convolutional classifiers.

    One model is trained (or loaded from ``models/``) per input ``.npy`` file.
    Optionally a single extra model is trained across all files of one
    ``run`` call and saved under a caller-supplied file name.
    """

    def split_and_preprocess(self, y, X, random_state=42):
        """Split into train/test (60/40) and max-scale each band.

        Parameters
        ----------
        y : array-like
            Labels, one per sample in ``X``.
        X : numpy.ndarray
            Feature array whose LAST axis indexes the bands (the code reduces
            over every other axis to get one maximum per band).
        random_state : int, default 42
            Seed forwarded to ``train_test_split``.

        Returns
        -------
        tuple
            ``(X_train, X_test, y_train, y_test)`` with both feature arrays
            transformed as ``2 * X / band_max - 1``.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4, random_state=random_state)

        # Per-band maxima come from the training split only and are reused for
        # the test split, so both splits go through the same transform and no
        # test-set statistic enters preprocessing.
        band_axes = tuple(range(X_train.ndim - 1))
        band_max = X_train.max(axis=band_axes)
        # A band whose maximum is 0 would divide by zero; scale it by 1 instead.
        band_max = np.where(band_max == 0, 1, band_max)

        X_train = 2 * X_train / band_max - 1
        X_test = 2 * X_test / band_max - 1

        return X_train, X_test, y_train, y_test

    def build_model(self, num_classes):
        """Build and compile the convolutional classifier.

        Three 3x3 ReLU convolutions (16, 32, 64 filters), each followed by max
        pooling, then a 128-unit dense layer and a ``num_classes``-unit output
        layer that emits raw logits.

        Parameters
        ----------
        num_classes : int
            Number of output units.

        Returns
        -------
        tf.keras.Sequential
            Model compiled with Adam, sparse categorical cross-entropy on
            logits, and the accuracy metric.
        """
        padding = 'same'
        activation = 'relu'
        k_size = (3, 3)
        model = tf.keras.Sequential([
          layers.Conv2D(16, k_size, padding=padding, activation=activation),
          layers.MaxPooling2D(padding=padding),
          layers.Conv2D(32, k_size, padding=padding, activation=activation),
          layers.MaxPooling2D(padding=padding),
          layers.Conv2D(64, k_size, padding=padding, activation=activation),
          # NOTE(review): unlike the two pooling layers above, this one keeps
          # the default padding; left unchanged so cached models stay valid.
          layers.MaxPooling2D(pool_size=2),
          layers.Flatten(),
          layers.Dense(128, activation=activation),
          layers.Dense(num_classes)
        ])

        model.compile(
            optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy']
        )
        return model

    def run(self, labels, filepaths, overwrite=False, acc_name=None):
        """Train or load one model per file and return their test accuracies.

        Parameters
        ----------
        labels : pandas.Series
            Integer class codes; ``labels.unique().size`` sets the number of
            output units. The same labels are paired with every file.
        filepaths : iterable of path-like
            ``.npy`` files holding the feature arrays.
        overwrite : bool, default False
            When False, a model already saved under ``models/`` for a file is
            loaded instead of being retrained.
        acc_name : str, optional
            If given, one additional model is fitted successively on the
            training split of every file, evaluated on the concatenated test
            splits, and saved as ``models/<acc_name>``.

        Returns
        -------
        list of float
            Test accuracy of the per-file model, in ``filepaths`` order.
        """
        scores = []
        models_dir = Path('models')
        num_classes = labels.unique().size

        # The accumulating model is only needed when the caller asked for it.
        acc_model = self.build_model(num_classes) if acc_name else None
        X_test_acc, y_test_acc = [], []

        for filepath in tqdm(filepaths, leave=False):
            filepath = Path(filepath)
            with open(filepath, 'rb') as f:
                X = np.load(f)

            X_train, X_test, y_train, y_test = self.split_and_preprocess(labels, X)

            model_name = f'conv2d_{filepath.parent}_{filepath.stem}.keras'
            model_path = models_dir.joinpath(model_name)
            # is_file must be CALLED: the bare bound method is always truthy.
            if model_path.is_file() and not overwrite:
                model = tf.keras.models.load_model(model_path)
            else:
                model = self.build_model(num_classes)
                model.fit(X_train, y_train, epochs=10, verbose=0)
                model_path.parent.mkdir(parents=True, exist_ok=True)
                model.save(model_path)

            if acc_model is not None:
                acc_model.fit(X_train, y_train, epochs=10, verbose=1)
                X_test_acc.append(X_test)
                y_test_acc.append(y_test)

            # evaluate returns [loss, accuracy]; only the accuracy is kept.
            score = model.evaluate(X_test, y_test, verbose=0)
            scores.append(score[1])

        # Skip when no file was processed: there is nothing to concatenate.
        if acc_model is not None and X_test_acc:
            acc_model.evaluate(
                np.concatenate(X_test_acc, axis=0),
                np.concatenate(y_test_acc, axis=0),
                verbose=1)
            acc_path = models_dir.joinpath(acc_name)
            acc_path.parent.mkdir(parents=True, exist_ok=True)
            # Persist the accumulated model itself, not the last per-file one.
            acc_model.save(acc_path)
        return scores

In [11]:
seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
seasonal_dir = Path('seasonal_median')
label_codes = gdf[target].cat.codes

# One accuracy per season, collected in `seasons` order.
# NOTE(review): every run() call starts a new accumulating model and writes to
# the same acc_name path, so that file is overwritten once per season —
# confirm this is intended.
mean_seasonal_scores = []
for season in tqdm(seasons):
    # The pattern is a literal file name, so this yields at most one path.
    filepath = sorted(seasonal_dir.glob(f'{season}.npy'))
    score = Conv2DModelCreator().run(
        label_codes,
        filepath,
        overwrite=False,
        acc_name='conv2d_seasonal_means_acc.keras',
    )
    mean_seasonal_scores.extend(score)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3374 - loss: 1.7933
Epoch 2/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4406 - loss: 1.5207
Epoch 3/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4796 - loss: 1.4439
Epoch 4/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5188 - loss: 1.3466
Epoch 5/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5400 - loss: 1.2868
Epoch 6/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5592 - loss: 1.2425
Epoch 7/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5675 - loss: 1.2142
Epoch 8/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5716 - loss: 1.2047
Epoch 9/10
[1m945/945[0m [32m━━━━━━━━

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3552 - loss: 1.7060
Epoch 2/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4674 - loss: 1.4291
Epoch 3/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5191 - loss: 1.3274
Epoch 4/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5438 - loss: 1.2690
Epoch 5/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5538 - loss: 1.2439
Epoch 6/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5601 - loss: 1.2299
Epoch 7/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5723 - loss: 1.2020
Epoch 8/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5810 - loss: 1.1857
Epoch 9/10
[1m945/945[0m [32m━━━━━━━━

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3200 - loss: 1.8571
Epoch 2/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4503 - loss: 1.5294
Epoch 3/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4642 - loss: 1.4769
Epoch 4/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4824 - loss: 1.4296
Epoch 5/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4932 - loss: 1.4044
Epoch 6/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5192 - loss: 1.3553
Epoch 7/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5253 - loss: 1.3278
Epoch 8/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5359 - loss: 1.3062
Epoch 9/10
[1m945/945[0m [32m━━━━━━━━

  0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2968 - loss: 1.8593
Epoch 2/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4081 - loss: 1.5680
Epoch 3/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4399 - loss: 1.4933
Epoch 4/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4524 - loss: 1.4653
Epoch 5/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4688 - loss: 1.4258
Epoch 6/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4755 - loss: 1.4082
Epoch 7/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4910 - loss: 1.3755
Epoch 8/10
[1m945/945[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4951 - loss: 1.3719
Epoch 9/10
[1m945/945[0m [32m━━━━━━━━

In [None]:
# Accuracy of the per-season models, one point per season.
seasonal_layout = {
    "xaxis": {"title": "Season"},
    "yaxis": {"title": "Accuracy"},
    "title": "Conv2d accuracies",
}
traces = [go.Scatter(x=seasons, y=mean_seasonal_scores)]
go.Figure(data=traces, layout=seasonal_layout)

In [None]:
seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
yearly_dir = Path('seasonal_median')
label_codes = gdf[target].cat.codes

traces = []
seasonal_scores = []
for year in tqdm(range(2017, 2024)):
    # NOTE(review): files are sorted by path while the x axis uses the fixed
    # `seasons` order — verify that the file naming sorts into that order and
    # that every year has exactly four files.
    filepaths = sorted(yearly_dir.glob(f'processed*{year}*.npy'))
    scores = Conv2DModelCreator().run(label_codes, filepaths)
    seasonal_scores.extend(scores)
    # One line per year.
    year_trace = go.Scatter(x=seasons, y=scores, name=year)
    traces.append(year_trace)

yearly_layout = {
    "xaxis": {"title": "Season"},
    "yaxis": {"title": "Accuracy"},
    "title": "Conv2d accuracies",
}
go.Figure(data=traces, layout=yearly_layout)

In [None]:
# Per-chunk feature arrays, ordered by the integer after the last '_' in the
# file stem so that file k lines up with chunk k below.
filepaths = sorted(Path('london').glob('*.npy'), key=lambda x: int(x.stem.split('_')[-1]))

# Rows per chunk (a chunk SIZE, despite the name).
n_chunks = 50000
chunks = [gdf.iloc[i: i + n_chunks] for i in range(0, gdf.shape[0], n_chunks)]
scores = []

# zip stops at the shorter sequence, so the progress bar total must too.
n_pairs = min(len(chunks), len(filepaths))

# NOTE(review): LightModelCreator is not defined in the visible part of this
# notebook — confirm it is defined or imported before this cell runs.
for chunk, filepath in tqdm(zip(chunks, filepaths), total=n_pairs):
    score = LightModelCreator().run_and_eval(chunk[target].cat.codes, [filepath])
    scores.append(score)