# Transformer Model for Time Series Classification

Experimental Notebook

November 15 2022



# Introduction

This is the Transformer architecture from Attention Is All You Need, applied to timeseries instead of natural language.

This example requires TensorFlow 2.4 or higher.

Sample dataset provided through Keras
https://keras.io/examples/timeseries/timeseries_transformer_classification/

Load the dataset
We are going to use the same dataset and preprocessing as the TimeSeries Classification from Scratch example.

# Build the model
Our model processes a tensor of shape (batch size, sequence length, features), where sequence length is the number of time steps and features is each input timeseries.

You can replace your classification RNN layers with this one: the inputs are fully compatible!

We include residual connections, layer normalization, and dropout. The resulting layer can be stacked multiple times.

The projection layers are implemented through keras.layers.Conv1D.

The main part of our model is now complete. We can stack multiple of those transformer_encoder blocks and we can also proceed to add the final Multi-Layer Perceptron classification head. Apart from a stack of Dense layers, we need to reduce the output tensor of the TransformerEncoder part of our model down to a vector of features for each data point in the current batch. A common way to achieve this is to use a pooling layer. For this example, a GlobalAveragePooling1D layer is sufficient.

# Lets try it for our Landsat time series data



In [1]:
import os
import pandas as pd
import numpy as np
import datetime
import pprint
import time
import math
import random
import glob
from functools import reduce
from pprint import pprint


#Plotting
import matplotlib.pyplot as plt
from IPython.display import clear_output


In [2]:
# Tensorflow setup.

# Tensorflow version 2.4.1
import tensorflow as tf
print(tf.__version__) 

from tensorflow import keras
from tensorflow.keras import layers

# Keras setup.
import keras
from keras import layers
from keras.layers import Flatten
from keras import backend as K
from keras import regularizers
from keras import optimizers
from keras.regularizers import l2
from keras.layers import Input, Dense, Activation, BatchNormalization, Dropout, Flatten, Lambda, SpatialDropout1D, Concatenate
from keras.layers import Conv1D, Conv2D, AveragePooling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D
from keras.callbacks import Callback, ModelCheckpoint, History, EarlyStopping
from keras.models import Model, load_model
from keras.utils.np_utils import to_categorical
from keras import backend as K

2022-11-17 11:30:50.892380: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


2.4.1


In [3]:
import sys

# Import from ~/sits folder
# Contains readingsits.py file to read and compute spectral features on SITS
sys.path.append("/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN/sits")
import readingsits

# Import from ~/deeplearning folder
# Contains multiple .py files with varying DL architectures 
sys.path.append("/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN/deeplearning")

import architecture_features
import architecture_complexity
import architecture_rnn
import architecture_regul
import architecture_batchsize
import architecture_depth
import architecture_spectro_temporal
import architecture_pooling

# Import from ~/outputfiles folder
# Contains evaluation.py and save.py files with fucntions to compute summary statistics, write predictions, and create confusion matrices
sys.path.append("/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN/outputfiles")

import evaluation
import save

In [4]:

# Set a model results path
res_path = '/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN'

# Creating output path if does not exist
if not os.path.exists(res_path):
  print("ResPath DNE")
  os.makedirs(res_path)

# Set the path to exported training/testing dataset
sits_path = '/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/datasets_oct22'    
    
# Set Architecture / Model Run Index (used if running in batch on MSI)
noarchi = 0
norun = 0
feature = "SB" #use only spectral bands provided (do not compute new bands, like NDVI, which are already computed)

# Parameters to set
n_channels = 7 #-- B G NDVI NIR Red SWIR1 SWIR2
val_rate = 0.1 # Validation data rate

# Evaluated metrics
eval_label = ['OA', 'train_loss', 'train_time', 'test_time']	
	
# String variables for the training and testing datasets
train_str = 'train_dataset_allyears_full_oct22'
test_str = 'test_dataset_allyears_full_oct22'					

# Get filenames
train_file = sits_path + '/' + train_str + '.csv'
test_file = sits_path + '/' + test_str + '.csv'
print("train_file: ", train_file)
print("test_file: ", test_file)
	
# Output files			
res_path = res_path + '/Archi' + str(noarchi) + '/'
if not os.path.exists(res_path):
  os.makedirs(res_path)
  print("noarchi: ", noarchi)

# Create output files to capture model results
str_result = feature + '-' + train_str + '-noarchi' + str(noarchi) + '-norun' + str(norun) 
res_file = res_path + '/resultOA-' + str_result + '.csv'
res_mat = np.zeros((len(eval_label),1))
traintest_loss_file = res_path + '/trainingHistory-' + str_result + '.csv'
conf_file = res_path + '/confMatrix-' + str_result + '.csv'
out_model_file = res_path + '/bestmodel-' + str_result + '.h5'



train_file:  /panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/datasets_oct22/train_dataset_allyears_full_oct22.csv
test_file:  /panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/datasets_oct22/test_dataset_allyears_full_oct22.csv


In [5]:
from tensorflow.keras.utils import to_categorical

# Read in SITS training and testing datasets
X_train, polygon_ids_train, y_train = readingsits.readSITSData(train_file)
X_test,  polygon_ids_test, y_test = readingsits.readSITSData(test_file)
print(X_test)  #verify spectral band data looks correct
print(X_test.shape) #num_samples, 63 bands (9 timesteps * 7 bands/timestep = 63)


# Number of unique classes in y_train and y_test datasets should = 9
n_classes_test = len(np.unique(y_test))
print(n_classes_test)
n_classes_train = len(np.unique(y_train))
print(n_classes_train)

# heck equal number of classes in training and testing dataset
if(n_classes_test != n_classes_train):
  print("WARNING: different number of classes in train and test")

n_classes = max(n_classes_train, n_classes_test) # 9 classes
y_train_one_hot = to_categorical(y_train) # specify number of classes explicity - may need to recode classes sequentially (1-9) to work correctly?
y_test_one_hot = to_categorical(y_test)

print(y_test_one_hot) #verify one hot encoding was successful
print(y_test_one_hot.shape)
print(y_test_one_hot[0])





[[0.07200938 0.1070581  0.1344206  ... 0.2114825  0.1468712  0.3813307 ]
 [0.1770388  0.2509725  0.2587963  ... 0.38377    0.3168075  0.3029786 ]
 [0.04046    0.0668875  0.095625   ... 0.2135931  0.08785625 0.8973987 ]
 ...
 [0.158765   0.1885475  0.227625   ... 0.36491188 0.2431075  0.22428876]
 [0.0438975  0.0541275  0.0639725  ... 0.23465125 0.10305    0.8873988 ]
 [0.06847563 0.08204    0.09367938 ... 0.30855063 0.23769687 0.21444525]]
(618292, 63)
9
9
[[0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(618292, 10)
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]


In [6]:

#---- Extracting a validation set (if necesary)
if val_rate > 0:
  #Number of samples to take from Training dataset based on validation rate
  val_num_samples = int(math.ceil(X_train.shape[0] * val_rate))

  #Select random indices for val_num_samples to select validation set
  val_indices = random.sample(range(1, X_train.shape[0]), val_num_samples)
  #remove these indices from the training set
  train_indices = np.delete(range(1, X_train.shape[0]), val_indices)

  #Create training and validation sets 
  X_val = X_train[val_indices, :]
  y_val = y_train[val_indices]
  X_train = X_train[train_indices, :]
  y_train = y_train[train_indices]

  #--- Computing the one-hot encoding (recomputing it for train)
  y_train_one_hot = to_categorical(y_train)
  y_val_one_hot = to_categorical(y_val)

  n_classes_val = len(np.unique(y_val))
  print(n_classes_val)
  n_classes_train = len(np.unique(y_train))
  print(n_classes_train)

  #Check equal number of classes in training and testing dataset
  if(n_classes_val != n_classes_train):
    print("WARNING: different number of classes in train and test")
  

print(X_train.shape, y_train_one_hot.shape, X_val.shape, y_val_one_hot.shape, X_test.shape, y_test_one_hot.shape)


9
9
(5008141, 63) (5008141, 10) (556461, 63) (556461, 10) (618292, 63) (618292, 10)


In [7]:
#Format of X and Y training data for input in Transformer model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)



(5008141, 63, 1)
(556461, 63, 1)
(618292, 63, 1)


# Define model variables

In [None]:
###
# Define Model Variables
###

# Model variables
n_epochs = 100
batch_size = 5000

input_shape = X_train.shape[1:]

# inverse of frequency
class_weights = {0: 0,
                 1: 7.046028630719989,
                 2: 3.6421837069230087,
                 3: 31.37461158722999,
                 4: 0.7614511317372198,
                 5: 0.6015453322153169,
                 6: 0.3652990948014909,
                 7: 0.39487324200412083,
                 8: 4.334510403657227,
                 9: 13.275284755853498}

print(class_weights)


# Model Callbacks

In [None]:

#Plot Loss and Accuracy Callback
class PlotLearning(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.f1 = []
        self.val_f1 = []
        
        self.fig = plt.figure()
        
        self.logs = []

    def on_epoch_end(self, epoch, logs={}):
        
        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.f1.append(logs.get('accuracy'))
        self.val_f1.append(logs.get('val_accuracy'))
        self.i += 1
        f, (ax1, ax2) = plt.subplots(1, 2, sharex=True)
        
        clear_output(wait=True)
        
        ax1.set_yscale('log')
        ax1.plot(self.x, self.losses, label="loss")
        ax1.plot(self.x, self.val_losses, label="val loss")
        ax1.legend()
        
        ax2.plot(self.x, self.f1, label="Acc")
        ax2.plot(self.x, self.val_f1, label="val Acc ")
        ax2.legend()
        
        plt.show();
        
plot_losses = PlotLearning()


# Learning Rate Warmup and Decay Callback
def lr_warmup_cosine_decay(global_step,
                           warmup_steps,
                           hold = 0,
                           total_steps=0,
                           start_lr=0.0,
                           target_lr=1e-3):
    # Cosine decay
    learning_rate = 0.5 * target_lr * (1 + np.cos(np.pi * (global_step - warmup_steps - hold) / float(total_steps - warmup_steps - hold)))

    # Target LR * progress of warmup (=1 at the final warmup step)
    warmup_lr = target_lr * (global_step / warmup_steps)

    # Choose between `warmup_lr`, `target_lr` and `learning_rate` based on whether `global_step < warmup_steps` and we're still holding.
    # i.e. warm up if we're still warming up and use cosine decayed lr otherwise
    if hold > 0:
        learning_rate = np.where(global_step > warmup_steps + hold,
                                 learning_rate, target_lr)
    
    learning_rate = np.where(global_step < warmup_steps, warmup_lr, learning_rate)
    return learning_rate

#Plot the learning rate schedule
#steps = np.arange(0, 100, 1)
#lrs = []

#for step in steps:
#  lrs.append(lr_warmup_cosine_decay(step, total_steps=len(steps), warmup_steps=10, hold=5))
#plt.plot(lrs)


class WarmupCosineDecay(keras.callbacks.Callback):
    def __init__(self, total_steps=0, warmup_steps=0, start_lr=0.0, target_lr=1e-3, hold=0):

        super(WarmupCosineDecay, self).__init__()
        self.start_lr = start_lr
        self.hold = hold
        self.total_steps = total_steps
        self.global_step = 0
        self.target_lr = target_lr
        self.warmup_steps = warmup_steps
        self.lrs = []

    def on_batch_end(self, batch, logs=None):
        self.global_step = self.global_step + 1
        lr = model.optimizer.lr.numpy()
        self.lrs.append(lr)

    def on_batch_begin(self, batch, logs=None):
        lr = lr_warmup_cosine_decay(global_step=self.global_step,
                                    total_steps=self.total_steps,
                                    warmup_steps=self.warmup_steps,
                                    start_lr=self.start_lr,
                                    target_lr=self.target_lr,
                                    hold=self.hold)
        K.set_value(self.model.optimizer.lr, lr)
        
        
# If already batched
# If not batched
total_steps = 100
# 5% of the steps
warmup_steps = int(0.05*total_steps)

warmup_callback = WarmupCosineDecay(total_steps=total_steps, 
                             warmup_steps=warmup_steps,
                             hold=int(warmup_steps/2), 
                             start_lr=0.0, 
                             target_lr=1e-3)






# Model Checkpoint Callback

# Build the transformer model

In [None]:



def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res


def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    return keras.Model(inputs, outputs)


model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.1,
    dropout=0.1,
)

model.compile(
    loss = "categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"],
)




# Train the model

In [None]:

# Model callbacks
#checkpoint = ModelCheckpoint(out_model_file, monitor='val_loss', verbose=1, save_best_only=True, mode='min', restore_best_weights=True)
#early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=1, mode='auto')

start_train_time = time.time()

hist = model.fit(x = X_train,
                 y = y_train_one_hot,
                 epochs = n_epochs,
                 batch_size = batch_size,
                 shuffle=True,
                 validation_data=(X_val, y_val_one_hot),
                 verbose=1,
                 callbacks=[warmup_callback],
                 class_weight=class_weights)

train_time = round(time.time()-start_train_time, 2)


In [None]:

# Save the Trained Model as a .h5 file
model.save(r'/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN/Archi0/draft_transformer_modeL_20epochs_nov152022.h5')



In [8]:

# Load a trained model
model = keras.models.load_model(r'/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/temporalCNN/Archi0/draft_transformer_model_20epochs_nov152022.h5')

#model.summary()


2022-11-17 11:32:23.527191: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-11-17 11:32:23.529696: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-11-17 11:32:24.540040: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:4c:00.0 name: NVIDIA A40 computeCapability: 8.6
coreClock: 1.74GHz coreCount: 84 deviceMemorySize: 44.56GiB deviceMemoryBandwidth: 648.29GiB/s
2022-11-17 11:32:24.540083: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-11-17 11:32:24.847253: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-11-17 11:32:24.847309: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2022-11-17 

In [9]:

print(X_test.shape)
print(y_test.shape)


(618292, 63, 1)
(618292,)


In [10]:

from sklearn.metrics import multilabel_confusion_matrix
from tabulate import tabulate

# Predict the model on withheld testing dataset
y_pred = model.predict(X_test)

print(y_pred.shape)

2022-11-17 11:40:01.874441: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-11-17 11:40:01.874927: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2445590000 Hz
2022-11-17 11:40:02.178330: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-11-17 11:41:46.759144: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2022-11-17 11:56:10.291965: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2022-11-17 11:56:10.367141: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


(618292, 10)


In [11]:
#Prediction should be 10 classes

print(y_pred.shape)
print(y_pred[0:2, :])
y_pred_argmax =  np.argmax(y_pred, axis=-1)
print(y_pred_argmax[0:2])
print(y_pred_argmax.shape)


print(y_pred_argmax[0:10])
print(y_test[0:10])

(618292, 10)
[[2.4843008e-08 2.6338676e-02 1.6415882e-01 2.5248085e-03 9.2164755e-02
  5.9947867e-02 1.4070335e-01 3.0800840e-01 1.7370293e-01 3.2450337e-02]
 [7.7092857e-13 2.9097457e-06 7.8903930e-03 9.2650025e-06 5.4540495e-05
  3.5978055e-06 4.7841109e-03 9.8689854e-01 3.5166452e-04 4.9108730e-06]]
[7 7]
(618292,)
[7 7 4 5 6 7 4 4 6 5]
[8 7 5 6 6 5 4 4 6 6]


In [12]:
np.set_printoptions(suppress=True) #suppress scientific notation printing

y_pred_argmax = np.argmax(y_pred, axis=-1)
#y_pred_flat = y_pred.flatten()
#y_pred_flat = y_pred_flat.astype(int)

#y_test = y_test.astype(int)    
#y_test_flat = y_test.flatten()


# Calculate confusion matrix
class_names = ["Water", "Developed", "BarrenLand", "Forest", "Shrub/Scrub", "Grassland/Herbaceous", "Croplands", "EmergentWetlands", "LeafySpurge"]
class_labels = [1, 2, 3, 4, 5, 6, 7, 8, 9]
c = multilabel_confusion_matrix(y_test, y_pred_argmax, labels = class_labels)
model_output_metrics = []
for i in range(len(class_labels)):
    tn=c[i, 0, 0]
    tp=c[i, 1, 1]
    fn=c[i, 1, 0]
    fp=c[i, 0, 1]
    accuracy = (tp+tn)/(tp+tn+fp+fn)
    TPR_Sens_Recall = tp/(tp+fn)
    TNR_Spec = tn/(tn+fp)
    FPR = fp/(fp+tn)
    FNR = fn/(fn+tp)
    precision = tp/(tp+fp)
    jaccard = tp/(tp+fp+fn)
    beta = 0.5
    F05 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)
    beta = 1
    F1 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)
    beta = 2
    F2 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)
    outputs = [class_names[i], tp, tn, fp, fn, accuracy, TPR_Sens_Recall, TNR_Spec, FPR, FNR, precision, jaccard, F1]
    model_output_metrics.append(outputs)

# Print and format outputs
print(tabulate(model_output_metrics, floatfmt=".2f", headers=["Class Name", "TP", "TN", "FP", "FN", "Accuracy", "TPR/Sens/Recall", "TNR/Spec", "FPR", "FNR", "Precision", "Jaccard", "F1"]))


Class Name                  TP         TN         FP         FN    Accuracy    TPR/Sens/Recall    TNR/Spec    FPR    FNR    Precision    Jaccard      F1
--------------------  --------  ---------  ---------  ---------  ----------  -----------------  ----------  -----  -----  -----------  ---------  ------
Water                  1094.00  549273.00   59414.00    8511.00        0.89               0.11        0.90   0.10   0.89         0.02       0.02    0.03
Developed              1539.00  562882.00   36611.00   17260.00        0.91               0.08        0.94   0.06   0.92         0.04       0.03    0.05
BarrenLand              465.00  478371.00  137720.00    1736.00        0.77               0.21        0.78   0.22   0.79         0.00       0.00    0.01
Forest                 9114.00  502548.00   25439.00   81191.00        0.83               0.10        0.95   0.05   0.90         0.26       0.08    0.15
Shrub/Scrub           14268.00  468098.00   35897.00  100029.00        0.78       

  F05 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)
  F1 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)
  F2 = ((1 + beta**2) * precision * TPR_Sens_Recall) / (beta**2 * precision + TPR_Sens_Recall)


In [None]:

#Model Prediction on small TIF raster file

import rasterio
from rasterio.plot import show
import glob

# Not used, but might be needed to index across tiles
tile_index = 302

# Input prediction .tif path
image_path = r'/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/datasets_oct22/rasters_302_2019/'

# Output prediction file path
outpath = r'/panfs/roc/groups/7/moeller/shared/leafy-spurge-demography/datasets_oct22/raster_predictions_transformer/'

# List all .tif files in /rasters folder for prediction
tif_image_list = glob.glob(image_path + '*.tif')

print(tif_image_list[0:1])

# Loop through every tif file for prediction.
for t in range(len(tif_image_list)):
    
    # Open .tif array image with rasterio, read to numpy array
    with rasterio.open(tif_image_list[t], 'r') as ds:
        arr = ds.read()  # read all raster values

    # Define shape of input .tif image
    bands, width, height = arr.shape
    #print(arr.shape) # (63, 413, 413), we target (618292, 63, 1)
    
    # Convert Data Type to float32 by division.
    arr = arr/10000
    
    # Reshape .tif array axes for correct format so model can predict. (618292, 63, 1)
    arr = np.moveaxis(arr, 0, -1) #move axis to channels last
    #print(arr.shape)
    #new_arr = arr.reshape(-1, arr.shape[-1]) #reshape to row and column
    num_pixels = width*height
    new_arr2 = arr.reshape(num_pixels, bands)
    #print(new_arr2.shape)
    new_arr3 = new_arr2.reshape((new_arr2.shape[0], new_arr2.shape[1], 1))
    print(new_arr3.shape)
    
    p = model.predict(new_arr3) # p is prediction from the DL model
    print(p.shape)
    # Predict model and reshape to export.
    #p = model.predict(new_arr3) # p is prediction from the DL model
    pim = p.reshape(width, height, 10) # Dimension of prediction in rows, columns, bands (10 classes)
    pim2 = np.moveaxis(pim, 2, 0) # move axis so bands is first

    # ArgMax for Segmentation.
    pim3 = np.argmax(pim2, axis=0) # take softmax of predictions for segmentation
    print(pim3.shape)

    # Get the file name (landsat_image_170_t.tif) by splitting input path.
    fileout_string = os.path.split(tif_image_list[t])

    # Output prediction raster .
    out_meta = ds.meta.copy()

    # Get Output metadata.
    out_meta.update({'driver':'GTiff',
                     'width':ds.shape[1],
                     'height':ds.shape[0],
                     'count':1,
                     'dtype':'float64',
                     'crs':ds.crs, 
                     'transform':ds.transform,
                     'nodata':0})

    # Write predicted raster to file.
    with rasterio.open(fp=outpath + "/prediction_" + fileout_string[-1], #outputpath_name
                 mode='w',**out_meta) as dst:
                 dst.write(pim3, 1) # the numer one is the number of bands

    print("Writing file...")


In [None]:
print(p.shape)

In [None]:

# Predict model and reshape to export.
#p = model.predict(new_arr3) # p is prediction from the DL model
pim = p.reshape(width, height, 10) # Dimension of prediction in rows, columns, bands (10 classes)
pim2 = np.moveaxis(pim, 2, 0) # move axis so bands is first

# ArgMax for Segmentation.
pim3 = np.argmax(pim2, axis=0) # take softmax of predictions for segmentation
print(pim3.shape)

# Get the file name (landsat_image_170_t.tif) by splitting input path.
fileout_string = os.path.split(tif_image_list[t])

# Output prediction raster .
out_meta = ds.meta.copy()

# Get Output metadata.
out_meta.update({'driver':'GTiff',
                 'width':ds.shape[1],
                 'height':ds.shape[0],
                 'count':1,
                 'dtype':'float64',
                 'crs':ds.crs, 
                 'transform':ds.transform,
                 'nodata':0})

# Write predicted raster to file.
with rasterio.open(fp=outpath + "/prediction_" + fileout_string[-1], #outputpath_name
             mode='w',**out_meta) as dst:
             dst.write(pim3, 1) # the numer one is the number of bands

print("Writing file...")
    


In [None]:

import matplotlib.pyplot as plt

plt.imshow(pim3)

