In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf

from keras.models import Model, Sequential
from keras import layers
from keras.optimizers import Adam
from keras.regularizers import L1, L2, Regularizer, L1L2
from keras import ops
from keras.callbacks import ReduceLROnPlateau

import imblearn

from time import perf_counter

## Init

In [2]:
# Names of the stress columns in the data table; later cells cast them to
# bool and OR them row-wise into a single binary target.
stresses = [
    'Gm',
    'Drought',
    'Nutrient_Deficiency',
    'Fs',
    'Salinity',
]

In [3]:
# Load the combined data table.
# A forward-slash relative path is used: the previous r'..\combined.csv'
# only resolves on Windows, while '/' is accepted on every platform.
csv_path = '../combined.csv'

# NOTE(review): the recorded output of this cell echoes the read_csv line as a
# warning, presumably a mixed-type DtypeWarning -- confirm. low_memory=False
# makes pandas infer each column's dtype from the whole file at once.
df = pd.read_csv(csv_path, low_memory=False)

# Drop the 'Unnamed: 0' and 'Fungal_infection' columns when present
# (errors='ignore' keeps the cell working if either is missing).
df = df.drop(columns=['Unnamed: 0', 'Fungal_infection'], errors='ignore')

# Stress columns become boolean flags.
# NOTE(review): astype(bool) turns NaN into True -- confirm these columns
# contain no missing values.
df[stresses] = df[stresses].astype(bool)

  df = pd.read_csv(csv_path)


In [4]:
# Spectral columns are the ones whose header starts with 'X'.
# startswith() is safe on an empty header (col[0] would raise IndexError) and
# the isinstance check skips non-string column labels instead of failing.
spec_cols = [
    col for col in df.columns
    if isinstance(col, str) and col.startswith('X')
]

In [5]:
# Trait column names, stored as a 1-D NumPy string array.
trait_cols = np.array([
    'Photo', 'Ci', 'Cond', 'CTleaf', 'Trmmol',
    'WUEi', 'WUEin', 'Fv_Fm', 'Fv_Fo', 'PI',
    'SLA', 'LWC', 'Suc', 'OP', 'OP100',
    'RWC', 'WP', 'N', 'C', 'Neoxanthin',
    'Violaxanthin', 'Lutein', 'Zeaxanthin', 'Chl_b', 'Chl_a',
    'B_carotene', 'Glucose', 'Fructose', 'Sucrose', 'Sugars',
    'Starch', 'Ellagic', 'Gal', 'Rut', 'CTs',
])

In [6]:
# Feature matrix: the columns listed in spec_cols, as a plain NumPy array.
x_spec = df.loc[:, spec_cols].values

# Binary target: True for a row when at least one stress flag is set.
yb = np.any(df.loc[:, stresses].values, axis=1)

# df is intentionally kept alive (the `del df` below was left disabled).
# del df

### Helper Functions

In [7]:
def fuzzy_dx_init(shape, dtype=None):
    """Build a fixed "fuzzy derivative" kernel for use as a kernel initializer.

    Along the first axis the kernel is split into two halves of ``n`` entries
    each: the first half is filled with ``-1/n`` and the second half with
    ``+1/n``. Convolving with it therefore yields the difference between the
    means of two adjacent windows.

    Parameters
    ----------
    shape : sequence of int
        Requested kernel shape. ``shape[0]`` must be even.
    dtype : optional
        Requested dtype. When it is something NumPy understands (for example
        ``'float32'``) the result is cast to it; otherwise the float64 array
        is returned unchanged and the caller is left to convert it.

    Returns
    -------
    numpy.ndarray
        Array of exactly ``shape``.

    Raises
    ------
    ValueError
        If ``shape[0]`` is odd, because the kernel cannot be split into two
        equal halves and the result would not match ``shape``.
    """
    dims = tuple(shape)
    if dims[0] % 2:
        raise ValueError(
            f"fuzzy_dx_init needs an even first dimension, got shape {dims}"
        )

    half = dims[0] // 2
    half_shape = (half,) + dims[1:]

    positive_half = np.ones(half_shape) / half
    # concatenate along axis 0 (rather than vstack) so that a 1-D shape also
    # comes back with the requested shape instead of (2, half).
    kernel = np.concatenate((-positive_half, positive_half), axis=0)

    if dtype is not None:
        try:
            kernel = kernel.astype(dtype)
        except TypeError:
            # dtype is not a NumPy-compatible spec; hand back float64 and let
            # the calling framework perform the conversion.
            pass
    return kernel

In [8]:
def cnn_reshape(x):
    """Reshape ``x`` to ``(-1, x.shape[1], 1)``.

    For a 2-D ``(samples, features)`` array this appends a trailing axis of
    length 1, giving ``(samples, features, 1)``.
    """
    n_features = x.shape[1]
    target_shape = (-1, n_features, 1)
    return x.reshape(target_shape)

## Preprocessing

In [9]:
# Min-max scale every spectral column to the [0, 1] range.
# NOTE(review): the min/max are taken over the full dataset, i.e. before the
# train/validation split in the next cell, so validation rows influence the
# scaling. Consider fitting the statistics on the training rows only.
band_min = x_spec.min(axis=0)
band_range = x_spec.max(axis=0) - band_min

# A constant column has zero range; dividing by it would produce NaN (0/0)
# and poison training. Use a divisor of 1 so such a column maps to 0.
band_range = np.where(band_range == 0, 1, band_range)

x_spec = (x_spec - band_min) / band_range

In [10]:
# 80/20 train/validation split.
# random_state pins the shuffle so a Restart & Run All reproduces the same
# split; stratify=yb keeps the stressed/unstressed ratio equal in both parts,
# which matters because the binary target is imbalanced.
x_spec_train, x_spec_val, yb_train, yb_val = train_test_split(
    x_spec,
    yb,
    test_size=.2,
    random_state=42,
    stratify=yb,
)

## Custom Layers (Residual Block)

In [86]:
def ResBlock1D(x, filters=10, kernel_size=3, pool_size=5, l2=.001):
    """Apply a 1-D residual block followed by max pooling to tensor ``x``.

    Branch: Conv1D -> BatchNormalization -> Conv1D, both convolutions with
    ReLU activation, 'same' padding and an L2 kernel regularizer. The branch
    is added to the shortcut, then ReLU, BatchNormalization and MaxPooling1D
    are applied.

    Parameters
    ----------
    x : tensor
        Input tensor; the last axis is treated as the channel axis.
    filters : int, default 10
        Number of filters of both convolutions.
    kernel_size : int, default 3
        Kernel size of both convolutions.
    pool_size : int, default 5
        Pool size of the final MaxPooling1D.
    l2 : float, default 0.001
        L2 regularization factor for the convolution kernels.

    Returns
    -------
    tensor
        Output of the pooled residual block.

    Notes
    -----
    With the default arguments the block is identical to the previous
    hard-coded version. When the input has a channel count other than 1 or
    ``filters`` the plain addition would fail, so the shortcut is first
    projected to ``filters`` channels with a 1x1 convolution.
    """
    # padding has to be 'same' so the branch keeps the sequence length and
    # can be added to the shortcut
    fx = layers.Conv1D(filters, kernel_size, activation='relu', padding='same',
                       kernel_regularizer=L2(l2))(x)
    fx = layers.BatchNormalization()(fx)
    fx = layers.Conv1D(filters, kernel_size, activation='relu', padding='same',
                       kernel_regularizer=L2(l2))(fx)

    shortcut = x
    in_channels = x.shape[-1]
    # A single-channel input is left untouched and broadcast by Add (this is
    # the case the notebook's model uses); any other mismatch needs a
    # projection so that both operands have `filters` channels.
    if in_channels is not None and in_channels not in (1, filters):
        shortcut = layers.Conv1D(filters, 1, padding='same')(x)

    out = layers.Add()([shortcut, fx])
    out = layers.ReLU()(out)
    out = layers.BatchNormalization()(out)
    out = layers.MaxPooling1D(pool_size)(out)
    return out

## Data Augmentation

### Augmentation Subset

In [51]:
# Pick 1000 training rows at random (indices may repeat) to form the
# augmentation subset.
n_aug = 1000
aug_sel = np.random.randint(len(x_spec_train), size=n_aug)

# np.take with an index array returns new arrays, so the in-place edits made
# to the augmented spectra in later cells do not touch x_spec_train.
x_spec_train_aug = np.take(x_spec_train, aug_sel, axis=0)
yb_train_aug = np.take(yb_train, aug_sel, axis=0)

#### Magnify

In [52]:
# Magnify: scale each augmented spectrum by its own random factor drawn from
# [1, 1 + magnify_scale).
# The previous version used `+=`, which ADDED the factor (exactly 1.0 with a
# scale of 0) to every normalized value instead of multiplying by it.
magnify_scale = .0  # 0 leaves the spectra unchanged; raise it to enable magnification
magnify_factor = np.random.random(size=x_spec_train_aug.shape[0]) * magnify_scale + 1

# reshape to a column so one factor applies to a whole row (sample)
x_spec_train_aug *= magnify_factor.reshape((-1, 1))

In [53]:
# Add zero-mean Gaussian noise (standard deviation 0.01) to every value of the
# augmented spectra, in place.
aug_noise = np.random.normal(scale=.01, size=x_spec_train_aug.shape)
x_spec_train_aug += aug_noise

#### Noise

## CNN

In [113]:
# --- Hyperparameters -------------------------------------------------------
fuzzy_win, blur_factor = 5, 1             # half-length of the fixed kernel; average-pool size
spatial_dropout_k, dropout_k = 0.0, .15   # dropout rates (feature maps; dense head)

# --- Layer stack -----------------------------------------------------------
# Feature extractor: input noise, a frozen convolution whose kernel comes from
# fuzzy_dx_init, average pooling, the residual block and spatial dropout.
# ResBlock1D is a plain function; it is applied like a layer in the loop below.
feature_layers = [
    layers.GaussianNoise(.05),
    layers.Conv1D(1, 2 * fuzzy_win, trainable=False, kernel_initializer=fuzzy_dx_init),
    layers.AveragePooling1D(blur_factor),
    ResBlock1D,
    layers.SpatialDropout1D(spatial_dropout_k),
]

# Classifier head: two L2-regularized dense layers with dropout and a single
# sigmoid output unit.
head_layers = [
    layers.Flatten(),
    layers.Dense(20, activation='relu', kernel_regularizer=L2(.001)),
    layers.Dropout(dropout_k),
    layers.Dense(10, activation='relu', kernel_regularizer=L2(.001)),
    layers.Dropout(dropout_k),
    layers.Dense(1, activation='sigmoid'),
]

cnn_model_layers = feature_layers + head_layers

# --- Graph construction ----------------------------------------------------
n_features = x_spec_train.shape[1]
cnn_model_inputs = layers.Input(shape=(n_features, 1))

# Thread the input through every entry of the stack in order.
cnn_model_outputs = cnn_model_inputs
for layer in cnn_model_layers:
    cnn_model_outputs = layer(cnn_model_outputs)
fx = cnn_model_outputs

cnn_model = Model(inputs=cnn_model_inputs, outputs=cnn_model_outputs)

cnn_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(1e-4),
    metrics=['accuracy'],
)

In [None]:
# Train the CNN for 500 epochs with mini-batches of 5 and print the elapsed
# wall-clock time in seconds.
start = perf_counter()

train_inputs = cnn_reshape(x_spec_train)
val_inputs = cnn_reshape(x_spec_val)

history = cnn_model.fit(
    train_inputs,
    yb_train,
    validation_data=(val_inputs, yb_val),
    batch_size=5,
    epochs=500,
    # Learning-rate schedule, currently disabled:
    # callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.4, patience=20, min_lr=1e-5)]
)

print(perf_counter() - start)

Epoch 1/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.6267 - loss: 0.7382 - val_accuracy: 0.6570 - val_loss: 0.7073
Epoch 2/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6525 - loss: 0.7184 - val_accuracy: 0.6570 - val_loss: 0.7070
Epoch 3/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6495 - loss: 0.7110 - val_accuracy: 0.6039 - val_loss: 0.7373
Epoch 4/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6445 - loss: 0.7356 - val_accuracy: 0.5604 - val_loss: 0.7519
Epoch 5/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6626 - loss: 0.7262 - val_accuracy: 0.5845 - val_loss: 0.7411
Epoch 6/500
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6918 - loss: 0.7026 - val_accuracy: 0.5652 - val_loss: 0.7480
Epoch 7/500
[1m165/1

In [None]:
# Validation accuracy at a 0.5 decision threshold.
# predict() returns shape (n, 1) (single sigmoid unit) while yb_val is (n,).
# Comparing them directly broadcasts to an (n, n) matrix whose mean is not the
# accuracy, so the predictions are flattened to (n,) first.
val_pred = cnn_model.predict(cnn_reshape(x_spec_val)).ravel() > .5
(val_pred == yb_val).mean()

In [None]:
# Fraction of True labels in the validation split, i.e. the accuracy that a
# constant "True" prediction would reach.
np.mean(yb_val)

In [None]:
# Training loss per epoch together with a running mean of the validation loss.
val_loss = np.array(history.history['val_loss'])

# Never use a window longer than the history: np.convolve(mode='valid') would
# silently swap its arguments and return meaningless values.
smooth_win = max(1, min(10, len(val_loss)))
val_loss_smooth = np.convolve(val_loss, np.ones(smooth_win) / smooth_win, mode='valid')

fig, ax = plt.subplots()

ax.plot(history.history['loss'], label='Training')
# mode='valid' yields len - window + 1 points, each the mean of the window
# ENDING at that epoch. Plot them from index window - 1 so the curve lines up
# with the training curve instead of being shifted to the left.
ax.plot(
    np.arange(smooth_win - 1, smooth_win - 1 + len(val_loss_smooth)),
    val_loss_smooth,
    label='Validation (Running Mean)'
)
ax.legend()
ax.set_xlabel('# Epochs')
ax.set_ylabel('Loss')
# ax.set_ylim((0, 5))
ax.set_title('CNN Loss over Training');

In [None]:
# Training accuracy per epoch together with a running mean of the validation
# accuracy.
val_acc = np.array(history.history['val_accuracy'])

# Never use a window longer than the history: np.convolve(mode='valid') would
# silently swap its arguments and return meaningless values.
smooth_win = max(1, min(10, len(val_acc)))
val_acc_smooth = np.convolve(val_acc, np.ones(smooth_win) / smooth_win, mode='valid')

fig, ax = plt.subplots()

ax.plot(history.history['accuracy'], label='Training')
# mode='valid' yields len - window + 1 points, each the mean of the window
# ENDING at that epoch. Plot them from index window - 1 so the curve lines up
# with the training curve instead of being shifted to the left.
ax.plot(
    np.arange(smooth_win - 1, smooth_win - 1 + len(val_acc_smooth)),
    val_acc_smooth,
    label='Validation (Running Mean)'
)
ax.legend()
ax.set_xlabel('# Epochs')
ax.set_ylabel('Accuracy')
ax.set_ylim((0, 1))
ax.set_title('CNN Accuracy over Training');