In [1]:
import tensorflow.keras as keras
import tensorflow_addons as tfa
import tensorflow as tf
import pandas as pd
import numpy as np
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Activation, Dense, Dropout, Flatten, Conv2D,\
                        BatchNormalization, Concatenate, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import models
from tensorflow.keras.utils import plot_model

from src.analysis_tools import plot_confusion_matrix, extract_data
from src.cnn_models import basic_cnn, simple_cnn, dense_cnn, coeff_determination, rmse

%load_ext autoreload
%autoreload 2

In [2]:
def z_score(df):
    m,s = np.mean(df), np.std(df)
    return (df-m)/s

In [3]:
stations = ['ParkersBrook','BrownsBrook','GrantPoole' ]
stations = ['ParkersBrook' ]

dataset_names = ['train', 'val', 'test']
for station in stations:
    d = f'/datadrive/stream_data/training/{station}'
    dataset = {i:{'directory':f'{d}/{i}',\
                    'dataframe':pd.read_csv(f'{d}/{i}_table.csv')} for i in dataset_names}
#need to apply standard scaler
from sklearn.preprocessing import StandardScaler
for d in dataset_names:
    dataset[d]['dataframe']['DISCHARGE_SCALED'] = z_score(dataset[d]['dataframe']['DISCHARGE'])

In [4]:
#flow from dataframe
batch_size = 1 
seed = 1337
#input_shape = (480,600)
input_shape = (240,300)

train_datagen = ImageDataGenerator(
        rescale=1./255)
#        width_shift_range=0.05,\
#        height_shift_range=0.05,\
#        rotation_range=1,\
#        brightness_range=(0.8,1))
#        shear_range=0.1)
#        zoom_range=0.2)
#        horizontal_flip=True)

train_generator=train_datagen.flow_from_dataframe(\
    dataframe=dataset['train']['dataframe'],\
    directory=dataset['train']['directory'],\
    x_col='FILENAME',\
    y_col='DISCHARGE_SCALED',\
    subset='training',\
    batch_size=batch_size,\
    seed=seed,\
    shuffle=True,\
    class_mode='raw',\
    target_size=input_shape)

valid_generator=train_datagen.flow_from_dataframe(\
    dataframe=dataset['val']['dataframe'],\
    directory=dataset['val']['directory'],\
    x_col='FILENAME',\
    y_col='DISCHARGE_SCALED',\
    subset='training',\
    batch_size=batch_size,\
    seed=seed,\
    shuffle=True,\
    class_mode='raw',\
    target_size=input_shape)
    
test_datagen=ImageDataGenerator(rescale=1./255.)
test_generator=test_datagen.flow_from_dataframe(\
    dataframe=dataset['test']['dataframe'],\
    directory=dataset['test']['directory'],\
    x_col='FILENAME',\
    y_col='DISCHARGE_SCALED',\
    subset='training',\
    batch_size=batch_size,\
    seed=seed,\
    shuffle=True,\
    class_mode='raw',\
    target_size=input_shape)

Found 4056 validated image filenames.
Found 869 validated image filenames.
Found 870 validated image filenames.


In [5]:
#input_shape = (480,600,3)
input_shape = (240,300,3)
output_shape=(1)
n_dblocks=1
#model = basic_cnn(input_shape=input_shape,\
#                  output_shape=output_shape,\
#                  final_activation='relu')
#model = simple_cnn(input_shape=input_shape, output_shape=output_shape, final_activation='relu')
model = dense_cnn(input_shape=input_shape, n_dblocks=n_dblocks,\
                  output_shape=output_shape, final_activation='relu')
model.summary()
#opt = Adam(lr=0.006)
opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.1)
loss = 'mean_squared_error'
#loss = rmse
model.compile(loss=loss, optimizer=opt, metrics=[coeff_determination])

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None, 240, 300, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 240, 300, 16) 48          input_layer[0][0]                
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 240, 300, 16) 64          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 240, 300, 16) 0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [112]:
EPOCHS = 2
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=EPOCHS)

Epoch 1/2

KeyboardInterrupt: 

In [16]:
model.evaluate_generator(generator=valid_generator, steps=STEP_SIZE_TEST)
test_generator.reset()
pred=model.predict_generator(test_generator, steps=STEP_SIZE_TEST, verbose=1)



  numdigits = int(np.log10(self.target)) + 1


OverflowError: cannot convert float infinity to integer

In [99]:
z = test_generator.next()

In [100]:
pred = model.predict(z[0])

In [42]:
from sklearn.metrics import r2_score
r2_score(t, pred)

-2.0376375052069307

In [62]:
y_true = z[1].flatten()

In [63]:
y_pred = pred.flatten()

In [53]:
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
r2_score(y_true, y_pred)

0.9486081370449679

In [73]:
ssr = np.sum((np.array(y_true)-np.array(y_pred))**2)
sst = np.sum((np.array(y_true)-np.mean(np.array(y_pred)))**2)
tot = 1-(ssr/sst+K.epsilon()) 

In [74]:
tot

-1.1713545267078018e-07

In [69]:
sst

3.9444782826652087

In [None]:

val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

input_shape = (480,600)
batch_size = 32

train_generator = train_datagen.flow_from_directory(
        data_dirs['train'],
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='binary')

validation_generator = val_datagen.flow_from_directory(
        data_dirs['val'],
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='binary')

test_generator = test_datagen.flow_from_directory(
        data_dirs['test'],
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='binary')

model.fit(
        train_generator,
        steps_per_epoch=2000,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=800)

In [None]:
shift = 7 
r_range = 5 
batch_size = 8 
brightness_range=(0.8,1)
datagen = ImageDataGenerator(rescale=1,\
                             rotation_range=r_range,\
                             width_shift_range=shift,\
                             height_shift_range=shift,\
                             brightness_range=brightness_range)
datagen.fit(x_train, augment=True, seed=1234)