In [1]:
# main file to manange CAE for PHLUID
# @oscars47

import os
from keras import layers
from keras.models import Sequential
from keras.optimizers import Adam
import wandb
from wandb.keras import WandbCallback
from caehelper import *

# define img dimesions
IMG_HEIGHT = 104
IMG_WIDTH = 104
input_shape = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)) # do care about color

# load data
DATA_DIR= '/home/oscar47/Desktop/physics/swarm_data/cae_output'

train_ds = np.load(os.path.join(DATA_DIR, 'train_ds.npy'))
val_ds = np.load(os.path.join(DATA_DIR, 'val_ds.npy'))


# build autoencoder-------------
def build_cae(input_shape, conv2d1_size=32, conv2d2_size=32, conv2d3_size=32, convtrans1_size=32, convtrans2_size=32, convtrans3_size=32, learning_rate=0.01):
    # encoder
    model = Sequential()
    model.add(input_shape)
    #model.add(layers.Flatten())
    model.add(layers.Conv2D(conv2d1_size, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))
    model.add(layers.Conv2D(conv2d2_size, (3,3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))
    model.add(layers.Conv2D(conv2d3_size, (3,3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same', name='FV')) # name this layer the FV (feature vector) so we can pull from it later

    # decoder
    model.add(layers.Conv2DTranspose(convtrans1_size, (3,3), activation='relu', padding='same'))
    model.add(layers.UpSampling2D((2,2)))
    model.add(layers.Conv2DTranspose(convtrans2_size, (3,3), activation='relu', padding='same'))
    model.add(layers.UpSampling2D((2,2)))
    model.add(layers.Conv2DTranspose(convtrans3_size, (3,3), activation='relu', padding='same'))
    model.add(layers.UpSampling2D((2,2)))
    model.add(layers.Conv2D(3, (3,3), padding='same', name='OUT'))

    optimizer = Adam(learning_rate = learning_rate)
    model.compile(optimizer=optimizer, loss='mse')

    model.summary()

    return model

def train(config=None):
    with wandb.init(config=config):
    # If called by wandb.agent, as below,
    # this config will be set by Sweep Controller
      config = wandb.config

      #pprint.pprint(config)

      #initialize the neural net; 
      global model
      model = build_cae(input_shape, conv2d1_size=config.conv2d1_size, conv2d2_size=config.conv2d2_size, conv2d3_size=config.conv2d3_size, convtrans1_size=config.convtrans1_size, 
         convtrans2_size=config.convtrans2_size, convtrans3_size=config.convtrans1_size, learning_rate = config.learning_rate)
      
      #now run training
      history = model.fit(
        train_ds, train_ds,
        batch_size = config.batch_size,
        validation_data=(val_ds, val_ds),
        shuffle=False,
        epochs=config.epochs,
        callbacks=[WandbCallback()] #use callbacks to have w&b log stats; will automatically save best model                     
      )

def train_custom():
   global model
   model = build_cae(input_shape)
      
   #now run training
   history = model.fit(
      train_ds, train_ds,
      batch_size = 32,
      validation_data=(val_ds, val_ds),
      shuffle=False,
      epochs=5,                    
   )


# set dictionary with random search; optimizing val_loss--------------------------
sweep_config= {
    'method': 'random',
    'name': 'val_accuracy',
    'goal': 'maximize'
}

sweep_config['metric']= 'val_accuracy'

parameters_dict = {
    'epochs': {
       'distribution': 'int_uniform',
       'min': 15,
       'max': 20
    },
    # for build_dataset
     'batch_size': {
       'values': [x for x in range(32, 161, 32)]
    },
    'conv2d1_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'conv2d2_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'conv2d3_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'convtrans1_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'convtrans2_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'convtrans3_size': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'learning_rate':{
         #uniform distribution between 0 and 1
         'distribution': 'uniform', 
         'min': 0,
         'max': 0.1
     }
}

# append parameters to sweep config
sweep_config['parameters'] = parameters_dict 

# login to wandb----------------
wandb.init(project="PHLUID-SwarmCAE1", entity="oscarscholin")

# initialize sweep agent
sweep_id = wandb.sweep(sweep_config, project='PHLUID-SwarmCAE1', entity="oscarscholin")
wandb.agent(sweep_id, train, count=20)

#train_custom()


2022-11-27 02:21:02.510714: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-27 02:21:02.595682: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-11-27 02:21:02.597877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-27 02:21:02.597883: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc



Create sweep with ID: zy5108b1
Sweep URL: https://wandb.ai/oscarscholin/PHLUID-SwarmCAE1/sweeps/zy5108b1


wandb: Waiting for W&B process to finish... (success).
wandb: Synced driven-resonance-1: https://wandb.ai/oscarscholin/PHLUID-SwarmCAE1/runs/1w1cn7sg
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20221127_022107-1w1cn7sg/logs
[34m[1mwandb[0m: Agent Starting Run: dyebso85 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv2d1_size: 213
[34m[1mwandb[0m: 	conv2d2_size: 183
[34m[1mwandb[0m: 	conv2d3_size: 69
[34m[1mwandb[0m: 	convtrans1_size: 227
[34m[1mwandb[0m: 	convtrans2_size: 90
[34m[1mwandb[0m: 	convtrans3_size: 79
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.05240793517295496


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 104, 104, 213)     5964      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 52, 52, 213)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 52, 52, 183)       350994    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 26, 26, 183)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 26, 26, 69)        113712    
                                                                 
 FV (MaxPooling2D)           (None, 13, 13, 69)        0

2022-11-27 02:21:24.897066: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-27 02:21:24.897697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/oscar47/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-27 02:21:24.897734: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/oscar47/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-27 02:21:24.897756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 

Epoch 1/20

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Error in callback <function _WandbInit._pause_backend at 0x7f4441076ca0> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe