In [1]:
# need to run nn.py from Jupyter in order to use wandb

In [3]:
# file to implement the cnn
# @oscars47
# first call mastercnn prep to generate np arrays; then run this file

import os
import numpy as np
from keras import layers
from keras.models import Model, Sequential, load_model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import wandb
from wandb.keras import WandbCallback


# define directories---------
MAIN_DIR = '/home/oscar47/Desktop/P-ai'
TRAIN_DIR = os.path.join(MAIN_DIR, 'train_data') # to store out .npy files

# get np arrays for training!------
train_x_ds = np.load(os.path.join(TRAIN_DIR, 'train_x_ds.npy')) 
val_x_ds = np.load(os.path.join(TRAIN_DIR, 'val_x_ds.npy')) 
train_y_ds = np.load(os.path.join(TRAIN_DIR, 'train_y_ds.npy')) 
val_y_ds = np.load(os.path.join(TRAIN_DIR, 'val_y_ds.npy'))

# define shape of incoming and outgoing factors
input_shape = train_x_ds[0].shape
output_shape = len(train_y_ds[0])
#print(input_shape, output_shape)

# define cnn---------------
def build_model(input_shape, output_len, size1, size2, size3, size4, size5, dense1, learning_rate):
    model = Sequential() # initialize Sequential model object so we can add layers sequentially
    model.add(layers.InputLayer(input_shape)) # add the shape of our input x training vectors
    # add a sequence of 5 convolutional layers, alternating Conv2D and MaxPooling
    model.add(layers.Conv2D(size1, (3, 3), activation='relu', padding='same')) # the (3,3) is size of the kernel -- this is a hyperparam we can use wanb to investigate as well
    model.add(layers.MaxPool2D((2,2), padding='same'))

    model.add(layers.Conv2D(size2, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))

    model.add(layers.Conv2D(size3, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))

    model.add(layers.Conv2D(size4, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))

    model.add(layers.Conv2D(size5, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPool2D((2,2), padding='same'))

    model.add(layers.Flatten()) # convert array to vector
    model.add(layers.Dense(dense1, activation='relu')) # add a final dense layer
    model.add(layers.Dense(output_len)) # match output size: which should just be size 1 (a single number)

    optimizer = Adam(learning_rate = learning_rate) # compile the model!
    model.compile(optimizer=optimizer, loss='mse')

    return model

# function for training
def train(config=None):
    with wandb.init(config=config):
    # If called by wandb.agent, as below,
    # this config will be set by Sweep Controller
      config = wandb.config

      #pprint.pprint(config)

      #initialize the neural net; 
      global model
      model = build_model(input_shape, output_shape, config.size_1,  config.size_2, config.size_3, 
              config.size_4, config.size_5, 
              config.dense1, config.learning_rate)
      
      #now run training
      history = model.fit(
        train_x_ds, train_y_ds,
        batch_size = config.batch_size,
        validation_data=(val_x_ds, val_y_ds),
        epochs=config.epochs,
        callbacks=[WandbCallback()] #use callbacks to have w&b log stats; will automatically save best model                     
      )

# set dictionary with random search; optimizing val_loss--------------------------
sweep_config= {
    'method': 'random',
    'name': 'val_accuracy',
    'goal': 'maximize'
}

sweep_config['metric']= 'val_accuracy'
parameters_dict = {
    'epochs': {
       'distribution': 'int_uniform',
       'min': 20,
       'max': 100
    },
    # for build_dataset
     'batch_size': {
       'values': [32, 64, 96, 128]
    },
    'size_1': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'size_2': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_3': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_4': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_5': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'dense1': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'learning_rate':{
         #uniform distribution between 0 and 1
         'distribution': 'uniform', 
         'min': 0,
         'max': 0.1
     }
}

# append parameters to sweep config
sweep_config['parameters'] = parameters_dict 


# login to wandb----------------
wandb.init(project="Oscar CNN1", entity="p-ai")

# initialize sweep agent
sweep_id = wandb.sweep(sweep_config, project="Oscar CNN1", entity="p-ai")
wandb.agent(sweep_id, train, count=100)

Error in callback <function _WandbInit._resume_backend at 0x7f8030103e50> (for pre_run_cell):


BrokenPipeError: [Errno 32] Broken pipe





Create sweep with ID: imyyurrb
Sweep URL: https://wandb.ai/p-ai/Oscar%20CNN1/sweeps/imyyurrb


wandb: Waiting for W&B process to finish... (success).
wandb: Synced winter-sweep-3: https://wandb.ai/p-ai/Oscar%20CNN1/runs/vpohx0c3
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20221120_080441-vpohx0c3/logs
[34m[1mwandb[0m: Agent Starting Run: 1gwgjtv8 with config:
[34m[1mwandb[0m: 	batch_size: 96
[34m[1mwandb[0m: 	dense1: 106
[34m[1mwandb[0m: 	epochs: 65
[34m[1mwandb[0m: 	learning_rate: 0.08858964939096996
[34m[1mwandb[0m: 	size_1: 91
[34m[1mwandb[0m: 	size_2: 134
[34m[1mwandb[0m: 	size_3: 224
[34m[1mwandb[0m: 	size_4: 126
[34m[1mwandb[0m: 	size_5: 104




VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: [32m[41mERROR[0m Run 1gwgjtv8 errored: ValueError('Data cardinality is ambiguous:\n  x sizes: 6\n  y sizes: 1447\nMake sure all arrays contain the same number of samples.')
[34m[1mwandb[0m: Agent Starting Run: ruzvtmzi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense1: 115
[34m[1mwandb[0m: 	epochs: 91
[34m[1mwandb[0m: 	learning_rate: 0.027879085721748333
[34m[1mwandb[0m: 	size_1: 199
[34m[1mwandb[0m: 	size_2: 235
[34m[1mwandb[0m: 	size_3: 219
[34m[1mwandb[0m: 	size_4: 65
[34m[1mwandb[0m: 	size_5: 114


[34m[1mwandb[0m: [32m[41mERROR[0m Run ruzvtmzi errored: ValueError('Data cardinality is ambiguous:\n  x sizes: 6\n  y sizes: 1447\nMake sure all arrays contain the same number of samples.')
[34m[1mwandb[0m: Agent Starting Run: meoa0h9g with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense1: 213
[34m[1mwandb[0m: 	epochs: 99
[34m[1mwandb[0m: 	learning_rate: 0.047486949151492645
[34m[1mwandb[0m: 	size_1: 244
[34m[1mwandb[0m: 	size_2: 171
[34m[1mwandb[0m: 	size_3: 196
[34m[1mwandb[0m: 	size_4: 176
[34m[1mwandb[0m: 	size_5: 222


[34m[1mwandb[0m: [32m[41mERROR[0m Run meoa0h9g errored: ValueError('Data cardinality is ambiguous:\n  x sizes: 6\n  y sizes: 1447\nMake sure all arrays contain the same number of samples.')
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: To disable this check set WANDB_AGENT_DISABLE_FLAPPING=true


Error in callback <function _WandbInit._pause_backend at 0x7f80300b8dc0> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe