## File to implement nn with wandb
(note that we can't run wandb in terminal)

In [1]:
# file to load the nn

import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras import layers
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import wandb
from wandb.keras import WandbCallback

# read in our data
DATA_DIR = '/home/oscar47/Desktop/astro101/data/g_band/var_output/'

# check if keras recognizes gpu
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

train_x_ds = np.load(os.path.join(DATA_DIR, 'train_x_ds.npy'))
val_x_ds = np.load(os.path.join(DATA_DIR, 'val_x_ds.npy'))
train_y_ds = np.load(os.path.join(DATA_DIR, 'train_y_ds.npy'))
val_y_ds = np.load(os.path.join(DATA_DIR, 'val_y_ds.npy'))

input_shape = train_x_ds[0].shape
output_len = len(train_y_ds[0])

# build model functions--------------------------------
def build_model(size1, size2, size3, size4, size5, dropout, learning_rate):
    model = Sequential()

    model.add(layers.Dense(size1))
    model.add(layers.Dense(size2))
    model.add(layers.Dense(size3))
    model.add(layers.Dense(size4))
    model.add(layers.Dense(size5))

    model.add(layers.Dropout(dropout))
    model.add(layers.Dense(output_len))

    # return len of class size
    model.add(layers.Dense(output_len))
    model.add(layers.Activation('softmax'))

    optimizer = Adam(learning_rate = learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')

    return model


def train(config=None):
    with wandb.init(config=config):
    # If called by wandb.agent, as below,
    # this config will be set by Sweep Controller
      config = wandb.config

      #pprint.pprint(config)

      #initialize the neural net; 
      global model
      model = build_model(config.size_1,  config.size_2, config.size_3, 
              config.size_4, config.size_5, 
              config.dropout, config.learning_rate)
      
      #now run training
      history = model.fit(
        train_x_ds, train_y_ds,
        batch_size = config.batch_size,
        validation_data=(val_x_ds, val_y_ds),
        epochs=config.epochs,
        callbacks=[WandbCallback()] #use callbacks to have w&b log stats; will automatically save best model                     
      )

def train_manual():
    global model
    model = build_model(128, 128, 128, 
            128, 128, 
            .1, .001)
    
    #now run training
    history = model.fit(
    train_x_ds, train_y_ds,
    batch_size = 64,
    validation_data=(val_x_ds, val_y_ds),
    epochs=10
    )

# set dictionary with random search; optimizing val_loss--------------------------
sweep_config= {
    'method': 'random',
    'name': 'val_accuracy',
    'goal': 'maximize'
}

sweep_config['metric']= 'val_accuracy'

# now name hyperparameters with nested dictionary
# parameters_dict = {
#     'epochs': {
#        'distribution': 'int_uniform',
#        'min': 10,
#        'max': 20
#     },
#     # for build_dataset
#      'batch_size': {
#        'distribution': 'q_log_uniform',  #we want to specify a distribution type to more efficiently iterate through these hyperparams
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'size_1': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'size_2': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_3': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_4': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_5': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'dropout': {
#       'distribution': 'uniform',
#        'min': 0,
#        'max': 0.6
#     },
#     'learning_rate':{
#          #uniform distribution between 0 and 1
#          'distribution': 'uniform', 
#          'min': 0,
#          'max': 0.1
#      }
# }

parameters_dict = {
    'epochs': {
       'distribution': 'int_uniform',
       'min': 20,
       'max': 100
    },
    # for build_dataset
     'batch_size': {
       'values': [32, 64, 96, 128]
    },
    'size_1': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'size_2': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_3': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_4': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_5': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'dropout': {
      'distribution': 'uniform',
       'min': 0,
       'max': 0.6
    },
    'learning_rate':{
         #uniform distribution between 0 and 1
         'distribution': 'uniform', 
         'min': 0,
         'max': 0.1
     }
}

# append parameters to sweep config
sweep_config['parameters'] = parameters_dict 

# login to wandb----------------
wandb.init(project="Astro101_Project_v2", entity="oscarscholin")

# initialize sweep agent
sweep_id = wandb.sweep(sweep_config, project='Astro101_Project_v2', entity="oscarscholin")
wandb.agent(sweep_id, train, count=100)

#train_manual()

2022-11-18 04:45:40.620608: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-18 04:45:40.690566: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-11-18 04:45:40.692276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-18 04:45:40.692282: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc

Num GPUs Available:  0


2022-11-18 04:45:41.565993: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-18 04:45:41.566456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-18 04:45:41.566478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-11-18 04:45:41.566493: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-11-18 04:45:41.566508: W tensorflow/c



Create sweep with ID: jp6ffgsr
Sweep URL: https://wandb.ai/oscarscholin/Astro101_Project_v2/sweeps/jp6ffgsr


wandb: Waiting for W&B process to finish... (success).
wandb: Synced chocolate-wind-1: https://wandb.ai/oscarscholin/Astro101_Project_v2/runs/2bk9pzob
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20221118_044542-2bk9pzob/logs
[34m[1mwandb[0m: Agent Starting Run: o2j2quim with config:
[34m[1mwandb[0m: 	batch_size: 96
[34m[1mwandb[0m: 	dropout: 0.1886337325733656
[34m[1mwandb[0m: 	epochs: 41
[34m[1mwandb[0m: 	learning_rate: 0.04671489595440448
[34m[1mwandb[0m: 	size_1: 238
[34m[1mwandb[0m: 	size_2: 182
[34m[1mwandb[0m: 	size_3: 209
[34m[1mwandb[0m: 	size_4: 162
[34m[1mwandb[0m: 	size_5: 73


2022-11-18 04:45:59.605961: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/41


[34m[1mwandb[0m: Adding directory to artifact (/home/oscar47/Desktop/astro101/Astro101-Final-Project/nn_v0.0.1/wandb/run-20221118_044555-o2j2quim/files/model-best)... Done. 0.0s


Epoch 2/41


[34m[1mwandb[0m: Adding directory to artifact (/home/oscar47/Desktop/astro101/Astro101-Final-Project/nn_v0.0.1/wandb/run-20221118_044555-o2j2quim/files/model-best)... Done. 0.0s


Epoch 3/41
Epoch 4/41
Epoch 5/41
Epoch 6/41
Epoch 7/41
Epoch 8/41
Epoch 9/41
Epoch 10/41
Epoch 11/41
Epoch 12/41
Epoch 13/41
Epoch 14/41
Epoch 15/41
Epoch 16/41
Epoch 17/41
Epoch 18/41
Epoch 19/41
Epoch 20/41
Epoch 21/41
Epoch 22/41
Epoch 23/41
Epoch 24/41