## File to implement nn with wandb
(note that we can't run wandb in terminal)

In [2]:
# file to load the nn

import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras import layers
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import wandb
from wandb.keras import WandbCallback

# read in our data
DATA_DIR = '/home/oscar47/Desktop/astro101/data/g_band/var_output/v0.1.1'

# check if keras recognizes gpu
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

train_x_ds = np.load(os.path.join(DATA_DIR, 'train_x_ds.npy'))
val_x_ds = np.load(os.path.join(DATA_DIR, 'val_x_ds.npy'))
train_y_ds = np.load(os.path.join(DATA_DIR, 'train_y_ds.npy'))
val_y_ds = np.load(os.path.join(DATA_DIR, 'val_y_ds.npy'))

input_shape = train_x_ds[0].shape
output_len = len(train_y_ds[0])

# build model functions--------------------------------
def build_model(size1, size2, size3, size4, size5, dropout, learning_rate):
    model = Sequential()

    model.add(layers.Dense(size1))
    model.add(layers.Dense(size2))
    model.add(layers.Dense(size3))
    model.add(layers.Dense(size4))
    model.add(layers.Dense(size5))

    model.add(layers.Dropout(dropout))
    model.add(layers.Dense(output_len))

    # return len of class size
    model.add(layers.Dense(output_len))
    model.add(layers.Activation('softmax'))

    optimizer = Adam(learning_rate = learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')

    return model


def train(config=None):
    with wandb.init(config=config):
    # If called by wandb.agent, as below,
    # this config will be set by Sweep Controller
      config = wandb.config

      #pprint.pprint(config)

      #initialize the neural net; 
      global model
      model = build_model(config.size_1,  config.size_2, config.size_3, 
              config.size_4, config.size_5, 
              config.dropout, config.learning_rate)
      
      #now run training
      history = model.fit(
        train_x_ds, train_y_ds,
        batch_size = config.batch_size,
        validation_data=(val_x_ds, val_y_ds),
        epochs=config.epochs,
        callbacks=[WandbCallback()] #use callbacks to have w&b log stats; will automatically save best model                     
      )

def train_manual():
    global model
    model = build_model(128, 128, 128, 
            128, 128, 
            .1, .001)
    
    #now run training
    history = model.fit(
    train_x_ds, train_y_ds,
    batch_size = 64,
    validation_data=(val_x_ds, val_y_ds),
    epochs=10
    )

# set dictionary with random search; optimizing val_loss--------------------------
sweep_config= {
    'method': 'random',
    'name': 'val_accuracy',
    'goal': 'maximize'
}

sweep_config['metric']= 'val_accuracy'

# now name hyperparameters with nested dictionary
# parameters_dict = {
#     'epochs': {
#        'distribution': 'int_uniform',
#        'min': 10,
#        'max': 20
#     },
#     # for build_dataset
#      'batch_size': {
#        'distribution': 'q_log_uniform',  #we want to specify a distribution type to more efficiently iterate through these hyperparams
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'size_1': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'size_2': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_3': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_4': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#      'size_5': {
#        'distribution': 'q_log_uniform',
#        'q': 8,
#        'min': np.log(64),
#        'max': np.log(256)
#     },
#     'dropout': {
#       'distribution': 'uniform',
#        'min': 0,
#        'max': 0.6
#     },
#     'learning_rate':{
#          #uniform distribution between 0 and 1
#          'distribution': 'uniform', 
#          'min': 0,
#          'max': 0.1
#      }
# }

parameters_dict = {
    'epochs': {
       'distribution': 'int_uniform',
       'min': 20,
       'max': 100
    },
    # for build_dataset
     'batch_size': {
       'values': [x for x in range(32, 161, 32)]
    },
    'size_1': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'size_2': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_3': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_4': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },'size_5': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'dropout': {
      'distribution': 'uniform',
       'min': 0,
       'max': 0.6
    },
    'learning_rate':{
         #uniform distribution between 0 and 1
         'distribution': 'uniform', 
         'min': 0,
         'max': 0.1
     }
}

# append parameters to sweep config
sweep_config['parameters'] = parameters_dict 

# login to wandb----------------
wandb.init(project="Astro101_Project_NewData2", entity="oscarscholin")

# initialize sweep agent
sweep_id = wandb.sweep(sweep_config, project='Astro101_Project_NewData2', entity="oscarscholin")
wandb.agent(sweep_id, train, count=100)

#train_manual()

Error in callback <function _WandbInit._resume_backend at 0x7f48925ed4c0> (for pre_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

Num GPUs Available:  0



Epoch 4/78

0,1
epoch,▁▅█

0,1
epoch,2.0
loss,
val_loss,




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671129333311305, max=1.0…



Thread HandlerThread:
Traceback (most recent call last):
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/internal_util.py", line 50, in run
    self._run()
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/internal_util.py", line 101, in _run
    self._process(record)
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/internal.py", line 263, in _process
    self._hm.handle(record)
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/handler.py", line 131, in handle
    handler(record)
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/handler.py", line 141, in handle_request
    handler(record)
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/handler.py", line 674, in handle_request_run_start
    self._system_monitor.probe(publish=True)
  File "/home/oscar47/anaconda3/lib/python3.9/site-packages/wandb/sdk/internal/sy

Create sweep with ID: zbiih4kd
Sweep URL: https://wandb.ai/oscarscholin/Astro101_Project_NewData2/sweeps/zbiih4kd


wandb: ERROR Internal wandb error: file data was not synced
[34m[1mwandb[0m: Agent Starting Run: tjrejhx5 with config:
[34m[1mwandb[0m: 	batch_size: 96
[34m[1mwandb[0m: 	dropout: 0.3577125646502209
[34m[1mwandb[0m: 	epochs: 42
[34m[1mwandb[0m: 	learning_rate: 0.05445772284529308
[34m[1mwandb[0m: 	size_1: 81
[34m[1mwandb[0m: 	size_2: 154
[34m[1mwandb[0m: 	size_3: 232
[34m[1mwandb[0m: 	size_4: 196
[34m[1mwandb[0m: 	size_5: 102


Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Error in callback <function _WandbInit._pause_backend at 0x7f48925ed790> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

In [3]:
import numpy as np
DATA_DIR = '/home/oscar47/Desktop/astro101/data/g_band/var_output/v0.1.1'
train_x_ds = np.load(os.path.join(DATA_DIR, 'train_x_ds.npy'))
print(train_x_ds.shape)
print(train_x_ds)

Error in callback <function _WandbInit._resume_backend at 0x7f48925ed4c0> (for pre_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

(151544, 36)
[[1.12026576e-03 9.52918279e-03 3.82409288e-08 ... 6.53834384e-04
  7.32369861e-07 3.78055018e-03]
 [5.85132640e-03 1.16994827e-02 0.00000000e+00 ... 2.67029609e-03
  8.47311384e-04 3.72233503e-02]
 [3.61437134e-04 1.60933772e-03 9.99933027e-01 ... 9.54538204e-04
  2.53752681e-06 2.45542082e-02]
 ...
 [1.38252012e-04 7.26818112e-03 6.71068139e-15 ... 5.61618673e-04
  9.13362445e-07 2.40955313e-01]
 [3.95040514e-04 1.18670953e-03 9.99972548e-01 ... 1.25218747e-03
  8.03309459e-07 4.20482068e-02]
 [3.90194461e-04 3.52843239e-03 9.33968167e-01 ... 6.93921701e-04
  3.22696000e-07 1.40796733e-02]]
Error in callback <function _WandbInit._pause_backend at 0x7f48925ed790> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

In [10]:
import pandas as pd
mm_targ = pd.read_csv(os.path.join(DATA_DIR,'mm_2_n_targ.csv'))
print(mm_targ.shape)

(378861, 39)
