In [None]:
# Standard library
import os
# imports re for text cleaning
import re
# we will ignore pandas warning
import warnings
from datetime import datetime, timedelta

warnings.filterwarnings('ignore')

# connect with weights and biases api
import wandb
from wandb.keras import WandbCallback

import numpy as np
import pandas as pd

# all lightfm imports
from lightfm.data import Dataset
from lightfm import LightFM
from lightfm import cross_validation
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
# using a customer history based and astrologer categorical segmentation based hybrid model

import tensorflow as tf
from tensorflow.keras.models import load_model

In [None]:
# NOTE(review): this path ends in ".pkl" yet is traversed as a directory, and
# data_load() further down reads "content/numpy_data" instead -- confirm which
# location is intended.
input_path = "/numpy_data.pkl"

# Collect the .npy files that sit one directory level below input_path.
# os.listdir returns entries in arbitrary order, so both levels are sorted to
# keep the row order (and therefore the train/test split) reproducible.
interp_files = []
for subdir in sorted(os.listdir(input_path)):
    subdir_path = os.path.join(input_path, subdir)
    if not os.path.isdir(subdir_path):
        continue  # stray files next to the sub-directories are ignored
    for file_name in sorted(os.listdir(subdir_path)):
        if file_name.endswith(".npy"):
            interp_files.append(os.path.join(subdir_path, file_name))

if not interp_files:
    raise FileNotFoundError(f"No .npy files found under {input_path!r}")

# Stack all arrays row-wise in a single call. The earlier version appended to a
# (0, 0) placeholder, which np.append rejects because the column counts differ,
# and then sliced off the first 4800 rows, which would have been real data.
x = np.concatenate([np.load(path) for path in interp_files], axis=0)
x = x.astype(np.float64, copy=False)
print(x.shape)

# The first 2 columns are the inputs and the remaining 9 the targets; each gets
# a trailing axis so the arrays are (samples, steps, 1).
n = x.shape[0]
x_vals = x[:, :2].reshape((n, 2, 1))
y_vals = x[:, 2:].reshape((n, 9, 1))
print(x_vals.shape, y_vals.shape)

# Splitting data into train and test data (first 70% of rows / last 30%)
split = int(0.7 * n)
x_train = x_vals[:split, :, :]
y_train = y_vals[:split, :, :]
x_test = x_vals[split:, :, :]
y_test = y_vals[split:, :, :]

In [None]:
# Log in to Weights & Biases before the sweep and its runs are created below.
wandb.login()

def data_load(input_path="content/numpy_data", n_inputs=2, train_frac=0.7):
    """Load every ``.npy`` array under ``input_path`` and split it into train/test sets.

    The directory is expected to contain sub-directories holding ``.npy``
    files; each file is loaded as a 2-D array and all of them are stacked
    row-wise. The first ``n_inputs`` columns of each row become the model
    inputs and the remaining columns the targets.

    Args:
        input_path: Directory whose sub-directories contain the ``.npy`` files.
        n_inputs: Number of leading columns used as inputs (the model built in
            this notebook uses ``input_shape=(2, 1)``).
        train_frac: Fraction of rows, taken from the start, used for training.

    Returns:
        ``(x_train, y_train, x_test, y_test)``, each shaped
        ``(rows, columns, 1)``.

    Raises:
        FileNotFoundError: If no ``.npy`` file is found under ``input_path``.
        ValueError: If the stacked data is not 2-D.
    """
    # os.listdir returns entries in arbitrary order; sorting both levels keeps
    # the row order, and with it the train/test split, reproducible.
    interp_files = []
    for subdir in sorted(os.listdir(input_path)):
        subdir_path = os.path.join(input_path, subdir)
        if not os.path.isdir(subdir_path):
            continue  # stray files next to the sub-directories are ignored
        for file_name in sorted(os.listdir(subdir_path)):
            if file_name.endswith(".npy"):
                interp_files.append(os.path.join(subdir_path, file_name))

    if not interp_files:
        raise FileNotFoundError(f"No .npy files found under {input_path!r}")

    # One concatenate call replaces the zero-filled placeholder that was grown
    # with np.append in a loop and sliced off afterwards. The cast keeps the
    # float64 dtype that accumulating onto np.zeros used to produce.
    x = np.concatenate([np.load(path) for path in interp_files], axis=0)
    x = x.astype(np.float64, copy=False)
    if x.ndim != 2:
        raise ValueError(f"Expected 2-D data, got an array of shape {x.shape}")

    # Inputs are the leading n_inputs columns. The previous ':9' / '9:' split
    # contradicted the (rows, 2, 1) / (rows, 9, 1) reshapes and always raised.
    n = x.shape[0]
    x_vals = x[:, :n_inputs].reshape((n, -1, 1))
    y_vals = x[:, n_inputs:].reshape((n, -1, 1))
    print(x_vals.shape, y_vals.shape)

    split = int(train_frac * n)
    x_train = x_vals[:split, :, :]
    y_train = y_vals[:split, :, :]
    x_test = x_vals[split:, :, :]
    y_test = y_vals[split:, :, :]
    return x_train, y_train, x_test, y_test

# Build the train/test arrays once at module level; train() reads them as globals.
x_train, y_train, x_test, y_test = data_load()

# Sweep definition handed to wandb.sweep(): the search method, the metric to
# minimise, the early-termination rule, and the candidate values for each
# hyperparameter that train() reads back from wandb.config.
sweep_config = {
    "method": "bayes",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "early_terminate": {"type": "hyperband", "min_iter": 5},
    "parameters": {
        "batch_size": {"values": [32]},
        "learning_rate": {"values": [0.0001]},
        "neurons": {"values": [32, 64]},
        "activation": {"values": ["tanh", "relu"]},
    },
}

def get_compiled_model():
    """Build the stacked-LSTM sequence model from the active ``wandb.config``.

    The layer width and activation come from ``wandb.config.neurons`` and
    ``wandb.config.activation``. The network takes inputs of shape ``(2, 1)``,
    repeats the first LSTM's output 9 times, passes it through seven
    sequence-returning LSTM layers and applies a per-step ``Dense(1)``.

    Despite the name, the model is returned *uncompiled*; ``train()`` compiles it.

    Returns:
        A ``tf.keras.models.Sequential`` instance.
    """
    layers = tf.keras.layers
    units = wandb.config.neurons
    act = wandb.config.activation

    net = tf.keras.models.Sequential()
    net.add(layers.LSTM(units, activation=act, input_shape=(2, 1)))
    net.add(layers.RepeatVector(9))
    # Seven identical LSTM layers, each emitting the full sequence.
    for _ in range(7):
        net.add(layers.LSTM(units, activation=act, return_sequences=True))
    net.add(layers.TimeDistributed(layers.Dense(1)))
    return net

def train():
    """Run one sweep trial: build, fit and evaluate the model for the current W&B config.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``
    arrays and is passed to ``wandb.agent`` as its ``function``. Metrics are
    tracked through ``WandbCallback`` and a custom ``'Test Error Rate'`` value
    is logged at the end. Returns nothing.
    """
    # Default hyperparameters; values supplied by the sweep override these.
    config_defaults = {
        'batch_size': 32,
        'learning_rate': 0.0001,
        'neurons': 32,
        'activation': 'tanh'
    }

    # Start the run with the defaults above.
    wandb.init(config=config_defaults)

    # Hyperparameters that are fixed rather than swept.
    wandb.config.epochs = 300

    # Training pipeline: shuffled, batched and prefetched.
    trainloader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    trainloader = (
        trainloader
        .shuffle(1024)
        .batch(wandb.config.batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
    # The held-out pipeline is not shuffled; it serves for both validation and
    # the final evaluation.
    testloader = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    testloader = (
        testloader
        .batch(wandb.config.batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    # Reset Keras' global state before building the model for this trial.
    # This is reached through `tf.keras`: a bare `keras` name is not imported
    # in this notebook and raised NameError.
    tf.keras.backend.clear_session()
    model = get_compiled_model()

    # Compile with the learning rate chosen for this trial.
    # NOTE(review): 'acc' is tracked on an MSE regression, so the accuracy and
    # the derived 'Test Error Rate' below may not be meaningful -- confirm
    # whether a regression metric was intended.
    opt = tf.keras.optimizers.Adam(learning_rate=wandb.config.learning_rate)
    model.compile(optimizer=opt, loss='mse', metrics=['acc'])

    # Train; WandbCallback tracks the metrics automatically.
    model.fit(trainloader,
              epochs=wandb.config.epochs,
              validation_data=testloader,
              callbacks=[WandbCallback()])

    # Evaluate on the held-out data and log the custom metric.
    loss, accuracy = model.evaluate(testloader, callbacks=[WandbCallback()])
    error_rate = round((1 - accuracy) * 100, 2)
    print('Test Error Rate: ', error_rate)
    wandb.log({'Test Error Rate': error_rate})

# Create the sweep from sweep_config under the given project/entity and keep its id.
sweep_id = wandb.sweep(sweep_config, project="ddp-second_run", entity="ddp_profpatra")
# Start an agent for that sweep; it invokes train() for the trials it runs.
wandb.agent(sweep_id, function=train)

In [None]:
# Plot the validation-loss history of the four sweep runs and save the figure.
# NOTE(review): `plt` and `df` are not defined in the cells visible here; this
# cell needs matplotlib.pyplot imported as `plt` and a DataFrame `df` holding a
# 'Step' column plus one '<run name> - val_loss' column per run.
val_loss_curves = [
    ('swept-sweep-5 - val_loss', 'tanh - 64 neurons'),
    ('divine-sweep-3 - val_loss', 'relu - 64 neurons'),
    ('twilight-sweep-2 - val_loss', 'tanh - 32 neurons'),
    ('glad-sweep-1 - val_loss', 'relu - 32 neurons'),
]

plt.figure()
plt.rcParams.update({'font.size': 12})
for column, legend_label in val_loss_curves:
    plt.plot(df['Step'], df[column], label=legend_label)
plt.ylabel('MSE')
plt.xlabel('No. of epochs')
plt.legend()
plt.savefig('300_epoch_train.png', dpi=700, bbox_inches="tight")
plt.show()