In [1]:
# file to train network
# @oscars47

import os
import numpy as np
from keras.callbacks import LambdaCallback, ModelCheckpoint, ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Activation
from keras.optimizers import RMSprop
import tensorflow as tf
import wandb
from wandb.keras import *

# check GPU num
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

from dataprep2 import TextData # import TextData class for processing
from modelpredict2 import * # get functions to interpret output

# define path
MAIN_DIR = '/home/oscar47/Desktop/thinking_parrot'
DATA_DIR = os.path.join(MAIN_DIR, 'texts_prep') # main
#DATA_DIR = os.path.join(MAIN_DIR, 'texts_prep', 'test') # for testing

# define master txt file
MASTER_TEXT_PATH = os.path.join(MAIN_DIR, 'texts', 'master.txt')
#MASTER_TEXT_PATH = os.path.join(MAIN_DIR, 'texts', 'toaster_man.txt')

# initialize text object
maxChar = 100
master=TextData(MASTER_TEXT_PATH, maxChar)
# get alphabet
alphabet = master.alphabet
char_to_int= master.char_to_int
int_to_char = master.int_to_char
text = master.text

# read in files for training
x_train = np.load(os.path.join(DATA_DIR, 'x_train.npy'))
y_train = np.load(os.path.join(DATA_DIR, 'y_train.npy'))
x_val = np.load(os.path.join(DATA_DIR, 'x_val.npy'))
y_val = np.load(os.path.join(DATA_DIR, 'y_val.npy'))

# build model functions--------------------------------
def build_model(LSTM_layer_size_1,  LSTM_layer_size_2, LSTM_layer_size_3, 
          LSTM_layer_size_4, LSTM_layer_size_5, 
          dropout, learning_rate):
    # call initialize function
    
    model = Sequential()
    # RNN layers for language processing
    model.add(LSTM(LSTM_layer_size_1, input_shape = (2*maxChar, len(alphabet)), return_sequences=True))
    model.add(LSTM(LSTM_layer_size_2, return_sequences=True))
    model.add(LSTM(LSTM_layer_size_3, return_sequences=True))
    model.add(LSTM(LSTM_layer_size_4, return_sequences=True))
    model.add(LSTM(LSTM_layer_size_5))

    model.add(Dropout(dropout))

    model.add(Dense(len(alphabet)))
    model.add(Activation('softmax'))


    # put structure together
    optimizer = RMSprop(learning_rate = learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')

    return model

def train(config=None):
    with wandb.init(config=config):
    # If called by wandb.agent, as below,
    # this config will be set by Sweep Controller
      config = wandb.config

      #pprint.pprint(config)

      #initialize the neural net; 
      global model
      model = build_model(config.LSTM_layer_size_1,  config.LSTM_layer_size_2, config.LSTM_layer_size_3, 
              config.LSTM_layer_size_4, config.LSTM_layer_size_5, 
              config.dropout, config.learning_rate)
      
      #now run training
      history = model.fit(
        x_train, y_train,
        batch_size = config.batch_size,
        validation_data=(x_val, y_val),
        epochs=config.epochs,
        callbacks=callbacks #use callbacks to have w&b log stats; will automatically save best model                     
      ) 

# helper functions from Keras

# do this each time we begin a new epoch    
def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = np.random.randint(1, len(text) - maxChar - 1)
    # need to check how much to pad
    if start_index < maxChar:
        sentence0 = text[0:start_index]
        sentence1 = text[start_index+1: start_index+start_index]
        sentence = sentence0+sentence1
    else:
        stdev = (1/2)*(maxChar - 1)
        mean = (maxChar - 1)
        toast_len = int(np.random.normal(mean, stdev)) # get normalized-skewed toast length
        sentence0 = text[start_index-toast_len:start_index]
        sentence1 = text[start_index+1: start_index+toast_len]
        sentence =  sentence0+ sentence1
    

    # 1. compute difference from maxChar and len/2
    diff = maxChar - int(len(sentence)/2)
    # 2. initialize new string for each sentence
    complete_sentence = ''
    for i in range(diff):
        complete_sentence+='£' # appending forbidden
    # 3. now add 'real' sentence
    complete_sentence+=sentence
    # 4. append forbidden again
    for i in range(diff):
        complete_sentence+='£'


    for diversity in [0.1, 0.5,1.2]:
        print('----- diversity:', diversity)

        generated = ''
        #generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        #sys.stdout.write(generated)

        # generate 400 characters worth of test
        for i in range(400):
            # prepare chosen sentence as part of new dataset
            x_pred = np.zeros((1, 2*maxChar, len(alphabet)))
            for t, char in enumerate(sentence):
                if char != '£': # encode 1 iff it's not padded
                    x_pred[0, t, char_to_int[char]] = 1.

            # use the current model to predict what outputs are
            preds = model.predict(x_pred, verbose=0)[0]
            # call the function above to interpret the probabilities and add a degree of freedom
            next_index = sample(preds, diversity)
            #convert predicted number to character
            next_char = int_to_char[next_index]

            generated+=next_char

            # check size of sentence; if still small can keep old stuff in sentence0
            if len(sentence) >= 2*maxChar:
                sentence0 = sentence0[1:]
            sentence0 += next_char # append new middle character
            sentence=sentence0+sentence1 # append to main sentence

            # print the new character as we create it
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# define search parameters-----------------
# holds wandb config nested dictionaries
# @oscars47

# set dictionary with random search; optimizing val_loss
sweep_config= {
    'method': 'random',
    'name': 'val_loss',
    'goal': 'minimize'
}

sweep_config['metric']= 'val_loss'

# now name hyperparameters with nested dictionary
parameters_dict = {
    'epochs': {
       'value':5
    },
    # for build_dataset
     'batch_size': {
       'distribution': 'int_uniform',  #we want to specify a distribution type to more efficiently iterate through these hyperparams
       'min': 64,
       'max': 128
    },
    'LSTM_layer_size_1': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'LSTM_layer_size_2': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'LSTM_layer_size_3': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'LSTM_layer_size_4': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
    'LSTM_layer_size_5': {
       'distribution': 'int_uniform',
       'min': 64,
       'max': 256
    },
     'dropout': {
             'distribution': 'uniform',
       'min': 0,
       'max': 0.6
    },
    'learning_rate':{
         #uniform distribution between 0 and 1
         'distribution': 'uniform', 
         'min': 0,
         'max': 0.1
     }
}

# append parameters to sweep config
sweep_config['parameters'] = parameters_dict

# login to wandb-------------------------
wandb.init(project="Thinking-Parrot2.0", entity="oscarscholin")

# finish with callbacks------------
# use the two helper functions above to create the LambdaCallback 
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# define two other callbacks
# save model
# if no directory "models" exists, create it
if not(os.path.exists('models')):
    os.mkdir('./models/')
modelpath = "models/shakespeare_v0.0.1.hdf5"
checkpoint = ModelCheckpoint(modelpath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')
# if learning stals, reduce the LR
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=1, min_lr=0.001)

# compile the callbacks
callbacks = [print_callback, checkpoint, reduce_lr, WandbCallback()]

# initialize sweep!

sweep_id = wandb.sweep(sweep_config, project="Thinking-Parrot2.0", entity="oscarscholin")

# 'train' tells agent function is train
# 'count': number of times to run this
wandb.agent(sweep_id, train, count=100)

2022-12-10 04:16:45.830716: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-10 04:16:45.939366: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-12-10 04:16:45.942086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-10 04:16:45.942098: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc

Num GPUs Available:  0


2022-12-10 04:16:47.089600: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-10 04:16:47.090160: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-10 04:16:47.090182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-12-10 04:16:47.090198: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-12-10 04:16:47.090215: W tensorflow/c

[' ', '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '‘', '’']


[34m[1mwandb[0m: Currently logged in as: [33moscarscholin[0m. Use [1m`wandb login --relogin`[0m to force relogin




Create sweep with ID: 69u12s4r
Sweep URL: https://wandb.ai/oscarscholin/Thinking-Parrot2.0/sweeps/69u12s4r


wandb: Waiting for W&B process to finish... (success).
wandb: Synced solar-wind-9: https://wandb.ai/oscarscholin/Thinking-Parrot2.0/runs/2640hjoe
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20221210_041648-2640hjoe/logs
[34m[1mwandb[0m: Agent Starting Run: byrutwea with config:
[34m[1mwandb[0m: 	LSTM_layer_size_1: 206
[34m[1mwandb[0m: 	LSTM_layer_size_2: 64
[34m[1mwandb[0m: 	LSTM_layer_size_3: 83
[34m[1mwandb[0m: 	LSTM_layer_size_4: 218
[34m[1mwandb[0m: 	LSTM_layer_size_5: 72
[34m[1mwandb[0m: 	batch_size: 116
[34m[1mwandb[0m: 	dropout: 0.2596728364930626
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.09468993032858795


2022-12-10 04:17:00.426946: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/5
----- Generating text after Epoch: 0
----- diversity: 0.1
----- Generating with seed: "eheat function.DEFROST FUNCTION (Refe to Pg 5)When using bread straight f"
  g oi     cn neneanie nn  nbe ine  ueneeine y  edeee e eGo nia  e rceonbe gneee en  u e oe  edeh  er go. hee th oe   e cpn     t rN   r nn  uni N e  be  iep  n e  ta eecden  i th ne nshe     t    eiiif eeie eop  tn  ee e u   e     nern n rnee ee    ngn     i   e   in  a een  n  eeee   h eic  ci c nr  newleen  enci  hf hn.h   u e e   he  tn  ibeesi    ctht  e oNune ieneee   h n1enR .ee e    nb  no
----- diversity: 0.2
----- Generating with seed: " n rnee ee    ngn     i   e   in  a een  n  eeee   h eic  ci c nr  newleen  enci  hf hn.h   u e e   he  tn  ibeesi    ctht  e oNune ieneee   h n1enR .ee e    nb  no to Pg 5)When using bread straight f"
 i     o   e enehoe p eene e necee eeeete eue in   d  i h    eeeeoiee e  n b  ng eeo enc  ie  c h ee l ep eia  eiiei  dcee   e e    eee huc  ine  o.escr  i s oe p    n e    p 

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Error in callback <function _WandbInit._pause_backend at 0x7ff61458ae50> (for post_run_cell):


BrokenPipeError: [Errno 32] Broken pipe

re t oc.ri e.o n    e  h h n  e d i   en oee  e   l e  ce c.b   ee nD.eono  d e ee ciec ib oee e  he ce.  iie e be.ei  n e  c h  eceo   c ennii  n i  euoie eye    ea i    e r eeene db nee e  l e. ie e d  c  a eeeee e  eod eseee     eo re   e n na  ce  e oe ee e ne nouee n n    ncco eo pnn  c  eo ao n  en hh.c no   e eR o ec niboe   eug.
----- diversity: 1.5
----- Generating with seed: " eeene db nee e  l e. ie e d  c  a eeeee e  eod eseee     eo re   e n na  ce  e oe ee e ne nouee n n    ncco eo pnn  c  eo ao n  en hh.c no   e eR o ec niboe   eug. to Pg 5)When using bread straight f"
e   e ee isE   eC  n el e.eo ie eee     re.  h  e   ntbe.nee  eneeecl ie   ee ee e renene   ui i eeeennn  eae ee ner ic penen  e ei e s  nceeo ne   oo eeo oee c yee ce t coh  o     e enso cn n  eeene  ehcc e neeeehn    o ieetiedn eec eseeeee e e ee hde n es nan  Io iE eee b  ee oe enh ee  a  eet cee eeo eee n ene en i     e n  c   O n  e .ne o     o nirn   anetcebcc  l ne  nr see  es e oy eenee ia
----- di



INFO:tensorflow:Assets written to: /home/oscar47/Desktop/thinking_parrot/Literary-RNN/model_v0.1.0/wandb/run-20221210_041648-2640hjoe/files/model-best/assets


INFO:tensorflow:Assets written to: /home/oscar47/Desktop/thinking_parrot/Literary-RNN/model_v0.1.0/wandb/run-20221210_041648-2640hjoe/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/home/oscar47/Desktop/thinking_parrot/Literary-RNN/model_v0.1.0/wandb/run-20221210_041648-2640hjoe/files/model-best)... Done. 0.0s


Epoch 2/5
  8/126 [>.............................] - ETA: 58s - loss: 3.2324