In [1]:
"""
This file is meant to hold CNN-type neural networks that process sentences as frame+variable.
They are currently implemented with Keras (tensorflow.keras).
"""
import warnings # suppress some warnings
# Ignore every FutureWarning for the rest of the session. This sits in the first
# cell, ahead of the tensorflow/spacy imports - presumably so that warnings those
# libraries raise at import time are hidden as well.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Input
from tensorflow.keras.layers import Conv1D, Conv2D, Dense, MaxPooling1D, MaxPooling2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay, PiecewiseConstantDecay, PolynomialDecay, InverseTimeDecay
from tensorflow.keras.utils import to_categorical, plot_model

In [3]:
import numpy as np

In [4]:
import spacy
# Load the spaCy pipeline named "en_core_web_md"; spacy.load() needs that model
# package to be installed in the kernel's environment.
# NOTE(review): in the visible cells `nlp` is only used by the commented-out
# example, which reads `token.vector` to build the (max_sent_len, 300) inputs.
nlp = spacy.load("en_core_web_md")

In [5]:
# pretend_X = []
# pretend_sentences = ["I have one dream .", "The plane landed outside of the airport .", "testing testing"]
# pretend_max_sent_len = 10
# for s in pretend_sentences:
#     vecs = np.zeros((pretend_max_sent_len, 300))
#     tokens = nlp(s)
#     for i, t in enumerate(tokens):
#         vecs[i] = t.vector
#     pretend_X.append(vecs)
# pretend_X = np.array(pretend_X)

# pretend_Y = [3, 5, 0]
# pretend_Y = to_categorical(pretend_Y, num_classes=pretend_max_sent_len)

In [6]:
# TODO: can add attention? can use recurrent structure?
class textToVarNN:
    """Keras network that guesses which word of a sentence is the variable.

    Each sentence is presented as a ``(max_sentence_len, word_vec_len)`` matrix
    of word vectors, and the network outputs a softmax over the
    ``max_sentence_len`` word positions. Training targets are expected to be
    one-hot vectors of length ``max_sentence_len`` marking the variable word.

    The class attributes below are the tuning knobs; ``max_sentence_len`` and
    ``word_vec_len`` fix the input shape and are read when the model is built.
    """
    # can increase if computer has GPU (may want to check if GPU is in use by tensorflow)
    batch_size = 16
    # at least 200 for actual training; use fewer epochs for tweaking when you can see patterns quickly
    epochs = 20
    max_sentence_len = 10 # EDITable value; make sure same for both networks
    word_vec_len = 300 # shape of spacy word vec

    def __init__(self):
        """Build and compile the model, store it on ``self.model``, and print its summary."""
        model = Sequential()

        # input indicates shape of X for a single entry
        model.add(Input(shape=(self.max_sentence_len, self.word_vec_len)))

        # the middle layers can be TWEAKed
        # to find a configuration that gives good results
        # candidate layers: Conv1D, Conv2D, Dense, Dropout, Flatten, MaxPooling1D, MaxPooling2D
        # candidate activations: relu, softmax, sigmoid, None
        # an advanced activation is also available as a layer: LeakyReLU()
        # Note: it is not usually useful to have more than 10 layers
        model.add(Dense(128, activation='relu'))
        model.add(Dense(48))

        # output has to be within sentence len,
        # since training Y is going to be one-hot vectors
        # indicating which one of the input words is the variable.
        # alternatively, look into the CategoryEncoding layer
        model.add(Flatten())
        model.add(Dense(self.max_sentence_len, activation='softmax'))

        # the learning rate can be TWEAKed
        # see https://keras.io/api/optimizers/
        # see https://keras.io/api/optimizers/learning_rate_schedules/
        lr_schedule = ExponentialDecay(
            initial_learning_rate=1e-2,
            decay_steps=10000,
            decay_rate=0.9)
        optimizer = Adam(learning_rate=lr_schedule)

        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer)
        self.model = model

        # Note: if it runs very slow, or stalls the computer,
        # the reason might be too many trainable parameters.
        # remember a batch of X, Y, and the parameters are all
        # held in memory when this is being trained
        #
        # summary() prints the table itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None" after the table).
        self.model.summary()

    def train(self, X, Y, batch_size=batch_size, epochs=epochs):
        """Fit the model on ``X``/``Y``, holding out 20% of the samples for validation.

        Args:
            X: ndarray of inputs, one ``(max_sentence_len, word_vec_len)``
                matrix per sentence.
            Y: ndarray of one-hot targets of length ``max_sentence_len``.
            batch_size: samples per gradient step. The default is the class
                value captured when the class body was executed.
            epochs: number of passes over the training data. Same note on the
                default as ``batch_size``.
        """
        # when it trains, the validation error should be trending down
        self.model.fit(X, Y, batch_size=batch_size, epochs=epochs, validation_split=0.2)

    def predict(self, X):
        """Sample one word position per sentence from the model's output.

        The result is stochastic: each position is drawn at random with the
        model's predicted probabilities as weights, rather than taking the
        argmax.

        Args:
            X: ndarray of inputs, shaped like the training ``X``.

        Returns:
            A list with one sampled position index per row of the prediction.
        """
        # Work in float64 and renormalise each row below, so that rows which
        # are only approximately normalised (e.g. low-precision softmax output)
        # do not trip np.random.choice's "probabilities do not sum to 1" check.
        pred_Y = np.asarray(self.model.predict(X), dtype=np.float64)

        pred = []
        for probs in pred_Y:
            positions = np.arange(len(probs))
            # picks a random index, using probabilities weighted by this row
            pred.append(np.random.choice(positions, p=probs / probs.sum()))
        return pred

In [None]:
# test = textToVarNN()
# test.train(pretend_X, pretend_Y, epochs=4)
# testpred = test.predict(pretend_X)

In [None]:
# plot_model(test.model, "temp.png", show_shapes=True)

In [None]:
# INCOMPLETE
class frameToVarNN:
    """Unfinished counterpart to textToVarNN (work in progress).

    So far the constructor only declares two Keras input tensors; no model is
    assembled or stored, and ``train`` / ``predict`` are empty stubs.
    """
    # Tuning knobs.
    # batch_size: can increase if computer has GPU (may want to check if GPU is in use by tensorflow)
    batch_size = 16
    # epochs: at least 200 for actual training; fewer are fine while tweaking
    epochs = 20
    # max_sentence_len: EDITable value; make sure same for both networks
    max_sentence_len = 10
    # word_vec_len: shape of spacy word vec
    word_vec_len = 300

    def __init__(self):
        """Declare the 'frame' and 'varindex' inputs (not yet wired into a model)."""
        sentence_shape = (self.max_sentence_len, self.word_vec_len)
        frame_tensor = Input(shape=sentence_shape, name='frame')

        # can use embedding layer here
        index_shape = (self.max_sentence_len, )
        varindex_tensor = Input(shape=index_shape, name='varindex')

    def train(self):
        """Stub: does nothing yet.

        Once implemented, the validation error should be trending down while
        it trains.
        """

    def predict(self):
        """Stub: does nothing yet.

        Intended to return one prediction picked by weighted chance.
        """