In [1]:
import mygrad as mg
import numpy as np
from mynn.layers.dense import dense
from mynn.initializers.glorot_normal import glorot_normal
from mynn.optimizers.adam import Adam
from mygrad.nnet.losses import softmax_crossentropy
from gensim.models.keyedvectors import KeyedVectors
from noggin import create_plot

In [2]:
# Saved model weights; entries 0-10 are copied onto the model by position in a later cell.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects, which can
# execute code -- only load a "model.npy" that comes from a trusted source.
params = np.load("model.npy", allow_pickle=True)

In [3]:
class RNN:
    """Sequence classifier: a gated recurrent unit followed by a dense read-out layer.

    The input sequence is run through ``mg.nnet.gru`` and only the last entry of
    what it returns is handed to the dense layer, so each sentence is converted
    into a single array of scores.
    """

    def __init__(self, dim_input, dim_recurrent, dim_output):
        """ Creates the dense read-out layer and the nine GRU parameters.

        Parameters
        ----------
        dim_input: int
            Dimensionality of data passed to RNN (C)

        dim_recurrent: int
            Dimensionality of hidden state in RNN (D)

        dim_output: int
            Dimensionality of output of RNN (K)
        """

        def randn_tensor(*shape):
            # Standard-normal draws from NumPy's global RNG, wrapped as a tensor.
            return mg.Tensor(np.random.randn(*shape))

        input_shape = (dim_input, dim_recurrent)
        recurrent_shape = (dim_recurrent, dim_recurrent)

        # The dense layer is created before the GRU parameters, and the GRU
        # parameters are drawn in z, r, h order, so a seeded global RNG yields
        # the same values as before.
        self.fc_h2y = dense(dim_recurrent, dim_output, weight_initializer=glorot_normal)

        # "z" parameter group (input weights, recurrent weights, bias)
        self.Uz = randn_tensor(*input_shape)
        self.Wz = randn_tensor(*recurrent_shape)
        self.bz = randn_tensor(dim_recurrent)

        # "r" parameter group
        self.Ur = randn_tensor(*input_shape)
        self.Wr = randn_tensor(*recurrent_shape)
        self.br = randn_tensor(dim_recurrent)

        # "h" parameter group
        self.Uh = randn_tensor(*input_shape)
        self.Wh = randn_tensor(*recurrent_shape)
        self.bh = randn_tensor(dim_recurrent)

    def __call__(self, x):
        """ Performs the full forward pass for the RNN.

        Only the last entry of the GRU output is used - it feeds the dense layer
        that produces the final classification scores for the full sequence.

        Parameters
        ----------
        x: Union[numpy.ndarray, mygrad.Tensor]
            The sequence to classify; passed unchanged to ``mg.nnet.gru``.
            NOTE(review): the predict cells in this notebook pass a
            (T, 1, C) array of word embeddings - confirm this is the shape
            ``mg.nnet.gru`` expects.

        Returns
        -------
        mygrad.Tensor
            The dense layer's output for the last GRU entry
            (presumably shape (1, K) for a single sequence - TODO confirm)
        """
        gru_weights = (
            self.Uz, self.Wz, self.bz,
            self.Ur, self.Wr, self.br,
            self.Uh, self.Wh, self.bh,
        )
        hidden_states = mg.nnet.gru(x, *gru_weights)
        return self.fc_h2y(hidden_states[-1])

    @property
    def parameters(self):
        """ A convenience function for getting all the parameters of our model.

        This can be accessed as an attribute, via `model.parameters`

        Returns
        -------
        Tuple[Tensor, ...]
            The dense layer's parameters followed by
            Uz, Wz, bz, Ur, Wr, br, Uh, Wh, bh (in that order)
        """
        gru_weights = (
            self.Uz, self.Wz, self.bz,
            self.Ur, self.Wr, self.br,
            self.Uh, self.Wh, self.bh,
        )
        return self.fc_h2y.parameters + gru_weights


In [4]:
# Build the network (50-d inputs, 16 hidden units, 2 output classes) and replace its
# random initialisation with the saved weights.
model = RNN(50, 16, 2)

# `params` is read by position: dense weight and bias first, then the three GRU
# parameter groups (presumably the order of `RNN.parameters` at save time -- TODO confirm).
model.fc_h2y.weight, model.fc_h2y.bias = params[0], params[1]
model.Uz, model.Wz, model.bz = params[2], params[3], params[4]
model.Ur, model.Wr, model.br = params[5], params[6], params[7]
model.Uh, model.Wh, model.bh = params[8], params[9], params[10]

In [5]:
# Word vectors in word2vec text format (binary=False); looked up per word in `to_glove`.
# Presumably the 50-dimensional GloVe 6B set, going by the file name -- TODO confirm.
glove = KeyedVectors.load_word2vec_format("glove.6B.50d.txt.w2v", binary=False)

In [6]:
MAXLEN = 100  # every sentence is truncated / zero-padded to this many word vectors


def to_glove(sentence):
    """Turn a sentence into a fixed-length list of word vectors.

    The sentence is split on whitespace and each word is lower-cased and looked
    up in the module-level `glove` vectors. Words that are not in the vocabulary
    are dropped. The result is cut to the first MAXLEN vectors, or padded at the
    end with all-zero vectors, so it always has exactly MAXLEN entries.

    Parameters
    ----------
    sentence: str
        The text to embed.

    Returns
    -------
    List[numpy.ndarray]
        MAXLEN arrays, each of length `glove.vector_size`. Padding entries are
        separate `np.zeros` arrays (float64).
    """
    dim = glove.vector_size  # padding must match the width of the loaded vectors

    vectors = []
    for word in sentence.split():
        try:
            vectors.append(glove[word.lower()])
        except KeyError:
            # Out-of-vocabulary word: skip it. Any other error (I/O problems,
            # KeyboardInterrupt, ...) is deliberately allowed to propagate.
            continue

    vectors = vectors[:MAXLEN]
    vectors.extend(np.zeros(dim) for _ in range(MAXLEN - len(vectors)))
    return vectors

In [21]:
# Class labels, indexed by the model's predicted class: index 0 -> "UP", index 1 -> "DOWN".
Keys = ["UP", "DOWN"]

In [22]:
"""
Takes in a single sentence and runs inference to determine whether the stock value will increase or decrease
"""

def predict(sentence):
    """Print the model's label ("UP" or "DOWN") for a single sentence.

    The sentence is embedded with `to_glove`, arranged as one sequence of 100
    steps with 50 features each, and run through `model`; the label of the
    highest-scoring class is printed. Nothing is returned.

    NOTE(review): a later cell defines another `predict` that takes a list of
    sentences; once that cell has run, the name `predict` refers to that
    function instead of this one.
    """
    embedded = np.array(to_glove(sentence))
    # Time-major layout with a singleton middle axis: (100, 1, 50).
    batch = embedded.reshape(100, 1, 50)
    class_index = np.argmax(model(batch))
    print(Keys[class_index])

In [23]:
"""
Takes in a list of sentences about a given stock and determines whether the value of the stock will increase or decrease depending on whether there are more positive results or more negative results
"""
def predict(multiple_sentences):
    """Majority-vote the model's UP/DOWN call over sentences about one stock.

    Each sentence is embedded with `to_glove`, classified by `model`, and
    counted as a vote. The winning label is printed together with the share of
    sentences that voted for it (a vote share, not a model probability).
    Nothing is returned.

    Parameters
    ----------
    multiple_sentences: Iterable[str] or str
        The sentences to classify. A single string is treated as one sentence
        rather than being iterated character by character.

    Raises
    ------
    ValueError
        If no sentences are given (previously this surfaced as a
        ZeroDivisionError when computing the vote share).

    Notes
    -----
    Class 1 corresponds to Keys[1]; every other predicted class counts toward
    Keys[0]. A tie is reported as Keys[0].
    """
    if isinstance(multiple_sentences, str):
        multiple_sentences = [multiple_sentences]

    class1_votes = 0  # sentences the model assigned to class 1 (Keys[1])
    other_votes = 0   # all remaining sentences (reported as Keys[0])
    for sentence in multiple_sentences:
        embedded = to_glove(sentence)
        # One sequence, time-major: (100 steps, 1, 50 features).
        w = np.ascontiguousarray(np.swapaxes(np.array(embedded).reshape(1, 100, 50), 0, 1))
        if np.argmax(model(w)) == 1:
            class1_votes += 1
        else:
            other_votes += 1

    total = class1_votes + other_votes
    if total == 0:
        raise ValueError("predict() needs at least one sentence")

    if class1_votes > other_votes:
        label, share = Keys[1], class1_votes / total * 100
    else:
        label, share = Keys[0], other_votes / total * 100
    print(label, share, "percent sure")
    

In [24]:
# Demo call: two sentences are classified and the majority label is printed with its vote share.
predict(["Elon Musk died today", "I like pie"])

DOWN 100.0 percent sure
