In [7]:
import warnings

import numpy as np
from keras import Sequential
from keras.layers import Bidirectional, Dense, LSTM, Embedding, SpatialDropout1D
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from raise_utils.learners.learner import Learner
from sklearn.ensemble import RandomForestClassifier


In [None]:
class RandomForest(Learner):
    """Learner wrapping scikit-learn's ``RandomForestClassifier``."""

    def __init__(self, weighted=False, *args, **kwargs):
        """
        Sets up the underlying random forest.
        :param weighted: If truthy, build the forest with class_weight="balanced"
        :param args: Args passed to Learner
        :param kwargs: Keyword args passed to Learner
        """
        super().__init__(*args, **kwargs)

        # Only pass class_weight when weighting was requested; otherwise the
        # classifier is built with its defaults.
        forest_options = {"class_weight": "balanced"} if weighted else {}
        self.learner = RandomForestClassifier(**forest_options)

        # Candidate values/ranges handed to the base class helper below.
        # NOTE(review): how these are consumed is defined in Learner, which is
        # not visible here.
        self.random_map = dict(
            criterion=["gini", "entropy"],
            n_estimators=(10, 100),
        )
        self._instantiate_random_vals()

In [8]:
class BILSTM(Learner):
    """Binary classifier made of stacked bidirectional LSTM layers (Keras)."""

    def __init__(self, epochs=10, max_words=1000, embedding=5, n_layers=1, *args, **kwargs):
        """
        Initializes the BILSTM Classifier.
        :param epochs: Number of epochs to train for
        :param max_words: Maximum number of top words to consider
        :param embedding: Embedding dimensionality
        :param n_layers: Number of LSTM layers
        :param args: Args passed to Learner
        :param kwargs: Keyword args passed to Learner
        """
        super(BILSTM, self).__init__(*args, **kwargs)
        self.epochs = epochs
        self.max_words = max_words
        self.embed_dim = embedding
        self.n_layers = n_layers
        # This is where we set our random attributes
        self.random_map = {
            "max_words": (500, 5000),
            "n_layers": (1, 4)
        }
        # Until fit() builds the Keras model, the learner is this object.
        # NOTE(review): presumably so _instantiate_random_vals() applies the
        # random_map entries to this instance -- confirm against Learner.
        self.learner = self
        self._instantiate_random_vals()

    def fit(self):
        """
        Builds the network, runs the training hooks and trains the model.

        The network is Embedding -> SpatialDropout1D -> n_layers bidirectional
        LSTMs -> a single sigmoid unit, compiled with binary cross-entropy and
        Adam. After this call self.learner is the Keras model.
        """
        self._check_data()
        model = Sequential()
        model.add(Embedding(self.max_words, self.embed_dim,
                            input_length=self.x_train.shape[1]))
        model.add(SpatialDropout1D(0.2))
        for layer_idx in range(self.n_layers):
            # Every recurrent layer except the last must emit the full
            # sequence; otherwise the next LSTM receives 2-D input and the
            # model cannot be built when n_layers > 1.
            feeds_another_lstm = layer_idx < self.n_layers - 1
            model.add(Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2,
                                         return_sequences=feeds_another_lstm)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam')

        self.learner = model

        # pre_train hooks are called with this learner object.
        if self.hooks is not None and self.hooks.get('pre_train', None):
            for hook in self.hooks['pre_train']:
                hook.call(self)

        model.fit(self.x_train, self.y_train,
                  batch_size=64, epochs=self.epochs)

        # post_train hooks are called with the Keras model, not with self.
        # NOTE(review): asymmetric with pre_train; kept as in the original.
        if self.hooks is not None and self.hooks.get('post_train', None):
            for hook in self.hooks['post_train']:
                hook.call(model)

    def predict_on_test(self) -> np.ndarray:
        """
        Makes class predictions for self.x_test with the trained model.
        :return: np.ndarray of int32 labels (1 where the sigmoid output exceeds 0.5)
        :raises RuntimeError: If called before fit()
        """
        if self.learner is self:
            # Before fit() the learner is this object, so delegating to
            # self.learner.predict() would recurse through predict() forever.
            raise RuntimeError("fit() must be called before predict_on_test()")

        # Sequential.predict_classes() was removed from recent Keras releases;
        # for a single sigmoid output it was this thresholding.
        probabilities = self.learner.predict(self.x_test)
        return (probabilities > 0.5).astype("int32")

    def predict(self, x_test):
        """
        Overrides parent method, ignoring argument passed.
        :param x_test: Ignored.
        :return: Array of preds.
        """
        warnings.warn("predict() should not be used with BILSTM. Instead, use predict_on_test"
                      ". The argument is ignored")
        return self.predict_on_test()