# ULM baseline experiments

My first attempts at using TensorFlow.

In [16]:
import tensorflow as tf
from sys import stdin
from collections import defaultdict
from scipy.io import mmread
from sklearn.model_selection import train_test_split
import numpy as np
import re
import datetime as dt
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from os import path

In [34]:
# Tagged corpus read by the experiments below (passed as the featurizer's 'data_path').
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
data_path = '/mnt/store/hlt/Language/Hungarian/Crawl/Web2/ana/xaa.tagged'

In [35]:
class ResearchDiary:
    """Tab-separated log of experiment results, usable as a context manager.

    On construction the diary file is loaded if it exists, otherwise an empty
    frame with the default columns is created.  Leaving the ``with`` block
    writes the frame back to ``diary_file`` (also when the block raised, so
    rows added before the failure are not lost).
    """

    # File the diary is loaded from and saved to (relative to the working directory).
    diary_file = "baseline_experiment_results.tsv"

    @staticmethod
    def load_or_create_dataframe():
        """Return the stored diary, or an empty frame with the default columns."""
        if path.exists(ResearchDiary.diary_file):
            result_diary = pd.read_table(ResearchDiary.diary_file)
        else:
            # NOTE: 'gpu_memory_fracion' is misspelled on purpose here: the same
            # key is used by the code that fills the diary, so renaming it in
            # one place only would split the data over two columns.
            result_diary = pd.DataFrame(columns=[
                                            'train_accuracy',
                                            'test_accuracy',
                                            'timestamp',
                                            'data_path',
                                            'theoretical_max',
                                            'architecture',
                                            'min_sample_per_class',
                                            'max_sample_per_class',
                                            'sample_count',
                                            'feature_count',
                                            'max_lines',
                                            'N',
                                            'last_char',
                                            'full_tag',
                                            'tag_filter',
                                            'include_smaller_ngrams',
                                            'use_padding',
                                            'epochs',
                                            'layers',
                                            'activation',
                                            'batch_size',
                                            'optimizer',
                                            'optimizer_kwargs',
                                            'gpu_memory_fracion',
                                            'running_time'
                                        ])
        return result_diary

    def __init__(self):
        self.df = ResearchDiary.load_or_create_dataframe()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block still propagates.
        self.df.to_csv(ResearchDiary.diary_file, sep='\t', index=False)

    def add_experiment(self, data, create_cols_if_new=True):
        """Append one experiment as a new row.

        Parameters
        ----------
        data : dict
            Maps column name to value.  The dict is never modified.
        create_cols_if_new : bool
            If true, keys that are not yet columns are added as new columns
            (existing rows get ``None``).  If false, such keys are ignored.
        """
        new_index = len(self.df)
        if create_cols_if_new:
            # Iterate in the dict's own order so new columns are created
            # deterministically.
            for new_col in [c for c in data if c not in self.df.columns]:
                self.df[new_col] = None
            row = dict(data)
        else:
            # Filter into a copy instead of deleting keys from the caller's dict.
            row = {k: v for k, v in data.items() if k in self.df.columns}
        self.df.loc[new_index] = row

In [36]:
import re
from collections import defaultdict
from sklearn.feature_extraction import DictVectorizer


class Featurizer:
    """Turn a tab-separated tagged corpus into feature and label matrices.

    Every input line is split on tabs: the first field is the word, and the
    tag is whatever follows the last '/' of the last field.  A word is
    represented either by positional character n-grams or by a single
    categorical 'word' feature.

    Parameters
    ----------
    last_char : int
        If positive, only the last ``last_char`` characters of a word are used.
    N : int
        N-gram length.
    full_tag : bool
        If exactly ``False``, only the leading run of uppercase ASCII letters
        of the tag is kept; lines whose tag has no such prefix are skipped.
    use_padding : bool
        Pad the word with ``N - 1`` spaces on both sides before extracting
        n-grams.
    use_word_as_feature : bool
        Use the (truncated) word itself as the only feature.
    tag_filter : iterable of str or None
        If given, only samples with one of these tags are kept.
    include_smaller_ngrams : bool
        Also extract all n-grams shorter than ``N``.
    """

    class InvalidTag(Exception):
        """Raised when no coarse tag can be extracted from a line."""

    tag_re = re.compile(r'^[A-Z]+')

    def __init__(self, last_char, N, full_tag=False, use_padding=True, use_word_as_feature=False, tag_filter=None,
                 include_smaller_ngrams=False, **kwargs):
        # **kwargs is accepted (and ignored) so the same config dict can be
        # passed both to the constructor and to featurize().
        self.last_char = last_char
        self.N = N
        self.full_tag = full_tag
        self.use_padding = use_padding
        self.use_word_as_feature = use_word_as_feature
        self.include_smaller_ngrams = include_smaller_ngrams
        if tag_filter is not None:
            self.tag_filter = set(tag_filter)
        else:
            self.tag_filter = None
        self.raw_input = []

    def featurize(self, data_path, encoding='utf-8', min_sample_per_class=0, max_sample_per_class=0, max_lines=0,
                 **kwargs):
        """Read ``data_path`` and return ``(X_mtx, y_vec)``.

        Parameters
        ----------
        data_path : str
            File to read, one sample per line.
        encoding : str
            Text encoding of the file.
        min_sample_per_class : int
            Only recorded on the instance; it is not enforced here.
        max_sample_per_class : int
            If positive, at most this many samples are kept per tag.
        max_lines : int
            If positive, reading stops after this many lines.

        The kept ``(word, tag)`` pairs are available in ``self.raw_input``,
        in the same order as the rows of the returned matrices.
        """
        X = []
        y = []
        self.min_sample_per_class = min_sample_per_class
        self.max_sample_per_class = max_sample_per_class
        self.max_lines = max_lines
        # Start from scratch so raw_input always lines up with the rows of X.
        self.raw_input = []
        sample_cnt = defaultdict(int)
        with open(data_path, encoding=encoding) as f:
            for line_cnt, line in enumerate(f, start=1):
                if max_lines > 0 and line_cnt > max_lines:
                    break
                try:
                    word, tag = self.extract_word_and_tag(line)
                except Featurizer.InvalidTag:
                    continue
                if not word.strip() or not tag.strip():
                    continue
                if self.tag_filter is not None and tag not in self.tag_filter:
                    continue
                if max_sample_per_class > 0 and sample_cnt[tag] >= max_sample_per_class:
                    continue
                self.__featurize_and_store_sample(word, tag, X, y)
                self.raw_input.append((word, tag))
                sample_cnt[tag] += 1
                # Nothing more can be collected once every requested class
                # is full, so stop reading early.
                if self.__all_classes_full(sample_cnt, max_sample_per_class):
                    break
        return self.create_feature_matrix(X, y)

    def __all_classes_full(self, sample_cnt, max_sample_per_class):
        """True if a cap is set and every tag in ``tag_filter`` has reached it.

        Without a cap there is nothing to fill up, and without a tag filter
        the set of classes is unknown, so the answer is False in both cases.
        """
        if max_sample_per_class <= 0 or self.tag_filter is None:
            return False
        return all(sample_cnt.get(t, 0) >= max_sample_per_class for t in self.tag_filter)

    def create_feature_matrix(self, X, y):
        """Vectorize the feature dicts ``X`` and label dicts ``y``.

        Stores the inputs and the resulting matrices on the instance and
        returns ``(X_mtx, y_vec)``.
        """
        self.X = X
        self.y = y
        self.X_mtx = self.__get_or_create_vectorizer('X_vectorizer', X)
        self.y_vec = self.__get_or_create_vectorizer('y_vectorizer', y)
        return self.X_mtx, self.y_vec

    def __get_or_create_vectorizer(self, name, data):
        """Vectorize ``data`` with the vectorizer stored under ``name``.

        The first call fits a new DictVectorizer and stores it; later calls
        reuse the fitted one so that the columns stay comparable.
        """
        if hasattr(self, name):
            return getattr(self, name).transform(data)
        dv = DictVectorizer()
        v = dv.fit_transform(data)
        setattr(self, name, dv)
        return v

    def extract_word_and_tag(self, line):
        """Return ``(word, tag)`` for one input line.

        Raises
        ------
        Featurizer.InvalidTag
            If ``full_tag`` is ``False`` and the tag does not start with an
            uppercase ASCII letter.
        """
        fd = line.strip().split('\t')
        word = fd[0]
        tag = fd[-1].split('/')[-1]
        if self.full_tag is False:
            match = Featurizer.tag_re.match(tag)
            if match is None:
                raise Featurizer.InvalidTag()
            tag = match.group(0)
        return word, tag

    def __truncate(self, word):
        """Keep only the last ``last_char`` characters if ``last_char`` is positive."""
        if self.last_char > 0:
            return word[-self.last_char:]
        return word

    def __featurize_and_store_sample(self, word, tag, X, y):
        """Append the feature dict of ``word`` to ``X`` and its label dict to ``y``."""
        if self.use_word_as_feature:
            # Honour last_char here as well: the word feature is the same
            # suffix the n-gram features would be extracted from.
            f = {'word': self.__truncate(word)}
        else:
            f = self.__featurize_ngram(word)
        X.append(f)
        y.append({'class': tag})

    def __featurize_ngram(self, word):
        """Return the positional n-gram features of the (truncated) word."""
        feats = {}
        word = self.__truncate(word)
        if self.include_smaller_ngrams:
            for n in range(1, self.N+1):
                feats.update(Featurizer.extract_ngrams(word, n, self.use_padding))
        else:
            feats.update(Featurizer.extract_ngrams(word, self.N, self.use_padding))
        return feats

    @staticmethod
    def extract_ngrams(text, N, padding=False):
        """Return ``{'<N>_<position>': ngram}`` for every N-gram of ``text``.

        ``position`` is counted from the end of the (padded) text, so the
        last N-gram always has position ``N``.  Padding is applied only if
        ``padding`` is exactly ``True``.  A text shorter than ``N`` yields
        no features.
        """
        if padding is True:
            text = '{0}{1}{0}'.format(" " * (N-1), text)
        feats = {}
        for i in range(len(text)-N+1):
            feats['{0}_{1}'.format(N, len(text) - i)] = text[i:i+N]
        return feats

    def get_theoretical_max(self):
        """Best accuracy any classifier could reach on the stored samples.

        Samples with identical feature dicts cannot be told apart, so for
        each distinct feature dict only its most frequent class can be
        predicted correctly.  Returns 0.0 if no samples are stored.
        """
        if not self.X:
            return 0.0
        samples = defaultdict(lambda: defaultdict(int))
        for xi, yi in zip(self.X, self.y):
            f_str = ','.join('{}:{}'.format(feat, val) for feat, val in sorted(xi.items()))
            samples[f_str][yi['class']] += 1
        return sum(max(v.values()) for v in samples.values()) / len(self.X)

In [37]:
import logging
from scipy.sparse import issparse


class FFNN:
    """Fully connected feed-forward network built on the TensorFlow 1.x graph API.

    Every layer, including the output layer, applies ``activation``; the cost
    is the mean squared difference between the output and the target rows.

    Parameters
    ----------
    n_feature : int
        Width of the input rows.
    n_class : int
        Width of the target rows.
    layers : sequence of int
        Sizes of the hidden layers; must not be empty.
    batch_size : int
        If positive, each training step uses a random minibatch of this size
        (drawn with replacement); otherwise the whole training set is used.
    epochs : int
        Number of training steps.  With minibatches one "epoch" is a single
        minibatch update, not a pass over the data.
    verbose : bool
        If exactly ``True``, print the cost about ten times during training.
    activation : callable
        TensorFlow activation function.
    gpu_memory_fraction : float
        Passed to ``tf.GPUOptions(per_process_gpu_memory_fraction=...)``.
    optimizer : str
        Name of an optimizer class in ``tf.train``.
    optimizer_kwargs : dict or None
        Keyword arguments for the optimizer's constructor.
    """

    def __init__(self, n_feature, n_class, layers, batch_size=0, epochs=5000, verbose=False,
                 activation=tf.sigmoid, gpu_memory_fraction=0.5,
                 optimizer="GradientDescentOptimizer", optimizer_kwargs=None):
        if len(layers) == 0:
            raise ValueError("layers must contain at least one hidden layer size")
        self.n_feature = n_feature
        self.n_class = n_class
        self.shape = layers
        self.batch_size = batch_size
        self.n_input = tf.placeholder(tf.float32, shape=[None, n_feature],
                         name="n_input")
        self.n_output = tf.placeholder(tf.float32, shape=[None, n_class],
                          name="n_output")
        self.bias = []
        self.W = []
        self.hidden = []
        self.activation = activation
        self.create_input_layer()
        self.create_hidden_layers()
        self.create_output_layer()
        self.cost = tf.reduce_mean(tf.square(self.n_output - self.output))
        # Evaluation ops are built once here, so dotest() works before
        # training and repeated calls do not keep adding nodes to the graph.
        self.correct_prediction = tf.equal(tf.argmax(self.n_output, 1), tf.argmax(self.output, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
        # A fresh dict per instance instead of a shared mutable default.
        self.optimizer_kwargs = {} if optimizer_kwargs is None else dict(optimizer_kwargs)
        self.optimizer = getattr(tf.train, optimizer)(**self.optimizer_kwargs)
        self.train = self.optimizer.minimize(self.cost)
        # NOTE(review): newer TensorFlow 1.x releases deprecate this call in
        # favour of tf.global_variables_initializer(); kept as is because the
        # TensorFlow version this notebook targets is not visible here.
        self.init = tf.initialize_all_variables()
        self.gpu_memory_fraction = gpu_memory_fraction
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.sess.run(self.init)
        self.verbose = verbose
        self.epochs = epochs

    def create_input_layer(self):
        """Create the first hidden layer, fed from the input placeholder."""
        self.bias.append(tf.Variable(tf.random_normal([self.shape[0]]), name="bias0"))
        self.W.append(tf.Variable(tf.random_normal([self.n_feature, self.shape[0]]), name="weights0"))
        self.hidden.append(self.activation(tf.matmul(self.n_input, self.W[0]) + self.bias[0]))

    def create_hidden_layers(self):
        """Create the remaining hidden layers, each fed from the previous one."""
        for i in range(1, len(self.shape)):
            self.bias.append(tf.Variable(tf.random_normal([self.shape[i]]), name="bias{}".format(i)))
            self.W.append(tf.Variable(tf.random_normal([self.shape[i-1], self.shape[i]]),
                                      name="weights{}".format(i)))
            self.hidden.append(self.activation(tf.matmul(self.hidden[-1], self.W[-1]) + self.bias[-1]))

    def create_output_layer(self):
        """Create the output layer on top of the last hidden layer."""
        # The output layer comes after len(self.shape) hidden layers, so that
        # is its index; len(self.shape) - 1 would repeat the last hidden name.
        idx = len(self.shape)
        self.bias.append(tf.Variable(tf.random_normal([self.n_class]), name="bias{}".format(idx)))
        self.W.append(tf.Variable(tf.random_normal([self.shape[-1], self.n_class]), name="weights{}".format(idx)))
        self.output = self.activation(tf.matmul(self.hidden[-1], self.W[-1]) + self.bias[-1])

    def dotrain(self, X_train, y_train):
        """Run ``self.epochs`` optimizer steps and return the per-step results.

        Each returned element is the result of one ``sess.run`` call: the
        train op's result, the cost, then the values of all weight matrices
        and all bias vectors after that step.
        """
        X_train = FFNN.convert_sparse_if_needed(X_train)
        y_train = FFNN.convert_sparse_if_needed(y_train)
        cvalues = []
        # Report roughly ten times over the whole run, but at least every step.
        step = max(1, self.epochs // 10)
        fetches = [self.train, self.cost] + self.W + self.bias
        for epoch in range(0, self.epochs):
            if self.batch_size > 0:
                X_batch, y_batch = self.get_minibatch(X_train, y_train)
            else:
                X_batch = X_train
                y_batch = y_train
            cvalues.append(self.sess.run(fetches,
                    feed_dict={self.n_input: X_batch, self.n_output: y_batch}))
            if (epoch + 1) % step == 0 and self.verbose is True:
                print('{0} epochs, cvalue: {1}'.format(epoch+1, cvalues[-1][1]))
        return cvalues

    def get_minibatch(self, X, y):
        """Return ``batch_size`` rows of ``X`` and ``y``, sampled with replacement."""
        batch_index = np.random.choice(np.arange(0, X.shape[0]), self.batch_size)
        return X[batch_index], y[batch_index]

    def dotest(self, X_test, y_test):
        """Return ``(accuracy, correct)`` for the given samples.

        ``correct`` holds one boolean per row: whether the arg-max of the
        network output equals the arg-max of the target row.
        """
        X_test = FFNN.convert_sparse_if_needed(X_test)
        y_test = FFNN.convert_sparse_if_needed(y_test)
        # One forward pass yields both values.
        accuracy, prediction = self.sess.run(
            [self.accuracy, self.correct_prediction],
            feed_dict={self.n_input: X_test, self.n_output: y_test})
        return accuracy, prediction

    def close(self):
        """Close the TensorFlow session and release the resources it holds."""
        self.sess.close()

    @staticmethod
    def convert_sparse_if_needed(mtx):
        """Return a dense version of ``mtx`` if it is a scipy sparse matrix."""
        if issparse(mtx):
            mtx = mtx.todense()
        return mtx
    

class Experiment:
    """One featurize / train / evaluate run described by a config dict.

    ``config`` has up to three sections:

    * ``'featurizer'`` -- keyword arguments for both ``Featurizer(...)`` and
      ``Featurizer.featurize(...)``; each of them ignores the keys meant for
      the other.
    * ``'ffnn'`` -- keyword arguments for ``FFNN(...)`` besides the input and
      output sizes and ``verbose``.
    * ``'global'`` (optional) -- ``'test_size'`` (expected fraction of the
      samples held out for testing, default 0.1) and ``'verbose'``.
    """

    def __init__(self, config):
        self.config = config
        glob_config = config.get('global', {})
        self.featurizer = Featurizer(**config['featurizer'])
        X, y = self.featurizer.featurize(**config['featurizer'])
        self.X = X
        self.y = y
        self.test_size = glob_config.get('test_size', .1)
        verbose = glob_config.get('verbose', False)
        self.ffnn = FFNN(X.shape[1], y.shape[1], verbose=verbose, **config['ffnn'])
        # Filled in by run() / run_and_save().
        self.train_mask = None
        self.test_pred = None
        self.train_acc = None
        self.test_acc = None
        self.running_time = None

    def run(self):
        """Split the data at random, train, and evaluate on both parts.

        Returns ``(train_acc, train_pred, test_acc, test_pred)``.
        """
        # Each sample lands in the training set with probability 1 - test_size.
        self.train_mask = np.random.random(size=self.X.shape[0]) > self.test_size
        test_mask = np.invert(self.train_mask)
        X_train = self.X[self.train_mask]
        X_test = self.X[test_mask]
        y_train = self.y[self.train_mask]
        y_test = self.y[test_mask]
        self.ffnn.dotrain(X_train, y_train)
        train_acc, train_pred = self.ffnn.dotest(X_train, y_train)
        test_acc, test_pred = self.ffnn.dotest(X_test, y_test)
        self.test_pred = test_pred
        self.train_acc = train_acc
        self.test_acc = test_acc
        return train_acc, train_pred, test_acc, test_pred

    def run_decision_tree(self):
        """Return the 10-fold cross-validation scores of a decision tree on the same data."""
        self.clf = DecisionTreeClassifier()
        return cross_val_score(self.clf, self.X.toarray(), self.y.toarray(), cv=10)

    def get_test_errors(self):
        """Return the ``(word, tag)`` pairs of the misclassified test samples.

        Raises
        ------
        RuntimeError
            If ``run()`` has not been called yet.
        """
        if self.train_mask is None or self.test_pred is None:
            raise RuntimeError("run() must be called before get_test_errors()")
        test_samples = [sample for in_train, sample
                        in zip(self.train_mask, self.featurizer.raw_input) if not in_train]
        return [sample for sample, correct in zip(test_samples, self.test_pred) if not correct]

    def add_results_to_diary(self, diary):
        """Add the parameters and results of this experiment to ``diary``."""
        d = {
            'timestamp': dt.datetime.now(),
            # Log the path this experiment was configured with rather than
            # whatever a notebook-level variable happens to hold right now.
            'data_path': self.config['featurizer'].get('data_path'),
            'last_char': self.featurizer.last_char,
            'N': self.featurizer.N,
            'full_tag': self.featurizer.full_tag,
            'use_padding': self.featurizer.use_padding,
            'include_smaller_ngrams': self.featurizer.include_smaller_ngrams,
            'tag_filter': self.featurizer.tag_filter,
            'min_sample_per_class': self.featurizer.min_sample_per_class,
            'max_sample_per_class': self.featurizer.max_sample_per_class,
            'max_lines': self.featurizer.max_lines,
            'sample_count': self.featurizer.X_mtx.shape[0],
            'feature_count': self.featurizer.X_mtx.shape[1],
            'architecture': 'FFNN',
            'layers': self.ffnn.shape,
            'batch_size': self.ffnn.batch_size,
            'epochs': self.ffnn.epochs,
            'activation': self.ffnn.activation.__name__,
            # The key is misspelled, but it has to match the diary's column name.
            'gpu_memory_fracion': self.ffnn.gpu_memory_fraction,
            'optimizer': self.ffnn.optimizer.__class__,
            'optimizer_kwargs': self.ffnn.optimizer_kwargs,
            'train_accuracy': self.train_acc,
            'test_accuracy': self.test_acc,
            'theoretical_max': self.featurizer.get_theoretical_max(),
            'running_time': self.running_time,
        }
        diary.add_experiment(d)

    def run_and_save(self):
        """Run the experiment, print both accuracies and log the run in the research diary."""
        start = dt.datetime.now()
        train_acc, train_pred, test_acc, test_pred = self.run()
        end = dt.datetime.now()
        self.running_time = end - start
        print("Training accuracy: {0}\nTest accuracy: {1}".format(train_acc, test_acc))
        with ResearchDiary() as rd:
            self.add_results_to_diary(rd)

In [38]:
%%time
# Quick run: unigram (N=1) features over the last 5 characters, NOUN vs VERB,
# at most 2500 samples per class; three hidden layers of 40 units trained for
# only 50 steps on random minibatches of 1000 samples.
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 5,
            'N': 1,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 2500,
            'max_lines': 2000000,
        },
        'ffnn': {
            'layers': (40, 40, 40),
            'optimizer': 'MomentumOptimizer',
            'batch_size': 1000,
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 50,
        }
})
e.run_and_save()

5 epochs, cvalue: 0.353938490152359
10 epochs, cvalue: 0.27243995666503906
15 epochs, cvalue: 0.2680909037590027
20 epochs, cvalue: 0.261005699634552
25 epochs, cvalue: 0.25506535172462463
30 epochs, cvalue: 0.25461551547050476
35 epochs, cvalue: 0.25099408626556396
40 epochs, cvalue: 0.2432461529970169
45 epochs, cvalue: 0.24172766506671906
50 epochs, cvalue: 0.23861713707447052
Training accuracy: 0.6244697570800781
Test accuracy: 0.5988483428955078
CPU times: user 3.61 s, sys: 0 ns, total: 3.61 s
Wall time: 3.47 s


In [153]:
# Decision-tree baseline (10-fold cross-validation) on the data of whichever
# experiment `e` currently refers to.
# NOTE(review): the execution count suggests this cell was run out of order -- confirm which `e` was used.
e.run_decision_tree()

array([ 0.918,  0.968,  0.954,  0.894,  0.898,  0.954,  0.94 ,  0.898,
        0.926,  0.954])

In [39]:
%%time
# Larger run: unigram (N=1) features over the last 5 characters, NOUN vs VERB,
# up to 50000 samples per class; two hidden layers of 40 units trained for
# 10000 steps on random minibatches of 1000 samples.
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 5,
            'N': 1,
            'use_word_as_feature': False,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 50000,
            'max_lines': 2000000,
        },
        'ffnn': {
            'layers': (40, 40),
            'batch_size': 1000,
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 10000,
        }
})
e.run_and_save()

1000 epochs, cvalue: 0.1127418652176857
2000 epochs, cvalue: 0.0853806883096695
3000 epochs, cvalue: 0.07170069962739944
4000 epochs, cvalue: 0.06646432727575302
5000 epochs, cvalue: 0.05410183593630791
6000 epochs, cvalue: 0.058784421533346176
7000 epochs, cvalue: 0.04342120513319969
8000 epochs, cvalue: 0.05368638038635254
9000 epochs, cvalue: 0.04481387510895729
10000 epochs, cvalue: 0.04545537754893303
Training accuracy: 0.9506864547729492
Test accuracy: 0.9454238414764404
CPU times: user 49.8 s, sys: 2.66 s, total: 52.4 s
Wall time: 38.3 s


In [137]:
# Misclassified test samples of the most recently run experiment `e`.
errors = e.get_test_errors()
# Number of errors per gold tag (inspect `d` in a separate cell if needed).
d = defaultdict(int)
for i in errors:
    d[i[1]] += 1
# Only the last expression of a cell is displayed, so the bare `errors[:10]`
# and `d` expressions that used to sit above this line did nothing.
errors[:20]

[('Viharjuk', 'NOUN'),
 ('értem', 'NOUN'),
 ('MacDonald', 'NOUN'),
 ('Monrningstar', 'NOUN'),
 ('heute', 'NOUN'),
 ('operál', 'VERB'),
 ('jelent', 'NOUN'),
 ('érzékel', 'VERB'),
 ('exhibicionista', 'NOUN'),
 ('alja', 'NOUN'),
 ('ér', 'VERB'),
 ('annak', 'NOUN'),
 ('univerzum', 'NOUN'),
 ('annak', 'NOUN'),
 ('annak', 'NOUN'),
 ('foglaltak', 'NOUN'),
 ('Oszlop', 'NOUN'),
 ('(hárs)méz', 'VERB'),
 ('sztirolhab', 'NOUN'),
 ('jogosult', 'NOUN')]

In [138]:
%%time
# Bigram (N=2) features over the last 5 characters, NOUN vs VERB, at most
# 2500 samples per class; three hidden layers of 40 units trained for 5000
# steps on random minibatches of 100 samples.
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 5,
            'N': 2,
            'use_word_as_feature': False,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 2500,
            'max_lines': 2000000,
        },
        'ffnn': {
            'layers': (40, 40, 40),
            'batch_size': 100,
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 5000,
        }
})
e.run_and_save()

500 epochs, cvalue: 0.136236310005188
1000 epochs, cvalue: 0.08828959614038467
1500 epochs, cvalue: 0.06086685135960579
2000 epochs, cvalue: 0.045603714883327484
2500 epochs, cvalue: 0.03561263531446457
3000 epochs, cvalue: 0.028374725952744484
3500 epochs, cvalue: 0.022943317890167236
4000 epochs, cvalue: 0.019268445670604706
4500 epochs, cvalue: 0.01685773767530918
5000 epochs, cvalue: 0.01519788708537817
Training accuracy: 0.9856663942337036
Test accuracy: 0.9121495485305786
CPU times: user 2min 8s, sys: 3.06 s, total: 2min 11s
Wall time: 2min 2s


In [139]:
%%time
# 5-gram (N=5) features over the last 5 characters without padding, i.e. at
# most one feature per word; words shorter than 5 characters get no feature.
# One hidden layer of 300 units; no 'batch_size' is given, so every step uses
# the whole training set.
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 5,
            'N': 5,
            'use_padding': False,
            'use_word_as_feature': False,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 3000,
            'max_lines': 200000,
        },
        'ffnn': {
            'layers': (300, ),
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 5000,
        }
})
e.run_and_save()

500 epochs, cvalue: 0.29633498191833496
1000 epochs, cvalue: 0.2696171998977661
1500 epochs, cvalue: 0.24280379712581635
2000 epochs, cvalue: 0.22048281133174896
2500 epochs, cvalue: 0.20246997475624084
3000 epochs, cvalue: 0.1871405839920044
3500 epochs, cvalue: 0.17296195030212402
4000 epochs, cvalue: 0.1607976108789444
4500 epochs, cvalue: 0.15112482011318207
5000 epochs, cvalue: 0.14164234697818756
Training accuracy: 0.7874769568443298
Test accuracy: 0.714035153388977
CPU times: user 2min 57s, sys: 48.8 s, total: 3min 46s
Wall time: 3min 46s


In [146]:
%%time
# Bigram (N=2) features over the last 5 characters, NOUN vs VERB, up to 30000
# samples per class; three hidden layers of 40 units trained for 5000 steps
# on random minibatches of 500 samples.
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 5,
            'N': 2,
            'use_word_as_feature': False,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 30000,
            'max_lines': 200000,
        },
        'ffnn': {
            'layers': (40, 40, 40),
            # 'batch_size' used to be listed twice (1000, then 500). In a dict
            # literal the later entry silently wins, so 500 is the value that
            # was in effect and the only one kept.
            'batch_size': 500,
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 5000,
        }
})

e.run_and_save()

500 epochs, cvalue: 0.1694050133228302
1000 epochs, cvalue: 0.11332681775093079
1500 epochs, cvalue: 0.09956234693527222
2000 epochs, cvalue: 0.1011488139629364
2500 epochs, cvalue: 0.08270842581987381
3000 epochs, cvalue: 0.07109708338975906
3500 epochs, cvalue: 0.06922101229429245
4000 epochs, cvalue: 0.06034207344055176
4500 epochs, cvalue: 0.06162016838788986
5000 epochs, cvalue: 0.05050254613161087
Training accuracy: 0.9430909752845764
Test accuracy: 0.9342857003211975
CPU times: user 1min 4s, sys: 2.61 s, total: 1min 6s
Wall time: 1min 28s


In [141]:
%%time
# Trigram (N=3) over the last 3 characters without padding, i.e. at most one
# feature per word; two hidden layers of 30 units, minibatches of 1000.
# The recorded run of this cell was interrupted (KeyboardInterrupt).
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 3,
            'N': 3,
            'use_padding': False,
            'use_word_as_feature': False,
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 30000,
            'max_lines': 200000,
        },
        'ffnn': {
            'layers': (30, 30),
            'batch_size': 1000,
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': 1,
                'momentum': .1,
            },
            'gpu_memory_fraction': 1,
            'epochs': 5000,
        }
})
e.run_and_save()

KeyboardInterrupt: 

## Use the last 10 characters of each word as a single feature

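Basically, we're building a memory: with one categorical feature per distinct word form, the network can only memorize the words it has seen during training.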

In [142]:
# Word-as-feature run (use_word_as_feature=True): every sample has a single
# categorical 'word' feature, so 'N' is not used. At most 3000 samples per
# class, three hidden layers of 40 units, learning rate 0.1; no 'batch_size'
# is given, so every step uses the whole training set.
# The recorded run of this cell was interrupted (KeyboardInterrupt).
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 10,
            'N': 1,
            'use_word_as_feature': True,
            
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 3000,
            'max_lines': 200000,
        },
        'ffnn': {
            'layers': (40, 40, 40),
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': .1,
                'momentum': .1,
            },
            'gpu_memory_fraction': .2,
            'epochs': 5000,
        }
})

e.run_and_save()

500 epochs, cvalue: 0.2523937225341797
1000 epochs, cvalue: 0.24883531033992767
1500 epochs, cvalue: 0.247022345662117
2000 epochs, cvalue: 0.24572668969631195
2500 epochs, cvalue: 0.2446475476026535


KeyboardInterrupt: 

In [143]:
# Same word-as-feature setup as the previous cell, but with at most 300
# samples per class.
# The recorded run of this cell was interrupted (KeyboardInterrupt).
e = Experiment({
        'global': {
            'verbose': True,
        },
        'featurizer': {
            'last_char': 10,
            'N': 1,
            'use_word_as_feature': True,
            
            'tag_filter': ("NOUN", "VERB"),
            'data_path': data_path,
            'encoding': 'latin2',
            'min_sample_per_class': 3,
            'max_sample_per_class': 300,
            'max_lines': 200000,
        },
        'ffnn': {
            'layers': (40, 40, 40),
            'optimizer': 'MomentumOptimizer',
            'optimizer_kwargs': {
                'learning_rate': .1,
                'momentum': .1,
            },
            'gpu_memory_fraction': .2,
            'epochs': 5000,
        }
})

e.run_and_save()

KeyboardInterrupt: 