This repository has been archived by the owner on Jun 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
util.py
106 lines (89 loc) · 3.35 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from collections import Counter, defaultdict
import json
import numpy as np
import random
import sys
import theano
import theano.tensor as T
import time
import tokenise_parse
def tokens_in_sentences(eg, parse_mode):
    """Return the token lists for sentence1 and sentence2 of an example."""
    first = tokenise_parse.tokens_for(eg, 1, parse_mode)
    second = tokenise_parse.tokens_for(eg, 2, parse_mode)
    return (first, second)
# Canonical label ordering; class indices used by the model follow this list.
LABELS = ['contradiction', 'neutral', 'entailment']

def label_for(eg):
    """Map an example's gold_label to its class index, or None if unrecognised."""
    gold = eg['gold_label']
    if gold in LABELS:
        return LABELS.index(gold)
    return None
def symmetric_example(label):
    """True when the label index is not 'entailment'.

    Entailment is directional, so swapping the sentences may change it;
    the other labels are treated as symmetric.
    """
    name = LABELS[label]
    return name != 'entailment'
def load_data(dataset, vocab, max_egs=None, update_vocab=True,
              parse_mode="BINARY_WITHOUT_PARENTHESIS"):
    """Load SNLI-style jsonl examples from `dataset`.

    Returns (x, y, stats): x is a list of (sentence1_ids, sentence2_ids)
    pairs, y the matching label indices, and stats a Counter with
    'n_ignored', 'n_tokens' and 'n_unk' tallies. Examples with an
    unrecognised gold label are skipped; at most `max_egs` examples are
    collected when it is not None.

    Fixes vs original: the file is closed via a context manager, and the
    unknown-token count no longer uses len(filter(...)), which raises a
    TypeError under Python 3 where filter returns an iterator.
    """
    stats = Counter()
    x, y = [], []
    with open(dataset, "r") as f:
        for line in f:
            eg = json.loads(line)
            label = label_for(eg)
            if label is None:
                stats['n_ignored'] += 1
            else:
                sentence_ids = []
                for tokens in tokens_in_sentences(eg, parse_mode):
                    ids = vocab.ids_for_tokens(tokens, update_vocab)
                    stats['n_tokens'] += len(ids)
                    # falsy ids (0 / None) denote unknown tokens — presumably
                    # the vocab's UNK convention; works on Python 2 and 3
                    stats['n_unk'] += sum(1 for i in ids if not i)
                    sentence_ids.append(ids)
                x.append((sentence_ids[0], sentence_ids[1]))
                y.append(label)
            if len(x) == max_egs:
                break
    return x, y, stats
def shared(values, name):
    """Wrap `values` in a named float32 theano shared variable (borrowed)."""
    as_float32 = np.asarray(values, dtype='float32')
    return theano.shared(as_float32, name=name, borrow=True)
def sharedMatrix(n_rows, n_cols, name, scale=0.05, orthogonal_init=True):
    """Create an (n_rows, n_cols) float32 theano shared matrix.

    Entries are drawn from N(0, scale^2). When orthogonal_init is set and
    n_rows >= n_cols, the matrix is replaced by the column-orthonormal U
    factor of its SVD; otherwise orthogonal init is disabled with a warning
    on stderr.
    """
    if orthogonal_init and n_rows < n_cols:
        # sys.stderr.write is valid on both Python 2 and 3; the original used
        # the Python-2-only "print >>sys.stderr" statement.
        sys.stderr.write(
            "warning: can't do orthogonal init of %s, since n_rows (%s) < n_cols (%s)\n"
            % (name, n_rows, n_cols))
        orthogonal_init = False
    w = np.random.randn(n_rows, n_cols) * scale
    if orthogonal_init:
        # keep only U; its columns are orthonormal (scale is irrelevant here)
        w, _s, _v = np.linalg.svd(w, full_matrices=False)
    return shared(w, name)
def eye(size, scale=1):
    """Return a size x size identity matrix scaled by `scale`."""
    identity = np.eye(size)
    return scale * identity
def zeros(shape):
    """Return a float32 numpy array of zeros with the given shape."""
    return np.zeros(shape, dtype=np.float32)
def accuracy(confusion):
    """Fraction of a square confusion matrix's mass on the diagonal."""
    # multiplying by a float identity keeps the numerator float, so the
    # division is true division even for integer confusion matrices
    on_diagonal = np.sum(np.identity(len(confusion)) * confusion)
    total = np.sum(confusion)
    return on_diagonal / total
def mean_sd(v):
    """Summarise `v` as {"mean": ..., "sd": ...} using the population std."""
    summary = {"mean": float(np.mean(v)),
               "sd": float(np.std(v))}
    return summary
def dts():
    """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now = time.localtime()
    return time.strftime("%Y-%m-%d %H:%M:%S", now)
def coin_flip():
    """Return True roughly half the time (uses the module-level random state)."""
    draw = random.random()
    return draw > 0.5
def norms(layers):
    """L2 norm of each named parameter, grouped by layer name.

    Returns {layer_name: {param_name: float_norm}}. Unnamed params are
    skipped, as are params without a usable get_value().
    TODO: doesn't include embeddings (tied or otherwise)
    """
    per_layer = defaultdict(dict)
    for layer in layers:
        for param in layer.params_for_l2_penalty():
            if param.name is None:
                continue
            try:
                per_layer[layer.name()][param.name] = \
                    float(np.linalg.norm(param.get_value()))
            except AttributeError:
                pass  # no get_value (?)
    return dict(per_layer)
# TODO: there is http://www.deeplearning.net/software/theano/library/gradient.html#theano.gradient.grad_clip for this (apparently)
def _clip(gradient, rescale=5.0):
    """Rescale a symbolic gradient so its L2 norm is at most `rescale`."""
    grad_l2 = gradient.norm(L=2)
    # factor is 1 when the norm is <= rescale, otherwise rescale / norm
    factor = rescale / T.maximum(rescale, grad_l2)
    return gradient * factor
def clipped(gradients, rescale=5.0):
    """Clip a gradient, or a list of gradients, to L2 norm `rescale`.

    Accepts either a single symbolic gradient or a list of them, mirroring
    what theano.grad can return, and preserves that shape in the result.
    """
    # isinstance (rather than type(...) == list) is the idiomatic check and
    # also accepts list subclasses
    if isinstance(gradients, list):
        return [_clip(g, rescale) for g in gradients]
    return _clip(gradients, rescale)
def zeros_in_the_shape_of(p):
    """A zeroed theano shared variable matching p's value in shape and dtype."""
    value = p.get_value()
    blank = np.zeros(value.shape, dtype=value.dtype)
    return theano.shared(blank)