# Theano Print

In [None]:
import numpy as np
import theano
import theano.tensor as T
rng = np.random

# ---------------------------------------------------------------------------
# Synthetic training data: N random feature rows and N random 0/1 labels,
# both cast to Theano's configured float type.
# ---------------------------------------------------------------------------
N = 400
feats = 784
D = (
    rng.randn(N, feats).astype(theano.config.floatX),
    rng.randint(size=N, low=0, high=2).astype(theano.config.floatX),
)
training_steps = 10000

# ---------------------------------------------------------------------------
# Symbolic inputs and shared (trainable) parameters.
# ---------------------------------------------------------------------------
x = T.matrix("x")
y = T.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(np.asarray(0., dtype=theano.config.floatX), name="b")

# Attach the generated data as test values of the two symbolic inputs.
x.tag.test_value, y.tag.test_value = D

# ---------------------------------------------------------------------------
# Expression graph.
# ---------------------------------------------------------------------------
# Logistic function of the affine score dot(x, w) + b.
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))
# Thresholded output: true where p_1 exceeds 0.5.
prediction = p_1 > 0.5
# Per-example cross-entropy between the labels y and p_1.
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)
# Objective: mean cross-entropy plus 0.01 times the sum of squared weights.
cost = xent.mean() + 0.01 * (w ** 2).sum()
# Symbolic gradients of the objective with respect to w and b.
gw, gb = T.grad(cost, [w, b])

# ---------------------------------------------------------------------------
# Compiled functions.
# ---------------------------------------------------------------------------
# `train` returns (prediction, xent) and, on every call, replaces w and b by
# their current value minus 0.01 times the corresponding gradient.
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent],
    updates=[[w, w - 0.01 * gw], [b, b - 0.01 * gb]],
    name="train",
)
# `predict` only evaluates the thresholded output for the given inputs.
predict = theano.function(inputs=[x], outputs=prediction, name="predict")

In [None]:
# Produce a textual rendering of the symbolic `prediction` expression; as the
# last expression of the cell, its return value is what the notebook displays.
theano.printing.pprint(prediction) 

In [None]:
# Draw the graph behind `prediction` and save it as a PNG in the working
# directory.
theano.printing.pydotprint(
    prediction,
    outfile="logreg_pydotprint_prediction.png",
    var_with_name_simple=True,
)

# CoreNLP

In [None]:
# Tag every line of the training file with a CoreNLP server and store, for
# each line, the pair (line text, list of words whose POS tag starts with "N")
# in ./data/sents.json.
import json
import time

from pycorenlp import StanfordCoreNLP

# The CoreNLP server must already be listening on this address.
nlp = StanfordCoreNLP('http://localhost:9000')

sent_data = []
cnt = 0
start = time.time()
# total 302736 (presumably the number of lines in the input file)
with open("./data/train1.ner.txt", "r") as f:
    for sent in f:
        cnt += 1
        # Progress report every 100 lines: line count and elapsed seconds.
        if cnt % 100 == 0:
            print (cnt, time.time()-start)
        sent = sent.replace("\n", "")
        res = nlp.annotate(sent, properties={'annotators':"pos", 'outputFormat':'json'})
        noun_list = []
        if isinstance(res, dict):
            # CoreNLP may split one input line into several sentences (or
            # none at all for a blank line), so walk all of them instead of
            # assuming exactly one.
            for sentence in res["sentences"]:
                for word in sentence['tokens']:
                    if word['pos'].startswith('N'):
                        noun_list.append(word['word'])
        else:
            # pycorenlp hands back the raw response text instead of a dict
            # when the reply could not be parsed as JSON. Keep going so one
            # bad line does not throw away the whole run; the line is stored
            # with an empty noun list.
            print ("annotation failed for line", cnt)
        sent_data.append((sent, noun_list))

# Tuples are serialised as JSON arrays: [[line, [noun, ...]], ...].
with open("./data/sents.json", "w") as f:
    json.dump(sent_data, f)

100 1.4687469005584717


# PPT

In [77]:
import json
import numpy as np
# Load the structural-match table into `data`; the cells below index it as
# data[p][i].
with open("data/structural_match.json", "r") as f:
    data = json.load(f)

In [78]:
def normalize(v):
    """Scale *v* so that its L1 norm is 1.

    Args:
        v: Array-like of numbers (a NumPy array or a plain sequence).

    Returns:
        ``v`` divided by ``np.linalg.norm(v, ord=1)``. When that norm is
        zero the divisor is the machine epsilon instead, so an all-zero
        input yields all zeros rather than NaNs.
    """
    v = np.asarray(v)
    norm = np.linalg.norm(v, ord=1)
    if norm == 0:
        # np.finfo only accepts inexact (float/complex) dtypes; an all-zero
        # integer array would otherwise raise ValueError here.
        eps_dtype = v.dtype if np.issubdtype(v.dtype, np.inexact) else np.float64
        norm = np.finfo(eps_dtype).eps
    return v / norm


# For each pattern, L1-normalize every entry, sum them, normalize the sum
# again, and print the patterns that have more than 10 entries and whose
# final vector has first component exactly 1.
for pattern in data:
    entries = data[pattern]
    total = [0, 0, 0]
    for key in entries:
        entry_vec = np.array(entries[key])
        total = np.add(total, normalize(entry_vec))
    share = normalize(total)
    if not (share[0] == 1 and len(entries) > 10):
        continue
    print(pattern, share, len(entries))

( $N$ ) $W$ $W$ [ 1.  0.  0.] 13
$N$ - $N$ $W$ ) [ 1.  0.  0.] 11
$W$ / + [ 1.  0.  0.] 13
$N$ ' - $W$ $W$ [ 1.  0.  0.] 28
$W$ $N$$W$$N$ [ 1.  0.  0.] 11
$W$ $N$ ' - $W$ [ 1.  0.  0.] 16
$W$ $W$ ( $N$ ) [ 1.  0.  0.] 16
$W$ ( $N$ ) [ 1.  0.  0.] 21
$W$$N$ - $N$ [ 1.  0.  0.] 15
$W$$N$ . $N$ [ 1.  0.  0.] 34
$N$ ' - $W$ [ 1.  0.  0.] 120
$N$$W$ - $N$ [ 1.  0.  0.] 35
$W$ / $W$ ) $W$ [ 1.  0.  0.] 16
$W$ $W$ $W$ . $N$ [ 1.  0.  0.] 22
$W$ $W$$N$ . $N$ $W$ [ 1.  0.  0.] 13
$W$ $N$ . $N$ [ 1.  0.  0.] 73
$N$$W$ - $N$$W$ [ 1.  0.  0.] 41
$W$ . $N$ [ 1.  0.  0.] 37
$W$$N$ . $N$ $W$ $W$ [ 1.  0.  0.] 26
$N$ . $N$ [ 1.  0.  0.] 328
$N$ ' $W$ [ 1.  0.  0.] 35
$W$ $W$$N$ . $N$ [ 1.  0.  0.] 17
