-
Notifications
You must be signed in to change notification settings - Fork 3
/
run.py
81 lines (57 loc) · 2.11 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 10 21:35:56 2017
@author: thanhan
"""
import util
import data_pp
import pickle
import pystan
import numpy as np
import pandas as pd
def clean_body_labels(data):
    """
    Clean the stance labels for body text.

    Resets the index of ``data`` to 0..n-1 and rewrites every 'ignoring'
    value in the ``astance`` column to 'observing'. The frame is modified
    in place and is also returned, so callers may ignore the return value.

    Empty-string labels are left untouched.
    """
    n = len(data)
    data.index = range(n)
    if n == 0:
        # Nothing to relabel; also avoids touching a column that an empty
        # frame may not have.
        return data
    # A single boolean-mask assignment replaces the per-row loop that relied
    # on DataFrame.set_value, which no longer exists in current pandas.
    is_ignoring = data['astance'] == 'ignoring'
    data.loc[is_ignoring, 'astance'] = 'observing'
    return data
def get_processed_data():
    """
    Build the pre-processed train / validation / test frames.

    The three data frames returned by ``features.get_data()`` are
    concatenated, passed through ``data_pp.process_data`` as one frame,
    have their body stance labels cleaned, and are then cut back into
    three pieces by position using the original frame lengths.

    Returns a 6-tuple
    (train_data_pp, X_train, val_data_pp, X_val, test_data_pp, X_test);
    the X_* values are passed through exactly as ``features.get_data()``
    returned them.
    """
    # Imported here rather than at module level, as in the original script.
    import features

    splits = features.get_data()
    train_data, X_train, val_data, X_val, test_data, X_test = splits

    merged = pd.concat([train_data, val_data, test_data], ignore_index=True)
    data = data_pp.process_data(merged)
    # Relabels in place; the return value is deliberately not needed.
    clean_body_labels(data)

    # Positional cut points for splitting the processed frame back up.
    # NOTE(review): assumes process_data keeps row count and order - confirm.
    train_end = len(train_data)
    val_end = train_end + len(val_data)

    return (
        data[:train_end],
        X_train,
        data[train_end:val_end],
        X_val,
        data[val_end:],
        X_test,
    )
#(train_data, X_train, val_data, X_val, test_data, X_test) = pickle.load( open('edata.pkl'))
#all_data = pd.concat( [train_data, val_data, test_data], ignore_index = True)
#data = data_pp.process_data(all_data)
#train_data_pp = data[: len(train_data)]
#val_data_pp = data[len(train_data): len(train_data) + len(val_data)]
#test_data_pp = data[len(train_data) + len(val_data): ]
#(train_data_pp, X_train, val_data_pp, X_val, test_data_pp, X_test) = \
#pickle.load( open('edata_pp.pkl') )
#stan_input = data_pp.make_stan_input(train_data_pp, X_train, val_data_pp, X_val)
#sm = pickle.load( open('fact_model.pkl') )
#fit = sm.sampling(data=stan_input)
# Compiled Stan model shared by compile_stan() and run_stan(); it stays None
# until compile_stan() assigns it.
sm = None
def compile_stan(model_file='fact_model.stan'):
    """
    Compile a Stan model and cache it in the module-level ``sm``.

    model_file: path to the Stan program to compile. Defaults to
        'fact_model.stan', which is a relative path and is therefore
        resolved against the current working directory.

    Returns nothing; the compiled model is stored in the global ``sm``.
    """
    global sm
    sm = pystan.StanModel(file=model_file)
def run_stan(stan_input):
    """
    Fit the cached Stan model to ``stan_input`` with ``StanModel.vb``.

    stan_input: the data passed straight through to ``sm.vb``.

    Returns whatever ``sm.vb`` returns.

    If the model has not been compiled yet (``sm`` is still None), it is
    compiled on first use via ``compile_stan()`` instead of failing with an
    AttributeError on None.
    """
    if sm is None:
        compile_stan()
    return sm.vb(stan_input)
def run_fact_model(n_em=5, stan_input=None):
    """
    Run ``n_em`` EM iterations of the fact model.

    n_em: number of EM iterations; a value <= 0 performs no work.
    stan_input: data handed to ``run_stan`` in each E-step. When omitted,
        a module-level ``stan_input`` variable is used if one exists, which
        keeps interactive sessions that define it globally working.

    Returns the fit from the last E-step, or None if no iteration ran.

    Raises NameError if an iteration has to run but no ``stan_input`` was
    passed and none is defined at module level.
    """
    if stan_input is None and n_em > 0:
        # Fall back to the global the function originally depended on.
        stan_input = globals().get('stan_input')
        if stan_input is None:
            raise NameError(
                "name 'stan_input' is not defined; pass stan_input= "
                "or define it at module level"
            )

    fit = None
    for em_it in range(n_em):
        # E-step
        fit = run_stan(stan_input)
        # M-step: not implemented in this script.
    return fit