## predict gene upregulation/downregulation
- Using Expectation Reflection to infer the gene network and predict gene upregulation/downregulation at the next time point
- initial measurements: $t[0-4]$
- final measurements: $t[1-5]$
- midpoint measurements: $\dfrac{t[0-4] + t[1-5]}{2}$

In [1]:
from train import *
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

In [2]:
def EM_inference(X, y, kf=5, regu=0.005):
    """Estimate k-fold cross-validated sign-prediction accuracy.

    For every fold, one model is fitted per column of ``y`` with ``fit``
    (expected to come from the ``train`` star import), the sign of the
    fitted linear response ``h0 + X.dot(w)`` is taken as the prediction on
    the held-out rows, and the number of correct predictions is pooled over
    all columns.

    Parameters
    ----------
    X : 2-D array
        Predictor matrix, one row per sample.
    y : 2-D array
        Target matrix, one row per sample and one column per target.
        Predictions come from ``np.sign``, so targets are presumably
        encoded as -1/+1 -- TODO confirm against ``make_data``.
    kf : int, default 5
        Number of cross-validation folds.
    regu : float, default 0.005
        Regularization strength forwarded to ``fit``.

    Returns
    -------
    (float, float)
        Mean and standard deviation of the per-fold accuracy, where each
        fold's accuracy is the fraction of correct entries in the held-out
        part of ``y``.
    """
    # Folds are contiguous slices; any shuffling is left to the caller.
    # random_state is deliberately omitted: it has no effect when
    # shuffle=False and recent scikit-learn releases reject the combination.
    kfold = KFold(n_splits=kf, shuffle=False)
    accuracy_v = []

    for tr_ind, te_ind in kfold.split(y):
        X_tr, y_tr = X[tr_ind], y[tr_ind]
        X_te, y_te = X[te_ind], y[te_ind]

        n_correct = 0
        for n in range(y_tr.shape[1]):
            # Forward the caller's regularization instead of a fixed constant.
            h0, w = fit(X_tr, y_tr[:, n], niter_max=100, regu=regu)
            y_pred = np.sign(h0 + X_te.dot(w))
            # normalize=False yields the count of correct predictions.
            n_correct += accuracy_score(y_te[:, n], y_pred, normalize=False)

        # y_te.size is (held-out samples) * (targets), i.e. the total number
        # of predictions made for this fold.
        accuracy_v.append(n_correct / y_te.size)

    return np.mean(accuracy_v), np.std(accuracy_v)

In [None]:
# Load the raw data and let make_data derive the candidate predictor sets
# together with the shared target array.
data_complete = np.loadtxt('../data_complete.txt')
data_init, data_fin, data_midpt, data_deriv = make_data(data_complete, n_bin=6)

# Labels and predictor sets, kept in matching order.
names = ["initial measurements", "final measurements", "midpoint measurements"]
data_list = [data_init, data_fin, data_midpt]

# Score each predictor set against the same targets with 5-fold CV.
for name, data in zip(names, data_list):
    # Rows of predictors and targets are permuted together before splitting.
    X, y = shuffle(data, data_deriv)
    mean, std = EM_inference(X, y, kf=5, regu=0.005)
    print(name, ": ", mean, std)
    