In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from node import Node
import pandas as pd
import numpy.linalg as la

In [2]:
# expected = np.random.normal(100, 20, 3)
poissons = np.vectorize(lambda x: np.random.poisson(3*x))
# observed = poissons(expected)

In [3]:
expected = np.array([ 86.50358743, 113.54704169, 101.3176554 ])
observed = np.array([265, 345, 324])

In [4]:
def llri(obs, exp, q): #score function for poisson (one element)
    return obs*np.log(q) + exp*(1-q)

def llri_pen(obs, exp, q, delta): #penalized score function for poisson (one element)
    return obs*np.log(q) + exp*(1-q) - delta

llr = np.vectorize(llri) #generate unpenalized scores for a given q value

llr_pen = np.vectorize(llri_pen) #genereate penalized scores for a given q value



In [5]:
np.log(3)*observed[1] + (1-3)*expected[1]
llri(observed[1], expected[1], 3)
# obs = 345
# exp = 113.5
# q = 3
# obs*np.log(q) + exp*(1-q)

151.92715621049788

In [6]:
def q_min(obs, exp):

    minimum = 0.000001
    qmle = 1

    while abs(qmle - minimum) > 0.00000001:
        q_mid = (minimum + qmle)/2

        if llri(obs, exp, q_mid) > 0:
            qmle = qmle - (qmle - minimum)/2
        else:
            minimum = minimum + (qmle - minimum)/2
    return (minimum + qmle)/2

def q_max(obs, exp):

    maximum = 10000000
    qmle = 1

    while abs(maximum - qmle) > 0.000001:
        q_mid = (maximum + qmle)/2

        if llri(obs, exp, q_mid) < 0:
            maximum = maximum - (maximum-qmle)/2
        else:
            qmle = qmle + (maximum-qmle)/2

    return (maximum + qmle)/2

###################################################
def q_min_pen(obs, exp, delta):  #need to fix


    minimum = 0.000001
    qmle = obs/exp

    while abs(qmle - minimum) > 0.00000001:
        q_mid = (minimum + qmle)/2

        if llri_pen(obs, exp, q_mid, delta) > 0:
            qmle = qmle - (qmle - minimum)/2
        else:
            minimum = minimum + (qmle - minimum)/2
    return (minimum + qmle)/2
####################################################

def q_max_pen(obs, exp, delta):

    maximum = 10000000
    qmle = 1

    while abs(maximum - qmle) > 0.000001:
        q_mid = (maximum + qmle)/2

        if llri_pen(obs, exp, q_mid, delta) < 0:
            maximum = maximum - (maximum-qmle)/2
        else:
            qmle = qmle + (maximum-qmle)/2

    return (maximum + qmle)/2



def minmax(obs, exp):
    return (q_min(obs, exp), q_max(obs, exp))

def minmax_pen(obs, exp, delta):
    return (q_min_pen(obs, exp, delta), q_max_pen(obs, exp, delta))

#Get q intervals
def get_INTERVALS(obs, exp, delta):
    return [q_min(obs, exp), q_min_pen(obs, exp, delta), q_max_pen(obs, exp, delta), q_max(obs, exp)]
    

def relu_scores(scores, delta):
    return min(np.abs(scores), delta)

def ReLU(scores):
    return (scores > 0).view('i1')

In [12]:
intervals = get_INTERVALS(observed[1], expected[1], delta=5)

# for i in range(len(intervals)):
    # print("i: {}, val: {}".format(i, intervals[i]))

qtest = (intervals[1] + intervals[0])/2
qtest2 = 1.3

scores1 = llr(observed, expected, qtest)
scores2 = llr_pen(observed, expected, qtest, delta=1)
scores3 = np.array([-5.2, -4.7, -6.1])

# initial = ReLU(scores2)
initial = np.array([0, 1, 0])
weights = np.array([0.5, 1, 1])

filter1 = np.array([0, 1, 1])
filter2 = np.array([1, 1, 1])
filter3 = np.array([1, 1, 1])

theta = pd.DataFrame([filter1, filter2, filter3])
tree = Node(theta= theta, name= '*')
tree.build_tree(theta=theta, min_filters_to_split=3)

# eq = (filter1 != filter3).view('i1')
# pens = np.minimum(np.abs(scores1), 5)
# sum(pens[filter2 != filter3])

In [18]:
def compute_new_subset(scores, filter, delta):
    
    subset = np.zeros(len(filter))
    weights = np.zeros(len(filter))

    for i in range(len(filter)):
        if (scores[i] > delta):

            subset[i] = 1 #always include element in subset
            weights[i] = delta #w_i = delta

        elif (np.abs(scores[i]) <= delta):

            subset[i] = filter[i] #include iff in filter
            weights[i] = scores[i] #w_i = score_i

        else:

            if filter[i] == 1: #penalize by delta if in filter
                pass #penalty of delta

            subset[i] = 0 #always exclude from subset
            weights[i] = delta #w_i = delta
    return subset, weights

def GRAD_F(obs, exp, q):
    return (obs/q) - exp

Grad_vec = np.vectorize(GRAD_F)

def compute_qmle(obs, exp, guess):
    q = guess
    numiter = 0
    prev = 0
    while la.norm(np.abs(llr(obs, exp, q) - prev)) > 1e-10:
    # while la.norm(Grad_vec(obs, exp, q)) > 1e-5:
        prev = llr(obs, exp, q)
        q = q + 0.001*(sum(Grad_vec(obs, exp, q)))
        numiter = numiter + 1
        
        # print("Current q: {}, Function Value: {}, F(q+1) - F(q): {}".format(q, llr(obs, exp, q), np.abs(llr(obs, exp, q) - prev)))
    return q


In [14]:
scores = llr(observed, expected, 1.3)
test = np.array([0, 1, 0])
scores[test == 0] = 0
scores

array([ 0.        , 56.45155873,  0.        ])

In [21]:
#ALGORITHM:
###########################################################################

converged = False
delta = 1

for i in range(len(intervals) - 1):

    qmid = (intervals[i+1] + intervals[i])/2

    print("Initial q value: {}".format(qmid))

    scores = llr(observed, expected, qmid)
    print("Scores for data elements: {}".format(scores))

    S = ReLU(scores=scores)
    print("initial subset: {}".format(S))

    weights = np.ones(len(initial)) #chg
    print("Weights for initial subset: {}".format(weights))

    Fmax = sum(scores[scores > 0])
    print("Score of initial subset: {}".format(Fmax))

    while converged == False:

        filter, distance = tree.traverse(S, weights=weights)
        print("Best matched filter for subset: {}".format(filter))
        
        pens = np.minimum(np.abs(scores), delta)
        
        F = Fmax - sum(pens[filter != initial]) #new score
        print("New score: {}".format(F))

        S, W = compute_new_subset(scores=scores, filter=filter, delta=delta)
        print("New subset: {}".format(S))
        print("Weights for new S: {}".format(W))

        newS = scores
        newS[S == 0] = 0
        qmle = compute_qmle(newS, expected, 1) #compute qmle of new S
        print("Q_MLE for new subset: {}".format(qmle))
        
        if qmle == qmid:
            converged = True
        else:
            qmid = qmle
    
    

    

Initial q value: 1.0109783951359455
Scores for data elements: [1.94375049 2.52034236 2.42531143]
initial subset: [1 1 1]
Weights for initial subset: [1. 1. 1.]
Score of initial subset: 6.889404283894808
Best matched filter for subset: 0    1
1    1
2    1
Name: 1, dtype: int32
New score: 4.889404283894808
New subset: [1. 1. 1.]
Weights for new S: [1. 1. 1.]
Q_MLE for new subset: 1
Best matched filter for subset: 0    1
1    1
2    1
Name: 1, dtype: int32
New score: 4.889404283894808
New subset: [1. 1. 1.]
Weights for new S: [1. 1. 1.]
Q_MLE for new subset: 1
Initial q value: 3.8931530341704317
Scores for data elements: [109.92501872 140.4217172  147.25959632]
initial subset: [1 1 1]
Weights for initial subset: [1. 1. 1.]
Score of initial subset: 397.606332237121
Initial q value: 6.804131421272302
Scores for data elements: [ 6.0672574   2.50589396 33.21872941]
initial subset: [1 1 1]
Weights for initial subset: [1. 1. 1.]
Score of initial subset: 41.79188076894019


In [12]:
#compare scores to 0 and to delta
#case 1 (score_i > delta):
#always include this element in the subset (always 1)

#case 2 (0 < score_i < delta)
#include in subset iff in filter

#case 3 (-delta < score_i < 0)
#include in subset iff in filter

#case 4 (score_i < -delta)
#always exclude this element from subset (always 0)

filter1 = np.array([0, 1, 1])
filter2 = np.array([1, 1, 1])
filter3 = np.array([1, 0, 0])

delta = 5

def calc_weights(observed, expected, q, filter, delta):
    
    scores = llr_pen(observed, expected, q, delta)
    weights = np.zeros(3)
    subset = np.zeros(3)
    
    for i in range(len(filter)):
        if (scores[i] > delta):

            subset[i] = 1 #always include element in subset
            weights[i] = delta #w_i = delta

        elif (np.abs(scores[i]) <= delta):

            subset[i] = filter[i] #include iff in filter
            weights[i] = scores[i] #w_i = score_i

        else:

            if filter[i] == 1: #penalize by delta if in filter
                pass #penalty of delta

            subset[i] = 0 #always exclude from subset
            weights[i] = delta #w_i = delta

    return subset, weights