In [1]:
import numpy as np

In [2]:
# Revenue measure. `ndmin=3` prepends a length-1 leading axis, so the array has
# shape (1, 4, 3): one group of four rows with three columns each. After stacking,
# the first two columns are read by root_cause_identification as F and A
# (presumably forecast and actual values — confirm); the third column is all zeros here.
revenue = np.array(
    [
        [50, 10, 0],
        [0, 0, 0],
        [40, 70, 0],
        [10, 10, 0],
    ],
    ndmin=3,
)

In [3]:
# Clicks measure, laid out exactly like `revenue`: shape (1, 4, 3) thanks to
# `ndmin=3`, with the values of interest in the first two columns and an
# all-zero third column.
clicks = np.array(
    [
        [100, 20, 0],
        [200, 360, 0],
        [100, 100, 0],
        [100, 100, 0],
    ],
    ndmin=3,
)

In [4]:
# Cost-per-click measure, shape (1, 4, 3) via `ndmin=3`. The first two columns
# equal revenue / clicks row by row (with 0 where revenue is 0), i.e. this is a
# derived measure; the third column is all zeros.
cost_per_click = np.array(
    [
        [0.5, 0.5, 0],
        [0, 0, 0],
        [0.4, 0.7, 0],
        [0.1, 0.1, 0],
    ],
    ndmin=3,
)

In [5]:
def k_l_divergence(p,q):
    """Return the element-wise Kullback-Leibler terms ``p * log2(p / q)``.

    The terms are NOT summed; the result has the broadcast shape of ``p`` and ``q``.

    Parameters
    ----------
    p, q : array_like
        Values to compare element by element (scalars are accepted too).

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``p * log2(p / q)`` per element, with the usual convention that the term
        is 0 wherever ``p == 0`` (the raw formula would give ``0 * log2(0) = nan``).
        Where ``p > 0`` and ``q == 0`` the term is ``inf``.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    # np.where evaluates both branches, so the raw formula still runs on the
    # zero entries; silence the divide/invalid warnings it would raise there.
    with np.errstate(divide='ignore', invalid='ignore'):
        raw = p * np.log2(p / q)
    # Only exact zeros are replaced, so NaN or negative inputs still surface as NaN.
    # The trailing [()] turns a 0-d result back into a scalar and is a no-op otherwise.
    return np.where(p == 0, 0.0, raw)[()]

In [6]:
def j_s_divergence(p,q):
    """Return the element-wise Jensen-Shannon terms between ``p`` and ``q``.

    Computes ``(p*log2(2p/(p+q)) + q*log2(2q/(p+q))) / 2`` per element; the
    terms are NOT summed.

    Parameters
    ----------
    p, q : array_like
        Values to compare element by element (scalars are accepted too).

    Returns
    -------
    numpy.ndarray or numpy scalar
        The per-element value. A half-term whose own input is exactly 0
        contributes 0, so ``p == 0, q > 0`` yields ``q / 2`` and ``p == q == 0``
        yields 0. The raw formula returned NaN in both cases, which hid the
        surprise of elements that appear or disappear.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    # np.where evaluates both branches, so log2(0) and 0/0 are still computed
    # for the masked entries; silence the warnings they would raise.
    with np.errstate(divide='ignore', invalid='ignore'):
        p_term = np.where(p == 0, 0.0, p * np.log2((2 * p) / (p + q)))
        q_term = np.where(q == 0, 0.0, q * np.log2((2 * q) / (p + q)))
    # Only exact zeros are masked, so NaN or negative inputs still surface as NaN.
    return (p_term + q_term) / 2

In [31]:
def root_cause_identification(datasets,Teep=0.1,Tep=0.6):
    """Select, per measure and dimension, the most surprising elements that explain a change.

    Parameters
    ----------
    datasets : numpy.ndarray
        4-D array of shape (m, i, j, k): m measures, i dimensions, j rows per
        dimension. Row 0 of each dimension holds the totals and rows 1.. hold the
        individual elements. Along the last axis, column 0 and column 1 are the two
        values being compared (named F and A here; presumably forecast and
        actual — confirm) and column 2 is the per-element score that is
        thresholded and accumulated (presumably explanatory power — confirm).
    Teep : float, optional
        An element is kept only if the absolute value of its column-2 score
        exceeds this threshold.
    Tep : float, optional
        A candidate set is accepted as soon as the summed (signed) column-2
        scores of the kept elements exceed this threshold.

    Returns
    -------
    list of dict
        One ``{'Add': [...], 'Surprise': float}`` entry for every
        (measure, dimension) pair whose kept elements exceeded ``Tep``, in
        measure-major order. ``'Add'`` lists 0-based element indices (the totals
        row is not counted) in decreasing order of surprise; ``'Surprise'`` is
        the sum of their Jensen-Shannon terms. Pairs that never exceed ``Tep``
        produce no entry, and entries do not record which pair they came from.

    Raises
    ------
    ValueError
        If ``datasets`` is not 4-dimensional.
    """
    n_measures, n_dims, _, _ = datasets.shape

    F, A = datasets[:, :, 0, 0], datasets[:, :, 0, 1]
    # Append the new axis LAST so every (measure, dimension) total divides its own
    # row of elements. Inserting it in the middle, shape (m, 1, i), only
    # broadcasts correctly when there is a single dimension.
    P = datasets[:, :, 1:, 0] / F[:, :, np.newaxis]
    Q = datasets[:, :, 1:, 1] / A[:, :, np.newaxis]
    # Slice the element rows of column 2 the same way as P and Q, so an index into
    # S addresses the same element here. Indexing `datasets` directly with such an
    # index is off by one because row 0 is the totals row.
    EP = datasets[:, :, 1:, 2]

    S = np.nan_to_num(j_s_divergence(P, Q))
    # Element indices ordered from most to least surprising.
    S_sort = np.argsort(S, axis=2)[:, :, ::-1]

    ExplanatorySet = []
    for x in range(n_measures):
        for y in range(n_dims):
            Candidate, Explains, Surprise = {'Add': [], 'Surprise': 0}, 0, 0
            for z in S_sort[x, y, :]:
                if np.abs(EP[x, y, z]) > Teep:
                    Candidate['Add'].append(int(z))
                    Surprise += S[x, y, z]
                    Explains += EP[x, y, z]
                # Checked after every element, kept or not; stop at the first
                # point where the kept elements explain enough.
                if Explains > Tep:
                    Candidate['Surprise'] = Surprise
                    ExplanatorySet.append(Candidate)
                    break
    return ExplanatorySet

In [8]:
# Combine the three measures along a new leading axis, in the order
# revenue, clicks, cost_per_click. With the (1, 4, 3) inputs defined above the
# result has shape (3, 1, 4, 3); mixing the integer and float inputs makes it float.
dataset_origin = np.array((revenue, clicks, cost_per_click))

In [9]:
# Prepend, along axis 2, one row holding the column-wise sum of all existing rows,
# so row 0 becomes the totals row and the original rows shift to 1..
dataset_agg = np.insert(dataset_origin, 0, dataset_origin.sum(axis=2), axis=2)
# Overwrite column 2 of every row (totals included) with column 1 minus column 0.
dataset_agg[..., 2] = dataset_agg[..., 1] - dataset_agg[..., 0]

In [10]:
# Normalise column 2 of dataset_agg:
#   * element rows (1..): the element's change divided by the change of its totals row;
#   * totals row (0):     the total change divided by the total's column-0 value.
#
# The change is recomputed from columns 0 and 1 rather than read back from column 2,
# so re-running this cell gives the same result. Dividing column 2 in place, as
# before, divided it again on every re-run.
diff_arr = dataset_agg[..., 1] - dataset_agg[..., 0]
# The totals-row change repeated once per element row. The sizes come from the
# array itself instead of the previous hard-coded repeat(4).reshape(3,1,4).
sum_arr = diff_arr[:, :, :1].repeat(dataset_agg.shape[2] - 1, axis=2)
dataset_agg[:, :, 1:, 2] = np.divide(diff_arr[:, :, 1:], sum_arr)
dataset_agg[:, :, 0, 2] = diff_arr[:, :, 0] / dataset_agg[:, :, 0, 0]

In [11]:
# NOTE: the data in dataset_agg[2] (cost_per_click) is incorrect, because it is a
# measure derived from the preceding entries (its inputs equal revenue / clicks).
print(dataset_agg)

[[[[ 1.0e+02  9.0e+01 -1.0e-01]
   [ 5.0e+01  1.0e+01  4.0e+00]
   [ 0.0e+00  0.0e+00 -0.0e+00]
   [ 4.0e+01  7.0e+01 -3.0e+00]
   [ 1.0e+01  1.0e+01 -0.0e+00]]]


 [[[ 5.0e+02  5.8e+02  1.6e-01]
   [ 1.0e+02  2.0e+01 -1.0e+00]
   [ 2.0e+02  3.6e+02  2.0e+00]
   [ 1.0e+02  1.0e+02  0.0e+00]
   [ 1.0e+02  1.0e+02  0.0e+00]]]


 [[[ 1.0e+00  1.3e+00  3.0e-01]
   [ 5.0e-01  5.0e-01  0.0e+00]
   [ 0.0e+00  0.0e+00  0.0e+00]
   [ 4.0e-01  7.0e-01  1.0e+00]
   [ 1.0e-01  1.0e-01  0.0e+00]]]]


In [32]:
# dataset_agg[:-1] drops the last entry along the first axis (cost_per_click, going
# by the stacking order of dataset_origin), so only revenue and clicks are analysed.
root_cause = root_cause_identification(dataset_agg[:-1])

  


In [33]:
# NOTE(review): root_cause_identification returns a list of candidate dicts, but the
# output saved below is a bare numeric array, so it looks stale. Re-run to refresh.
print(root_cause)

[[[0.09654381 0.         0.04448635 0.00021102]]

 [[0.04661156 0.01734682 0.00073768 0.00073768]]]
