## Picking counterfactuals

Alternative implementation of the diversity optimization used to pick counterfactuals: the objective function q is optimized instead of g.

This section contains only the evaluation function, the generation function, and the `ncover` greedy algorithm.

In [None]:
""" naive max cover """
def ncover(aset, f=None, k=None, df=None, verbose=False):
    """Greedily pick up to ``k`` elements of ``aset`` (naive max cover).

    On every iteration the remaining candidate with the highest score is
    moved from the candidate pool into the result list.  The score of a
    candidate ``x`` is ``f(covers + [x])`` when ``df`` is None, otherwise
    ``df(covers, x)``.

    Args:
        aset: Collection of candidates.  It is copied into a list and never
            mutated.
        f: Objective evaluated on a list of elements.  Required when ``df``
            is None; when ``df`` is given it is only used for verbose output.
        k: Maximum number of elements to select.  ``None`` or any value
            ``>= len(aset)`` means "all of them".
        df: Optional scoring function called as ``df(covers, x)``.  When it is
            given, selection stops as soon as the best score is ``<= 0``.
        verbose: Print one progress line per selected element.

    Returns:
        The selected elements, in the order they were picked.

    Note:
        ``argmax`` is defined outside this block; it is used here as
        returning an ``(index, value)`` pair for the best candidate.
    """
    if k is None or k >= len(aset):
        k = len(aset)
    pset = list(aset)
    # covers is the greedily accumulated result
    covers = []
    for i in range(k):
        if df is None:  # optimize f
            bestp, value = argmax(pset, lambda x: f(covers + [x]))
        else:  # optimize df
            bestp, value = argmax(pset, lambda x: df(covers, x))
            if value <= 0:
                # no remaining candidate improves the selection
                break
        best = pset.pop(bestp)
        covers.append(best)
        if verbose:
            if df is None:
                print('it:', i, 'x:', best, 'f:', value)
            else:
                # 'f:' reports the objective of the current selection.  The
                # previous code called df(covers) here, which does not match
                # the two-argument df(covers, x) used for scoring above.
                total = f(covers) if f is not None else None
                print('it:', i, 'x:', best, 'f:', total, 'df:', value)
    return covers

def EVAL_find_counter_exemplars(latent_representation_original, Z, idxs, counter_exemplar_idxs):
    """
    Compute the value of the goal function f for a set of counter-exemplars.

    A min-max scaler is fitted on the rows of ``Z`` selected by ``idxs``; the
    instance to explain and the rows of ``Z`` selected by
    ``counter_exemplar_idxs`` are mapped through that scaler before f is
    evaluated on them.

    Parameters:
        latent_representation_original: instance to explain; it is passed to
            ``scaler.transform`` as is (assumes a 2-D, single-row array --
            TODO confirm against callers).
        Z: indexable collection of latent points; ``np.shape(Z)[1]`` is the
            number of features.
        idxs: indices into ``Z`` of all candidate points (used to fit the
            scaler).
        counter_exemplar_idxs: indices into ``Z`` of the counter-exemplars to
            evaluate.

    Returns:
        ``f`` evaluated on the scaled counter-exemplars.

    Change goal function to q (or f)
    """
    # prepare the data used to fit the normalization
    data = np.zeros((len(idxs), np.shape(Z)[1]))
    for row, idx in enumerate(idxs):
        data[row] = Z[idx]

    # min-max normalization (fitted on ALL candidate examples); only the
    # fitted scaler is needed here, so fit() replaces the discarded
    # fit_transform() result
    scaler = MinMaxScaler()
    scaler.fit(data)

    # MIN MAX normalize instance to explain
    instance = scaler.transform(latent_representation_original)

    # weight of the distance-to-instance penalty in f
    lconst = 1.5

    #### original definition of optimizing function
    def f(selected):
        # `dist` is defined outside this block; it is used as a distance
        # between two points.
        n = len(selected)
        if n == 0:
            return 0
        dpoints = dx = 0
        for i, p1 in enumerate(selected):
            dx += dist(p1, instance)
            for p2 in selected[i + 1:]:
                dpoints += dist(p1, p2)
        # NOTE(review): the pairwise sum is normalized by n*n/2, not by the
        # number of pairs n*(n-1)/2 -- kept as in the original definition.
        dpoints /= (n * n) / 2
        dx /= n
        return dpoints - lconst * dx

    # get the extracted CF and apply the same scaling
    extracted_CF_data = scaler.transform([Z[idx] for idx in counter_exemplar_idxs])

    return f(extracted_CF_data)

        

def DIVERSITY_find_counter_exemplars(latent_representation_original, Z, idxs, metric, count):
    """
    Pick CF based on diversity optimization.

    The rows of ``Z`` selected by ``idxs`` are min-max normalized, and the
    greedy ``ncover`` procedure then selects up to 5 of them by maximizing
    the goal function f.

    Parameters:
        latent_representation_original: instance to explain; it is passed to
            ``scaler.transform`` as is (assumes a 2-D, single-row array --
            TODO confirm against callers).
        Z: indexable collection of latent points; ``np.shape(Z)[1]`` is the
            number of features.
        idxs: indices into ``Z`` of the candidate points.
        metric: not used by this implementation.
        count: not used by this implementation.
            NOTE(review): the number of picked points is hard-coded to
            ``k=5``; confirm whether ``count`` was meant to control it.

    Returns:
        List of elements of ``idxs`` (indices into ``Z``) chosen by the
        greedy algorithm, in selection order.

    Change this function: from g to q (or f)
    """
    # prepare the data to apply the diversity optimization
    data = np.zeros((len(idxs), np.shape(Z)[1]))
    for row, idx in enumerate(idxs):
        data[row] = Z[idx]

    # min-max normalization (fitted on ALL candidate examples); fit() replaces
    # a fit_transform() call whose result was discarded
    scaler = MinMaxScaler()
    scaler.fit(data)

    # list of normalized points
    points = list(scaler.transform(data))
    # MIN MAX normalize instance to explain
    instance = scaler.transform(latent_representation_original)

    # weight of the distance-to-instance penalty in f
    lconst = 1.5

    #### original definition of optimizing function
    def f(selected):
        # `dist` is defined outside this block; it is used as a distance
        # between two points.
        n = len(selected)
        if n == 0:
            return 0
        dpoints = dx = 0
        for i, p1 in enumerate(selected):
            dx += dist(p1, instance)
            for p2 in selected[i + 1:]:
                dpoints += dist(p1, p2)
        # NOTE(review): the pairwise sum is normalized by n*n/2, not by the
        # number of pairs n*(n-1)/2 -- kept as in the original definition.
        dpoints /= (n * n) / 2
        dx /= n
        return dpoints - lconst * dx

    def f_rows(rows):
        # same objective, expressed on row positions of `points`
        return f([points[r] for r in rows])

    # Greedy algorithms
    # cannot use acover since f is ...
    # The greedy search runs over row positions instead of the points
    # themselves, so the chosen rows are known exactly.  The previous version
    # recovered them by inverse-transforming the picked points and scanning
    # `data` with np.allclose, which returned extra/duplicated indices when
    # rows were identical or nearly identical.
    chosen_rows = ncover(range(len(points)), f_rows, k=5, verbose=False)

    # convert row positions into indices of Z
    return [idxs[r] for r in chosen_rows]