# Processing

## Preparation

In [2]:
import numpy as np
from collections import namedtuple
from src.decorators import benchmark
from src.structures import DefaultOrderedDict

## Spatial harmonization

In [53]:
# TODO should it work inplace?
def binary_jaccard(arr1, arr2, return_matrix=False):
    """
    Calculate the Jaccard index of two equally shaped binary arrays or vectors.

    The index is m11 / (m10 + m01 + m11), where mXY is the number of
    positions at which arr1 holds X and arr2 holds Y. If return_matrix is
    set to true the method provides the Jaccard index and the counts it was
    derived from as a named tuple.

    :param arr1: numpy.ndarray, list, tuple holding only 0/1 (or booleans)
    :param arr2: numpy.ndarray, list, tuple with the same shape as arr1
    :param return_matrix: boolean, optional
    :return: float OR (float, namedtuple); the index is nan when neither
             input contains a 1, because the ratio is then 0 / 0
    :raises ValueError: if an input holds a value other than 0 or 1, or if
             the two inputs differ in shape
    """
    raw1, raw2 = np.asarray(arr1), np.asarray(arr2)

    # Validate the values as given. Casting to int8 first would truncate
    # inputs such as 0.5 or 1.5 to 0 or 1 and let them pass as binary.
    for raw in (raw1, raw2):
        if not np.all((raw == 0) | (raw == 1)):
            raise ValueError('Attributes should contain only binary attributes!')

    # Reject shapes that NumPy would otherwise broadcast against each other.
    if raw1.shape != raw2.shape:
        raise ValueError('Both arrays must have the same shape!')

    in_a, in_b = raw1 == 1, raw2 == 1

    # Total number of attributes where A == 1 and B == 1
    m11 = np.sum(in_a & in_b)
    # Total number of attributes where A == 1 and B == 0
    m10 = np.sum(in_a & ~in_b)
    # Total number of attributes where A == 0 and B == 1
    m01 = np.sum(~in_a & in_b)

    union = m10 + m01 + m11
    if union == 0:
        # No 1 in either input: the index is undefined, so report nan
        # explicitly instead of triggering a NumPy divide warning.
        jaccard = np.float64('nan')
    else:
        jaccard = m11 / union

    if return_matrix:
        Matrix = namedtuple('Matrix', 'm11 m10 m01 m00')
        # m00 is not needed for the Jaccard index and is reported as 0.
        return jaccard, Matrix(m11, m10, m01, 0)
    return jaccard

def simple_matching_coefficient(arr1, arr2, return_matrix=False):
    """
    Calculates the simple matching coefficient of two binary arrays or
    vectors: the number of positions where both inputs are 1 or both are 0,
    divided by the number of elements in arr1.
    If return_matrix is set to true the method provides the coefficient
    and the count matrix (with m00 filled in) as a named tuple.

    :param arr1: numpy.ndarray, list, tuple
    :param arr2: numpy.ndarray, list, tuple
    :param return_matrix: boolean, optional
    :return: float OR (float, namedtuple)
    """
    # Only the count matrix is needed; the jaccard index itself is discarded.
    counts = binary_jaccard(arr1, arr2, True)[1]
    n_attributes = np.array(arr1, dtype=np.int8).size

    # Total number of attributes where A == 0 and B == 0.
    # binary_jaccard reports m00 as 0, so summing the whole tuple yields
    # m11 + m10 + m01.
    both_absent = n_attributes - sum(counts)

    coefficient = (counts.m11 + both_absent) / n_attributes

    if not return_matrix:
        return coefficient
    return coefficient, counts._replace(m00=both_absent)