<h1 style='text-align: center; color: blue;'>ME 781: Assignment 3 (Dissimilarity and Similarity measures)</h1>
<h3 style='text-align: right; color: red;'>~ Shubham Lohiya, 18D100020</h3>

In [1]:
import numpy as np

In [2]:
def get_measure_func(abbrev):
    """Return the measure function registered under ``abbrev``.

    Every returned function has the signature ``f(a, b, param)``, where ``a``
    and ``b`` are ``(1, p)`` row vectors, and returns the tuple
    ``(dissimilarity, similarity)``.

    Supported abbreviations and the meaning of ``param``:

    * ``'EN'``  - Euclidean norm (``param`` ignored)
    * ``'HSN'`` - Frobenius / Hilbert-Schmidt norm (``param`` ignored)
    * ``'DN'``  - diagonal norm (``param``: ``p`` weights)
    * ``'MN'``  - Mahalanobis norm (``param``: ``(n, p)`` sample used to
      estimate the covariance matrix)
    * ``'LMN'`` - Lebesgue / Minkowski norm (``param``: exponent ``alpha >= 1``)
    * ``'CS'``, ``'OS'``, ``'DS'``, ``'JS'`` - cosine, overlap, Dice and
      Jaccard similarity (``param`` ignored)

    Raises
    ------
    KeyError
        If ``abbrev`` is not one of the abbreviations above.
    """
    norm = np.linalg.norm

    def sim2dis(sim):
        """Convert a similarity to a dissimilarity via ``1/sim - 1``.

        A similarity of exactly 0 maps to ``inf``. A negative similarity
        (possible for the cosine measure) maps to a negative value.
        """
        # The infinite result for sim == 0 is intended, so NumPy's
        # divide-by-zero warning is silenced for this one operation.
        with np.errstate(divide='ignore'):
            return np.divide(1.0, sim) - 1

    def dis2sim(dis):
        """Convert a dissimilarity to a similarity via ``1/(1 + dis)``."""
        return 1/(1 + dis)

    def euclidean(a, b, dummy_param):
        """Euclidean distance between ``a`` and ``b``."""
        dis = np.sqrt(np.sum((a-b)**2))
        return dis, dis2sim(dis)

    def frobenius(a, b, dummy_param):
        """Matrix norm of ``a - b`` induced by the all-ones matrix.

        With ``A`` filled with ones, ``x A x^T`` equals ``sum(x)**2``, so the
        result is ``|sum(a - b)|``.
        NOTE(review): confirm this is the intended definition of the
        Frobenius / Hilbert-Schmidt norm for this assignment.
        """
        x = a-b
        A = np.ones((a.shape[1], a.shape[1]))
        dis = np.sqrt(np.squeeze(x @ A @ x.T))
        return dis, dis2sim(dis)

    def diagonal(a, b, weights):
        """Matrix norm of ``a - b`` induced by ``diag(weights)``.

        Raises ``ValueError`` if ``weights`` is not array-like, does not hold
        exactly ``p`` values, or makes the squared norm negative.
        """
        try:
            weights = np.asarray(weights)
        except Exception as err:
            raise ValueError("diag distance weights input is not array-like") from err

        if weights.size != a.shape[1]:
            raise ValueError("diagonal norm weights input has invalid dimension.")

        # Flatten first: np.diag builds a diagonal matrix only from 1-D input;
        # given a 2-D array such as [[w1, ..., wp]] it would instead extract
        # that array's diagonal and produce a matrix of the wrong size.
        A = np.diag(weights.ravel())
        x = a-b
        dis = np.squeeze(x @ A @ x.T)
        if dis < 0:
            raise ValueError("dissimilarity turns out to be the square root of a negative number")
        dis = np.sqrt(dis)
        return dis, dis2sim(dis)

    def mahalanobis(a, b, additional_data):
        """Mahalanobis distance using the covariance of ``additional_data``.

        ``additional_data`` must be an ``(n, p)`` array with ``n >= 2`` rows
        (observations) and one column per component of ``a`` / ``b``.
        Raises ``ValueError`` for a malformed sample or a singular covariance.
        """
        try:
            additional_data = np.asarray(additional_data, dtype=float)
        except (TypeError, ValueError) as err:
            raise ValueError("Incorrect format of additional_data for calculation of covariance matrix.") from err
        # Checking ndim first avoids an IndexError on 1-D input.
        if additional_data.ndim != 2 or additional_data.shape[1] != a.shape[1]:
            raise ValueError("Incorrect format of additional_data for calculation of covariance matrix.")
        if additional_data.shape[0] < 2:
            # A single observation gives an undefined (NaN) sample covariance.
            raise ValueError("At least two observations are required to estimate the covariance matrix.")
        x = a-b

        # np.cov squeezes its result, so a single-feature sample comes back as
        # a 0-d array; restore the (p, p) shape that np.linalg.inv requires.
        cov = np.atleast_2d(np.cov(additional_data, rowvar=False))
        try:
            A = np.linalg.inv(cov)
        except np.linalg.LinAlgError as err:
            raise ValueError("The distribution results in a non-invertible covariance matrix.") from err

        assert len(A) == a.shape[1]

        dis = np.sqrt(np.squeeze(x @ A @ x.T))
        return dis, dis2sim(dis)

    def lebesgue(a, b, alpha):
        """Lebesgue / Minkowski distance with exponent ``alpha`` (>= 1)."""
        if alpha < 1:
            raise ValueError("Similarity/ Dissimilarity measures not defined")
        dis = np.sum(np.abs(a-b)**alpha)**(1/alpha)
        return dis, dis2sim(dis)

    def cosine(a, b, dummy_param):
        """Cosine similarity: ``a.b / (|a| |b|)``."""
        sim = np.squeeze(a @ b.T) / (norm(a)*norm(b))
        return sim2dis(sim), sim

    def overlap(a, b, dummy_param):
        """Overlap similarity: ``a.b / min(|a|, |b|)**2``.

        When the ratio is NaN (0/0, e.g. one vector is all zeros) a message
        is printed and ``None`` is returned instead of a tuple.
        """
        # The 0/0 case is reported explicitly below, so NumPy's
        # invalid-value warning would only be noise here.
        with np.errstate(invalid='ignore'):
            sim = np.squeeze((a @ b.T)) / min(norm(a), norm(b))**2
        if np.isnan(sim):
            print('Measures not defined; similarity has 0/0 form')
            return
        return sim2dis(sim), sim

    def dice(a, b, dummy_param):
        """Dice similarity: ``2 a.b / (|a|**2 + |b|**2)``."""
        sim = 2*np.squeeze(a @ b.T) / (norm(a)**2 + norm(b)**2)
        return sim2dis(sim), sim

    def jaccard(a, b, dummy_param):
        """Jaccard similarity: ``a.b / (|a|**2 + |b|**2 - a.b)``."""
        intersection = np.squeeze(a @ b.T)
        union = norm(a)**2 + norm(b)**2 - intersection
        sim = intersection/union
        return sim2dis(sim), sim

    measures = {'EN': euclidean, 'HSN': frobenius, 'DN': diagonal,
                'MN': mahalanobis, 'LMN': lebesgue, 'CS': cosine,
                'OS': overlap, 'DS': dice, 'JS': jaccard}

    return measures[abbrev]

In [3]:
def get_measure(a, b, measure='EN', additional_data=None):
    """Compute a (dissimilarity, similarity) pair for the vectors a and b.

    Parameters
    ----------
    a, b : array-like
        Inputs with at most 2 dimensions and the same number of elements;
        each is flattened to a ``(1, p)`` row vector before the measure is
        applied.
    measure : str, default 'EN'
        Abbreviation understood by ``get_measure_func``: 'EN', 'HSN', 'DN',
        'MN', 'LMN', 'CS', 'OS', 'DS' or 'JS'.
    additional_data : optional
        Extra argument forwarded to the measure. Required for 'DN'
        (weights), 'MN' (sample for the covariance matrix) and 'LMN'
        (exponent); forwarded but ignored by the other measures.

    Returns
    -------
    Whatever the selected measure function returns: a tuple
    ``(dissimilarity, similarity)``, or ``None`` when the 'OS' measure hits
    its 0/0 case.

    Raises
    ------
    ValueError
        For non array-like or mismatched inputs, an unknown abbreviation, or
        a missing ``additional_data`` for 'DN', 'MN' or 'LMN'.
    """
    try:
        a, b = np.asarray(a), np.asarray(b)
    except Exception as err:
        raise ValueError("a or b are not array-like") from err

    if a.ndim > 2 or b.ndim > 2:
        raise ValueError("a and b must be 1-d numpy arrays.")
    # Flatten each input into a single row vector of shape (1, p).
    a = a.reshape(1, -1)
    b = b.reshape(1, -1)

    if a.shape[1] != b.shape[1]:
        raise ValueError("dimensions of a and b do not match.")

    # KeyError: unknown abbreviation; TypeError: unhashable value (e.g. a list).
    try:
        func = get_measure_func(measure)
    except (KeyError, TypeError) as err:
        raise ValueError("incorrect measure abbreviation passed.") from err

    if measure in {'DN', 'MN', 'LMN'} and additional_data is None:
        raise ValueError(f"the additional_data parameter is compulsory for {measure} type measure.")

    return func(a, b, additional_data)

# Measures Demo

## Euclidean Norm

In [4]:
# Euclidean norm of a - b; the displayed tuple is (dissimilarity, similarity).
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='EN')

(6.782329983125268, 0.12849622184722817)

## Frobenius or Hilbert-Schmidt Norm

In [5]:
# 'HSN' uses an all-ones matrix, so the dissimilarity equals |sum(a - b)|.
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='HSN')

(2.0, 0.3333333333333333)

## Diagonal Norm

In [6]:
# Diagonal norm: additional_data supplies one weight per component. A negative
# weight is accepted as long as the weighted sum of squares stays non-negative.
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='DN', additional_data=[7, 1, 10, -1])

(14.035668847618199, 0.06650851452866428)

In [7]:
# Four weights for 3-dimensional vectors: the diagonal norm rejects the input.
# The expected error is caught and printed so "Run All" continues past it.
try:
    get_measure([1, 2, 4], [-6, 0, 9], measure='DN', additional_data=[1, 1, 0, 1])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: diagonal norm weights input has invalid dimension.

In [8]:
# These weights make the weighted sum of squares negative, so no real-valued
# norm exists. The expected error is caught and printed so "Run All" continues.
try:
    get_measure([1,-2], [-11, 4], measure='DN', additional_data=[-11, 4])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: dissimilarity turns out to be the square root of a negative number

## Mahalanobis Norm

In [9]:
# Mahalanobis norm: each row of additional_data is one observation with the same
# number of components as a and b; its covariance matrix defines the norm.
additional_data = [[4,18,21,37], [14,7,2,11], [3,5,2,13], [11,34,47,26], [71, 2, 1, 15]]
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='MN', additional_data=additional_data)

(3.5102967568357557, 0.22171490123890655)

In [10]:
# The observations have 4 components but a and b have 3, so the sample cannot
# be used. The expected error is caught and printed so "Run All" continues.
additional_data = [[-13,2,1,6], [17,5,-8,2], [44, 23,73,-11]]
try:
    get_measure([61, 53, 8], [7, -2, 10], measure='MN', additional_data=additional_data)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Incorrect format of additional_data for calculation of covariance matrix.

In [11]:
# Two observations in 2-D give a singular covariance matrix, which cannot be
# inverted. The expected error is caught and printed so "Run All" continues.
additional_data = [[-1,1], [0,0]]
try:
    get_measure([15, 41], [68, -29], measure='MN', additional_data=additional_data)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: The distribution results in a non-invertible covariance matrix.

## Lebesgue or Minkowski Norm

In [12]:
# Lebesgue / Minkowski norm: additional_data is the exponent alpha (here 6).
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='LMN', additional_data=6)

(5.2006744337772055, 0.16127277938552234)

In [13]:
# An exponent below 1 is rejected by the Lebesgue / Minkowski measure.
# The expected error is caught and printed so "Run All" continues past it.
try:
    get_measure([2, 13, 7, 11, 5], [-8, 31, 1, 6, 52], measure='LMN', additional_data=0)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Similarity/ Dissimilarity measures not defined

## Cosine Similarity

In [14]:
# Cosine similarity; the dissimilarity is derived from it as 1/sim - 1.
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='CS')

(0.14429494027266965, 0.8739005695172556)

In [15]:
# Orthogonal vectors (dot product 0): the similarity is 0, so the derived
# dissimilarity 1/sim - 1 is infinite.
get_measure([3, -2, 4], [4, -2, -4], measure='CS')

  after removing the cwd from sys.path.


(inf, 0.0)

## Overlap Similarity

In [16]:
# Overlap similarity: dot product divided by the smaller squared norm.
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='OS')

(0.03267973856209139, 0.9683544303797469)

In [17]:
# A zero vector makes the overlap ratio 0/0: get_measure prints a message and
# returns None, so `a` is None after this cell.
a = get_measure([0, 0, 0], [4, -2, -4], measure='OS')

Measures not defined; similarity has 0/0 form




## Dice Similarity

In [18]:
# Dice similarity: twice the dot product over the sum of squared norms.
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='DS')

(0.15032679738562105, 0.8693181818181818)

## Jaccard Similarity

In [19]:
# Jaccard similarity: dot product over (sum of squared norms - dot product).
get_measure([4, 12, 3, 5], [9, 8, 2, 3], measure='JS')

(0.30065359477124187, 0.7688442211055276)