Question 1:


In [1]:
import numpy as np
import warnings
# Installs a blanket "ignore" filter: every Python warning raised after this
# cell runs is hidden for the rest of the kernel session.
# NOTE(review): presumably this is here to hide NumPy's divide-by-zero
# RuntimeWarning when a similarity of exactly 0 is inverted further down
# (the recorded "(inf, 0.0)" output) — confirm, and consider narrowing the
# filter so unrelated warnings stay visible.
warnings.filterwarnings("ignore")

In [8]:
_UNDEFINED_SIMILARITY_MSG = "Measures not defined; similarity has 0/0 form"


def _quadratic_form(diff, matrix):
    """Return diff^T . matrix . diff for a 1-D difference vector."""
    return np.matmul(np.matmul(diff, matrix), diff)


def _report(dissim, sim):
    """Print the (dissimilarity, similarity) pair as plain Python floats."""
    # Built-in floats keep the printed tuple identical across NumPy versions
    # (NumPy 2 shows scalars as ``np.float64(...)`` inside a tuple).
    print((float(dissim), float(sim)))


def _report_from_dissim(dissim):
    """Derive similarity as 1 / (1 + dissimilarity) and print both values."""
    _report(dissim, 1 / (1 + dissim))


def _report_from_sim(sim):
    """Derive dissimilarity as (1 - similarity) / similarity and print both values."""
    sim = np.float64(sim)
    # A similarity of exactly 0 maps to an infinite dissimilarity; silence the
    # NumPy divide warning locally rather than relying on a global filter.
    with np.errstate(divide="ignore", invalid="ignore"):
        dissim = (1 - sim) / sim
    _report(dissim, sim)


def sim_dissim(x, y, measure, *argv):
    """Print the (dissimilarity, similarity) pair of two datapoints.

    The result is printed as a tuple ``(dissimilarity, similarity)``; every
    problem with the inputs is reported with a printed message instead of an
    exception. The function always returns ``None``.

    Parameters
    ----------
    x, y : array-like
        1-D numeric datapoints of equal length.
    measure : str
        Code of the measure to evaluate.

        Norm based (dissimilarity ``d = sqrt((x - y)^T A (x - y))`` unless
        noted, similarity ``1 / (1 + d)``):

        * ``"EN"``  - ``A`` is the identity matrix.
        * ``"HSN"`` - ``A`` is the all-ones matrix.
        * ``"DN"``  - ``A`` is a diagonal matrix taken from ``argv[0]``, given
          either as the 1-D list of diagonal entries or as a 2-D diagonal
          matrix.
        * ``"MN"``  - ``A`` is the inverse of the sample covariance matrix of
          the datapoints in ``argv[0]`` (rows are observations).
        * ``"LMN"`` - ``d = (sum |x - y|^alpha)^(1 / alpha)`` with
          ``alpha = argv[0]``.

        Similarity based (dissimilarity ``(1 - s) / s``):

        * ``"CS"`` - ``s = x.y / sqrt((x.x) (y.y))``
        * ``"OS"`` - ``s = x.y / min(x.x, y.y)``
        * ``"DS"`` - ``s = 2 x.y / (x.x + y.y)``
        * ``"JS"`` - ``s = x.y / (x.x + y.y - x.y)``
    *argv
        Extra argument required by ``"DN"``, ``"MN"`` and ``"LMN"`` (see above).

    Returns
    -------
    None
    """
    try:
        # Forcing a float dtype makes non-numeric or ragged input fail here,
        # with a clear message, instead of deep inside the arithmetic.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
    except (TypeError, ValueError):
        print("Invalid format for datapoint(s)")
        return

    if x.ndim != 1:
        print("Datapoint 1 is not 1-dimensional array")
        return

    if y.ndim != 1:
        print("Datapoint 2 is not 1-dimensional array")
        return

    if x.shape != y.shape:
        print("Datapoints are not of the same dimensions")
        return

    size = x.shape[0]
    d_xy = x - y

    if measure == "EN":
        _report_from_dissim(np.sqrt(_quadratic_form(d_xy, np.eye(size))))

    elif measure == "HSN":
        _report_from_dissim(np.sqrt(_quadratic_form(d_xy, np.ones((size, size)))))

    elif measure == "DN":
        if len(argv) == 0:
            print("Entries for diagonal matrix not passed to the function")
            return

        try:
            d = np.asarray(argv[0], dtype=float)
        except (TypeError, ValueError):
            print("Invalid format for diagonal matrix entries")
            return

        if d.ndim == 1:
            d = np.diag(d)
        elif (
            d.ndim != 2
            or d.shape[0] != d.shape[1]
            or np.count_nonzero(d - np.diag(np.diagonal(d))) != 0
        ):
            # Scalars, higher-rank arrays, non-square matrices and matrices
            # with off-diagonal entries are all rejected before any result
            # is computed.
            print("Matrix passed is not diagonal")
            return

        if d.shape != (size, size):
            print("The vector dimension isn't compatible")
            return

        out = _quadratic_form(d_xy, d)
        # ``not out >= 0`` also rejects NaN; a value of exactly 0 (identical
        # points) is a valid distance and must not be reported as an error.
        if not out >= 0:
            print("Dissimilarity turns out to be the square root of a negative number")
            return
        _report_from_dissim(np.sqrt(out))

    elif measure == "MN":
        if len(argv) == 0:
            print("Datapoints to calculate covariance matrix not passed to the function")
            return

        try:
            cov_dp = np.asarray(argv[0], dtype=float)
        except (TypeError, ValueError):
            print("Invalid format for datapoints to create covariance matrix")
            return

        if cov_dp.ndim != 2 or cov_dp.shape[1] != size:
            print("The shape of distribution does not conform with the data points")
            return

        if cov_dp.shape[0] < 2:
            # The unbiased estimate below divides by (rows - 1).
            print("At least two datapoints are needed to estimate the covariance matrix")
            return

        centered = cov_dp - cov_dp.mean(axis=0)
        cov = np.dot(centered.T, centered) / (cov_dp.shape[0] - 1)
        try:
            A = np.linalg.inv(cov)
        except np.linalg.LinAlgError:
            print("The distribution results in a non-invertible covariance matrix")
            return
        _report_from_dissim(np.sqrt(_quadratic_form(d_xy, A)))

    elif measure == "LMN":
        if len(argv) == 0:
            print("alpha value not provided")
            return

        try:
            alpha = float(argv[0])
        except (TypeError, ValueError):
            print("Invalid alpha value")
            return

        if not np.isfinite(alpha):
            print("Invalid alpha value")
            return

        if alpha == 0:
            # The exponent 1 / alpha is undefined.
            print("Similarity/ Dissimilarity measures not defined")
            return

        # A negative alpha combined with a zero component divides by zero;
        # the resulting inf is kept, only the warning is silenced.
        with np.errstate(divide="ignore", invalid="ignore"):
            dissim = np.power(np.sum(np.abs(d_xy) ** alpha), 1 / alpha)
        _report_from_dissim(dissim)

    elif measure == "CS":
        denominator = np.sqrt(np.sum(x * x) * np.sum(y * y))
        if denominator == 0:
            print(_UNDEFINED_SIMILARITY_MSG)
            return
        _report_from_sim(np.sum(x * y) / denominator)

    elif measure == "OS":
        denominator = min(np.sum(x * x), np.sum(y * y))
        if denominator == 0:
            print(_UNDEFINED_SIMILARITY_MSG)
            return
        _report_from_sim(np.sum(x * y) / denominator)

    elif measure == "DS":
        denominator = np.sum(x * x) + np.sum(y * y)
        if denominator == 0:
            print(_UNDEFINED_SIMILARITY_MSG)
            return
        _report_from_sim(2 * np.sum(x * y) / denominator)

    elif measure == "JS":
        denominator = np.sum(x * x) + np.sum(y * y) - np.sum(x * y)
        if denominator == 0:
            print(_UNDEFINED_SIMILARITY_MSG)
            return
        _report_from_sim(np.sum(x * y) / denominator)

    else:
        print("Invalid measure")

    return

Testing the code using given sample cases

In [5]:
# Norms that need no extra argument, evaluated on the same sample pair.
for norm_code in ("EN", "HSN"):
    sim_dissim([4, 12, 3, 5], [9, 8, 2, 3], norm_code)

print()
print("Diagonal Testing")
# Each case: (datapoint 1, datapoint 2, diagonal entries).
# The second case passes four entries for 3-D points; the third uses entries
# that make the quadratic form negative.
diagonal_cases = (
    ([4, 12, 3, 5], [9, 8, 2, 3], [7, 1, 10, -1]),
    ([1, 2, 4], [-6, 0, 9], [1, 1, 0, 1]),
    ([1, -2], [-11, 4], [-11, 4]),
)
for first_point, second_point, diagonal_entries in diagonal_cases:
    sim_dissim(first_point, second_point, "DN", diagonal_entries)

(6.782329983125268, 0.12849622184722817)
(2.0, 0.3333333333333333)

Diagonal Testing
(14.035668847618199, 0.06650851452866428)
The vector dimension isn't compatible
Dissimilarity turns out to be the square root of a negative number


In [6]:
print("MN measure")
# Each case: (datapoint 1, datapoint 2, datapoints used for the covariance matrix).
# The second sample has four columns for 3-D points; the third yields a
# covariance matrix that cannot be inverted.
mn_cases = (
    (
        [4, 12, 3, 5],
        [9, 8, 2, 3],
        [[4, 18, 21, 37], [14, 7, 2, 11], [3, 5, 2, 13], [11, 34, 47, 26], [71, 2, 1, 15]],
    ),
    (
        [61, 53, 8],
        [7, -2, 10],
        [[-13, 2, 1, 6], [17, 5, -8, 2], [44, 23, 73, -11]],
    ),
    (
        [15, 41],
        [68, -29],
        [[-1, 1], [0, 0]],
    ),
)
for first_point, second_point, sample_points in mn_cases:
    sim_dissim(first_point, second_point, "MN", sample_points)

MN measure
(3.510296756835771, 0.22171490123890578)
The shape of distribution does not conform with the data points
The distribution results in a non-invertible covariance matrix


In [7]:
# LMN measure: the extra argument is the exponent alpha (0 is rejected).
lmn_cases = (
    ([4, 12, 3, 5], [9, 8, 2, 3], 6),
    ([2, 13, 7, 11, 5], [-8, 31, 1, 6, 52], 0),
)
for first_point, second_point, alpha_value in lmn_cases:
    sim_dissim(first_point, second_point, "LMN", alpha_value)

# Similarity-based measures; none of them takes an extra argument.
similarity_cases = (
    ([4, 12, 3, 5], [9, 8, 2, 3], "CS"),
    ([3, -2, 4], [4, -2, -4], "CS"),
    ([4, 12, 3, 5], [9, 8, 2, 3], "OS"),
    ([0, 0, 0], [4, -2, -4], "OS"),
    ([4, 12, 3, 5], [9, 8, 2, 3], "DS"),
    ([4, 12, 3, 5], [9, 8, 2, 3], "JS"),
)
for first_point, second_point, measure_code in similarity_cases:
    sim_dissim(first_point, second_point, measure_code)

(5.2006744337772055, 0.16127277938552234)
Similarity/ Dissimilarity measures not defined
(0.14429494027266954, 0.8739005695172557)
(inf, 0.0)
(0.03267973856209145, 0.9683544303797469)
Measures not defined; similarity has 0/0 form
(0.150326797385621, 0.8693181818181818)
(0.3006535947712418, 0.7688442211055276)
