In [1]:
!pip install kumaraswamy



In [2]:
from kumaraswamy import kumaraswamy
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import numpy as np

from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import pdb


## Experiments with Kumaraswamy Function

In [3]:
d1 = kumaraswamy(a=0.1, b=0.7)

In [4]:
d1.pdf(0.9)

array(0.30210796)

In [5]:
d1.cdf(0.21)

array(0.7418337)

In [6]:
d1.var

0.07907948744725794

In [7]:
d1.mean

0.17121255852442666

### CCTE

In [8]:
from scipy.special import hyp2f1
from scipy.special import betainc
#The incomplte Beta function


In [9]:
def num_1(alpha,t,a,b,theta):
    v1=-(1/a)
    v2=b
    v3=1+b
    v4=(1-alpha)**(1/b)
##This is the first term in the numerator
    p1=(t-1)*(theta*t-1)
    p2=(1-alpha)*(hyp2f1(v1,v2,v3,v4))
    return p1*p2 #The product of 2  terms is the first term
    

In [10]:
def num_2(alpha,t,a,b,theta):
    #This gives the negative of the second term
    a3=1+(1/a)
    p=alpha**b
    b3=2*b
    p3=2*(theta)*t*(t-1)*b
    p4=[betainc(a3,b,1)+betainc(a3,b3,1)-betainc(a3,b,p)-betainc(a3,b3,p)]
    prod=p4[0]*p3
    return prod


In [11]:
#This defines the FGM Copula for kumaraswamy distribution
# Some checks required
def FGM_copula(alpha,t,a,b,theta,a1,b1):
    d1 = kumaraswamy(a, b)
    d2 = kumaraswamy(a1, b1)
    s1 = d1.cdf(alpha)
    s2 = d2.cdf(t)
    copula=(s1)*(s2)+theta*(s1)*(s2)*(1-s1)*(1-s2)
    return copula
    
    
    
def deno(alpha,t,a,b,theta,a1,b1):
    t1=1-alpha-t
    t2= FGM_copula(alpha,t,a,b,theta,a1,b1)
    return t1+t2
    

In [12]:
FGM_copula(0.3,0.5,0.7,0.8,2,0.6,0.8)

0.32232193347566945

In [13]:
#Here a,b, alpha are for first distribution and a1,b1,t are for second distribution
def ccte_fun(alpha,a,b,t,a1,b1,theta):
    num=num_1(alpha,t,a,b,theta)+num_2(alpha,t,a,b,theta)
    den=deno(alpha,t,a,b,theta,a1,b1)
    return num/den
    

In [14]:
ccte_fun(0.2,0.5,0.8,0.9,0.8,0.9,0.4)

-0.3503350944639705

In [15]:

num_2(0.2,0.7,0.8,0.9,4)

-2.838277127208187

In [16]:
import numpy as np
def matrix(n):
    a=(n,n)
    return np.zeros(a)

In [17]:
c=matrix(3)

In [18]:
import numpy as np

In [19]:
c

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [20]:
c[1,2]=25

In [21]:
c

array([[ 0.,  0.,  0.],
       [ 0.,  0., 25.],
       [ 0.,  0.,  0.]])

In [22]:
def update_matrix(mat,i,j,alpha,a,b,t,a1,b1,theta):
    #input here whcih element we want to update(exact index)
    mat[i-1,j-1]=ccte_fun(alpha,a,b,t,a1,b1,theta)
    

In [23]:
update_matrix(c,2,2,0.2,0.5,0.8,0.9,0.8,0.9,0.4)

In [24]:
c

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        , -0.35033509, 25.        ],
       [ 0.        ,  0.        ,  0.        ]])

### The OOP part

In [25]:
class ccte:
    """
        A class used to get ccte of bivariate kumaraswamy distribution
        
        ...
        
        Attributes
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        a,b: float
            The kumaraswamy distribution parameters for the first variable
        t: float    
            The percentile for the first kumaraswamy variable
        a1,b1: float
            The kumaraswamy distribution parameters for the second variable
        ccte_value : float
            Stores the value of CCTE
        
        
        
        Methods
        ------
        ccte_calc:float
            Stores the ccte of first varible with repect to another 
            This assumes both the variables are kumaraswamy and copula used is FGM copula
            Stores the value of CCTE in ccte_value
    """

    def __init__(self, alpha,a,b,t,a1,b1):
       
        """
            Parameters
            ------
            alpha: float
                The percentile for the first kumaraswamy variable
            a,b: float
                The kumaraswamy distribution parameters for the first variable
            t: float    
                The percentile for the first kumaraswamy variable
            a1,b1: float
                The kumaraswamy distribution parameters for the second variable
        """
        self.alpha = alpha
        self.a = a
        self.b = b
        self.t = t
        self.a1 = a1
        self.b1 = b1
     
             
       
            
        
       
       
        
        
    def ccte_calc(self,theta):
        
        """ Returns the ccte(conditional copula tail expectation) for first variable with repect to second
         and the copula used is FGM copula
         
         
        Parameters
        ------
        Theta: float
            The dependence paramater of FGM copula 
        """
        
        alpha=self.alpha
        a=self.a
        b=self.b
        t=self.t
        a1=self.a1
        b1=self.b1
        self.ccte_value=ccte_fun(alpha,a,b,t,a1,b1,theta)
        return ccte_fun(alpha,a,b,t,a1,b1,theta)

In [26]:
obj=ccte(0.2,0.5,0.8,0.9,0.8,0.3)

In [27]:
obj.ccte_calc(0.2)

-0.21156365084895895

In [74]:
class dispersion_matrix:
    
    """
        A class used to store the dispersion matrix of a network
        
        ...
        
        Attributes
        ------
        n: int
            stores the value of size of matrix
        matrix: array(2D)
            Stores the zeros matrix of size n*n
        p: array(2D)
            Stores the updated matrix
            
            
        Methods
        ------
        matrix_def():
            builds a n*n matrix with all entries as zeros
        update_mat(i,j,alpha,a,b,t,a1,b1,theta)
            updates the (i,j) eleemnt of the dispersion matrix with the CCTE of i-th element with repect to jth 
            element
        num_1(alpha,t,a1,b1,theta):
            Gives the first term of the numerator(to be used internally for getting the ccte)
        num_2(self,alpha,t,a1,b1,theta):
            Gives the second term of the numerator(to be used internally for getting the ccte)
        FGM_copula(alpha,t,a1,b1,theta,a2,b2)
            Gives the FGM copula of 2 kumaraswamy distributions given the percentile alpha(corresponding params
            a1 and b1) and t(corresponding params a2 and b2)
        deno(alpha,t,a1,b1,theta,a2,b2)
            Gives the Denominator for ccte(to be used internally for getting the ccte)
        ccte_fun(self,alpha,a1,b1,t,a1,b1,theta)
            Calculates the ccte of kumaraswamy varible with parameters a1,b1 with respect to the one 
            with parameters a2,b2  
        update_clus_mat(self,i,j,alpha,k1,t,k2,theta)
            Gets and updates the distance matrix on which the clustering has to be done 
            The matrix is symmetric and is passed to hirarchical cluster maker
       """
    
    
    def __init__(self,n):
        
        """
        Parameters
        ------
        n: Thse size of dispersion matrix
        """
        self.n=n
        
    def num_1(self,alpha,t,k1,theta):
        """
      The function to gives the first term of the numerator to calculate the ccte 
        Parameters
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains kumaraswamy distribution
        t: float    
            The percentile for the first kumaraswamy variable
            
        Returns
        ------
        The first term in the numerator to calculate the ccte
        """
        v1=-(1/k1.a)
        v2=k1.b
        v3=1+k1.b
        v4=(1-alpha)**(1/k1.b)
    
        p1=(t-1)*(theta*t-1)
        p2=(1-alpha)*(hyp2f1(v1,v2,v3,v4))
        p3=(k1.upper-k1.lower)*p1*p2
        return p1*p2 #The product of 2  terms is the first term
    
    def num_2(self,alpha,t,k2,theta):
        """
      The function to gives the neagtive of the second term of the numerator to calculate the ccte
      
        Parameters
        
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains the first kumaraswamy distribution
        k2: Object
            The kumaraswamy_scaled object which contains the second kumaraswamy distribution
        t: float    
            The percentile for the second kumaraswamy variable
            
            
        Returns
        ------
        
        The neagtive of the second term in the numerator to calculate the ccte
        """
        
   
        a3=1+(1/k2.a)
        p=alpha**k2.b
        b3=2*k2.b
        p3=2*(theta)*t*(t-1)*k2.b
        p4=[betainc(a3,k2.b,1)+betainc(a3,b3,1)-betainc(a3,k2.b,p)-betainc(a3,b3,p)]
        prod=(k2.upper-k2.lower)*p4[0]*p3
        return prod
    
    def num_3(self,alpha,t,k1,k2,theta):
        """
      The function to gives the neagtive of the second term of the numerator to calculate the ccte
      
        Parameters
        
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains the first kumaraswamy distribution
        k2: Object
            The kumaraswamy_scaled object which contains the second kumaraswamy distribution
        t: float    
            The percentile for the second kumaraswamy variable
            
            
        Returns
        ------
        
        The neagtive of the second term in the numerator to calculate the ccte
        """
        
   
        m=1-alpha-t+alpha*t
        n=theta*alpha*t*(1-alpha)*(1-t)
        k=k1.lower*(m+n)
        return k
    
    def FGM_copula(self,alpha,t,k1,theta,k2):
        """
         
        Gives the FGM copula of 2 kumaraswamy distributions given the percentile alpha(corresponding params
        a1 and b1) and t(corresponding params a2 and b2)
       
        Parameters
        
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains first kumaraswamy distribution
        k2: Object
            The kumaraswamy_scaled object which contains second kumaraswamy distribution    
        t: float    
            The percentile for the second kumaraswamy variable
        Theta: float
            The dependence paramter of the FGM copula
            
        Returns
        ------
        The FGM copula between two input kumaraswamy variables with given percentiles
        
        
        """
        d1 = kumaraswamy(k1.a, k1.b)
        d2 = kumaraswamy(k2.a, k2.b)
        s1 = d1.cdf(alpha)
        s2 = d2.cdf(t)
        copula=(s1)*(s2)+theta*(s1)*(s2)*(1-s1)*(1-s2)
        return copula
    
    def deno(self,alpha,t,k1,theta,k2):
        """
        The function calculates the denominator that is used to calculate the ccte internally
        
        Parameters
        
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains first kumaraswamy distribution
        k2: Object
            The kumaraswamy_scaled object which contains second kumaraswamy distribution 
        t: float    
            The percentile for the second kumaraswamy variable
        Theta: float
            The dependence paramter of the FGM copula
            
            
        Returns
        ------
        The denominator to calculate the ccte
        """
        t1=1.-alpha-t
        t2= self.FGM_copula(alpha,t,k1,theta,k2)
        return t1+t2
    
    def ccte_fun(self,alpha,k1,t,k2,theta):
        """
        Parameters
        
        ------
        alpha: float
            The percentile for the first kumaraswamy variable
        k1: Object
            The kumaraswamy_scaled object which contains first kumaraswamy distribution
        k2: Object
            The kumaraswamy_scaled object which contains second kumaraswamy distribution 
        t: float    
            The percentile for the first kumaraswamy variable
        Theta: float
            The dependence paramter of the FGM copula
            
        Returns
        
        --------
        
        Calculates the ccte of kumaraswamy varible with parameters a1,b1 with respect to the one 
        with parameters a2,b2 
        """
        num=self.num_1(alpha,t,k1,theta)+self.num_2(alpha,t,k1,theta)+self.num_3(alpha,t,k1,k2,theta)
        den=self.deno(alpha,t,k1,theta,k2)
        return num/den
    
        
    def matrix_def(self):
        
        """  builds a n*n matrix with all entries as zeros
         
         
        Parameters
        ------
        None
        
        
        Returns
        -------
        A n*n matrix with all zeroes
        
        """
        self.matrix=(self.n,self.n)
        self.p=np.zeros(self.matrix)
        return self.p
    
    def update_mat(self,i,j,alpha,k1,t,k2,theta):
        """ Updates the (i,j) element of a matrix with the CCTE
         Stores the updated matrix in p(attribute)
        Parameters
        ------
        i,j: Index of the matrics
        alpha: float
            The percentile for the first kumaraswamy variable
        a1,b1: float
            The kumaraswamy distribution parameters for the first variable
        t: float    
            The percentile for the first kumaraswamy variable
        a2,b2: float
            The kumaraswamy distribution parameters for the second variable
            
        Returns
        ------
        An updated matrix p
         """
        
        self.p[i-1,j-1]=self.ccte_fun(alpha,k1,t,k2,theta)
        self.p[j-1,i-1]=self.ccte_fun(t,k2,alpha,k1,theta)
        return self.p
    
    def matrix_clus(self):
        
        """  builds a n*n matrix with all entries as zeros
        This stores basically the true dispersion matrix on which clustering is to be done
        To be used for getting the linkage matrix
         
         
        Parameters
        ------
        None
        
        
        Returns
        -------
        A n*n matrix with all zeroes
        
        """
        self.matrix2=(self.n,self.n)
        self.p2=np.zeros(self.matrix2)
        return self.p2
    
    def update_clus_mat(self,i,j,alpha,k1,t,k2,theta):
        """ Updates the (i,j) element of a matrix with the CCTE
         Stores the updated matrix in p(attribute)
        Parameters
        ------
        i,j: Index of the matrics
        alpha: float
            The percentile for the first kumaraswamy variable
        a1,b1: float
            The kumaraswamy distribution parameters for the first variable
        t: float    
            The percentile for the first kumaraswamy variable
        a2,b2: float
            The kumaraswamy distribution parameters for the second variable
            
        Returns
        ------
        An updated matrix p2
        """
        t1=self.ccte_fun(alpha,k1,t,k2,theta)
        t2=self.ccte_fun(t,k2,alpha,k1,theta)
        self.p2[i-1,j-1]=np.abs(t1-t2)
        self.p2[j-1,i-1]=np.abs(t1-t2)
        return self.p2
         

In [45]:
# link=sch.linkage(obj2.p2,'ward')
# def quasidiag(link):
#     link=link.astype(int)
#     sortix=pd.Series([link[-1,0],link[-1,1]])
#     numitems=link[-1,3]
#     while sortix.max()>=numitems:
#         sortix.index=range(0,sortix.shape[0]*2,2)
#         df0=sortix[sortix>=numitems]
#         i=df0.index;j=df0.values-numitems
#         sortix[i]=link[j,0]
#         df0=pd.Series(link[j,1],index=i+1)
#         sortix=sortix.append(df0)
#         sortix=sortix.sort_index()
#         sortix.index=range(sortix.shape[0])
#     return sortix.tolist()


In [46]:
# sortix=quasidiag(link)

In [47]:
import hdbscan
#!pip install hdbscan




In [48]:
#obj2.update_clus_mat(1,2,0.4,obj3,0.9,obj4,-0.9)
# obj2.update_clus_mat(1,3,0.6,obj3,0.8,obj4,0.3)
# obj2.update_clus_mat(2,3,0.9,obj3,0.2,obj4,0.5)
"""
The function to do the soft clustering
We pass in the object (matrix) on which we  want to do the soft clustering
Output:
An Object named clustere with following attributes:
labels_: The labels of the cluster(Returned as an array)

"""
clusterer = hdbscan.HDBSCAN(metric='precomputed',allow_single_cluster=True, min_cluster_size=2,leaf_size=40,
                             prediction_data=False,cluster_selection_method='leaf')
clusterer.fit(obj2.p2)

HDBSCAN(allow_single_cluster=True, cluster_selection_method='leaf',
        metric='precomputed', min_cluster_size=2)

In [49]:
clusterer.labels_


array([-1, -1, -1, -1, -1])

In [50]:
clusterer.probabilities_

array([0., 0., 0., 0., 0.])

In [51]:
obj2=dispersion_matrix(5)

In [52]:
obj2.ccte_fun(0.4,obj3,0.9,obj4,-0.9)

51.72030584359141

In [53]:
obj2.matrix_def()

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [54]:
obj2.matrix_clus()

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [55]:
obj2.update_mat(1,2,0.9,obj3,0.9,obj3,0.9)
obj2.update_clus_mat(1,2,0.4,obj3,0.9,obj4,-0.9)
obj2.update_clus_mat(1,3,0.6,obj3,0.8,obj4,0.3)
obj2.update_clus_mat(2,3,0.9,obj3,0.2,obj4,0.5)
obj2.update_clus_mat(4,3,0.8,obj3,0.1,obj4,0.5)

array([[  0.        ,  70.9629968 ,  74.6571718 ,   0.        ,
          0.        ],
       [ 70.9629968 ,   0.        , 437.49666795,   0.        ,
          0.        ],
       [ 74.6571718 , 437.49666795,   0.        , 172.0872235 ,
          0.        ],
       [  0.        ,   0.        , 172.0872235 ,   0.        ,
          0.        ],
       [  0.        ,   0.        ,   0.        ,   0.        ,
          0.        ]])

In [40]:
from kumaraswamy import kumaraswamy
class kumaraswamy_scaled:
    """
    The Class that stores scaled kumaraswamy distribution
     ...
        
        Attributes
        ------
        a,b: Float 
            parameters of Kumaraswamy Distribution
        upper, lower: Float
            Stores the upper and lower limits of the disrtibution
        z: Float
            Stores scaled and shifted value of x(the input)
        pdf: Float
            Stores pdf of the distribution at some value x
        cdf: Float
            Stores cdf of the distribution at some value x
        mean: Float
             Stores mean of the distribution
        var: Float
              Stores variance of the distribution
            
        Methods
        ------
        get_z(x):
            gets the z(normalized)(scaled and shifted) value for corrsponding x
        get_pdf(x):
            returns and stores pdf of a value x
        get_cdf(x):
            returns and stores cdf of a value x
        get_mean():
            returns mean of kumaraswamy distribution and stores it
        get_var():
            returns variance of kumaraswamy distribution and stores it
    """
    def __init__(self,a,b,upper,lower):
        """
        Parameters
        ------
        a,b: Float 
            parameters of Kumaraswamy Distribution
        upper, lower: Float
            Stores the upper and lower limits of the disrtibution
            
        """
        
           
        self.a=a
        self.b=b
        self.upper=upper
        self.lower=lower
        self.scale=upper-lower
            
    def get_z(self,x):
        """
        A function that stores and returnsthe z(normalized)(scaled and shifted) value for corrsponding x
        Parameters
        ------
        x:float
            The value for which we want the pdf(or cdf or z)
        
        
        Returns
        -------
        Z (scaled to (0,1) and shifted)value corresponding to a given x
        
        
        """
        self.z=(x-self.lower)/(self.upper-self.lower)
        return self.z
            
            
  
    
    def get_pdf(self,x):
        """
        A function that stores and returns the pdf  for corrsponding x of given kumaraswamy distribution
        
        Parameters
        ------
        x:float
            The value for which we want the pdf
        
        
        Returns
        -------
        Pdf  for corrsponding x of given kumaraswamy distribution
        
        
        """
        y=self.get_z(x)
        
        d2=kumaraswamy(a=self.a,b=self.b)
        m=d2.pdf(y)
        self.pdf=np.asscalar(m)
        return self.pdf
    
    def get_cdf(self,x):
        """
        A function that stores and returns the cdf  for corrsponding x of given kumaraswamy distribution
        
        Parameters
        ------
        x:float
            The value for which we want the cdf
        
        
        Returns
        -------
        Cdf  for corrsponding x of given kumaraswamy distribution
        
        """
        y=self.get_z(x)
        ## Is this a good practice
       
        d2=kumaraswamy(a=self.a,b=self.b)
        z=d2.cdf(y)
        self.cdf=np.asscalar(z)
        return self.cdf
    
    def get_mean(self):
        """
        A function that returns the mean of the kumaraswamy Distribution
        
        Parameters
        ------
        None
        
        
        Returns
        -------
        Mean of the given Kumaraswmy distribution
        
        """
       
        d2=kumaraswamy(a=self.a,b=self.b)
        l=d2.mean
        self.mean=l*(self.scale)+self.lower
        return self.mean
        
    def get_var(self):
        """
        A function that returns the variance of the kumaraswamy Distribution
        
        Parameters
        ------
        None
        
        
        Returns
        -------
        variance of the given Kumaraswmy distribution
        
        """
        d2=kumaraswamy(self.a,self.b)
        t=d2.var
        self.var=(t)*(self.scale)*(self.scale)
        return self.var
        

In [41]:
obj3=kumaraswamy_scaled(0.2,0.3,1000,2000)
obj4=kumaraswamy_scaled(0.7,0.2,1000,2000)

In [42]:
obj3.get_mean()

1466.3558011326863

In [66]:
df=pd.read_excel('corr.xlsx',sheet_name='Correlation',header=None)

In [68]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24,25,26,27,28,29,30,31,32,33
0,1.0,-0.003616,0.191295,0.059066,0.221421,0.158205,0.110269,0.673965,0.671962,0.033263,...,0.559101,0.539796,0.473224,0.220153,0.305329,0.268242,0.447573,0.448218,0.485444,0.377629
1,-0.003616,1.0,-0.031602,0.164292,-0.001988,0.085168,0.141005,0.073333,0.066716,0.059686,...,0.098552,0.170419,0.190175,0.229424,0.13224,0.154183,0.252097,0.430386,0.216193,0.127292
2,0.191295,-0.031602,1.0,0.12001,0.287449,0.239269,0.081493,0.181841,0.268853,0.044625,...,0.537711,0.365616,0.341748,0.208149,0.595961,0.104598,0.526644,0.252838,0.520425,0.365842
3,0.059066,0.164292,0.12001,1.0,0.149847,0.219502,0.207587,0.067859,0.131026,0.011395,...,0.113018,0.254926,0.234628,0.229739,0.312007,0.030293,0.250024,0.482357,0.548507,0.197314
4,0.221421,-0.001988,0.287449,0.149847,1.0,0.446366,0.255191,0.37348,0.510839,0.062124,...,0.596752,0.676041,0.730001,0.296364,0.333871,0.478328,0.657423,0.353073,0.564996,0.228454
5,0.158205,0.085168,0.239269,0.219502,0.446366,1.0,0.266165,0.366995,0.310179,0.109673,...,0.315521,0.627233,0.579172,0.230947,0.208206,0.470991,0.633621,0.531877,0.519249,0.235488
6,0.110269,0.141005,0.081493,0.207587,0.255191,0.266165,1.0,0.176119,0.170971,0.066437,...,0.222872,0.443623,0.307323,0.22786,0.159946,0.251845,0.346246,0.540793,0.565416,0.221595
7,0.673965,0.073333,0.181841,0.067859,0.37348,0.366995,0.176119,1.0,0.441492,0.119471,...,0.410133,0.537181,0.539207,0.241559,0.04807,0.639895,0.551069,0.585129,0.390522,0.309035
8,0.671962,0.066716,0.268853,0.131026,0.510839,0.310179,0.170971,0.441492,1.0,0.092507,...,0.657835,0.638397,0.593747,0.244544,0.261214,0.244975,0.606785,0.531698,0.506217,0.361647
9,0.033263,0.059686,0.044625,0.011395,0.062124,0.109673,0.066437,0.119471,0.092507,1.0,...,0.078795,0.325917,0.160626,0.133269,0.062509,-0.030396,0.468638,0.085822,0.086249,0.034072


In [70]:
df2=pd.read_excel('corr.xlsx',sheet_name='Kumaraswamy')

In [71]:
df2

Unnamed: 0,to,amt,max,min,alpha.Beta,beta.Beta,a,b
0,2,7.834747,1.713574,0.462978,4.230424,1.372099,3.940428,1.411288
1,3,45.814858,1.833043,0.023198,4.792532,1.999158,4.158376,2.152386
2,4,10.174886,2.072848,0.095693,5.182623,1.820489,4.468674,1.912106
3,5,11.02021,1.617005,0.936618,10.558261,6.334219,5.840634,9.470849
4,6,2.20531,1.337403,0.251299,10.339555,7.81431,5.398973,12.815833
5,7,2.408677,1.579118,0.618965,3.312512,1.877721,2.910719,1.949058
6,8,1.836295,1.373142,0.032063,4.015201,2.645853,3.235312,2.928555
7,9,2.227306,1.804357,0.602901,3.210955,1.361734,3.005632,1.373999
8,10,3.312765,1.455199,0.349497,3.773078,1.771805,3.361205,1.854693
9,14,44.145612,1.709072,0.101871,5.444404,1.320444,5.09493,1.327197


In [73]:
obj_final=dispersion_matrix(34)

In [77]:
obj_final.matrix_clus()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [83]:
obj_final.p2.shape

(34, 34)

In [94]:
for i in range(34):
    for j in range(34):
        if i==j:
            pass
        else:
            a1=df2.iloc[i]["a"]
            b1=df2.iloc[i]["b"]
            max1=df2.iloc[i]["max"]
            min1=df2.iloc[i]["min"]
            a2=df2.iloc[j]["a"]
            b2=df2.iloc[j]["b"]
            max2=df2.iloc[j]["max"]
            min2=df2.iloc[j]["min"]
            t1=i+1
            t2=j+1
            obj1=kumaraswamy_scaled(a1,b1,max1,min1)
            obj2=kumaraswamy_scaled(a2,b2,max2,min2)
            obj_final.update_clus_mat(t1,t2,0.4,obj1,0.9,obj1,-0.9)
            

In [101]:
c=np.nan_to_num(obj_final.p2)
link=sch.linkage(c,'ward')
def quasidiag(link):
    link=link.astype(int)
    sortix=pd.Series([link[-1,0],link[-1,1]])
    numitems=link[-1,3]
    while sortix.max()>=numitems:
        sortix.index=range(0,sortix.shape[0]*2,2)
        df0=sortix[sortix>=numitems]
        i=df0.index;j=df0.values-numitems
        sortix[i]=link[j,0]
        df0=pd.Series(link[j,1],index=i+1)
        sortix=sortix.append(df0)
        sortix=sortix.sort_index()
        sortix.index=range(sortix.shape[0])
    return sortix.tolist()

  


In [102]:
sortix=quasidiag(link)

In [104]:
"""
Creating Covariance matrix
From Covariance martix
"""



'\nCreating Covariance matrix\nFrom Covariance martix\n'

In [117]:
p=[]
for i in range(34):
    a1=df2.iloc[i]["a"]
    b1=df2.iloc[i]["b"]
    max1=df2.iloc[i]["max"]
    min1=df2.iloc[i]["min"]
    objt=kumaraswamy_scaled(a1,b1,max1,min1)
    ## How to handle the math overflow error do we set the variance to zero or 1
    try:
        c=objt.get_var()
    except OverflowError:
        c = 0.
    p.append(c)
    

In [120]:
D = np.diag(p)

In [122]:
D.shape

(34, 34)

In [123]:
k=df.to_numpy()

In [125]:
"""
This is the Covariance Matrix
"""
S = D*k*D

In [184]:
dfg=pd.DataFrame(S)
def get_cluster_var(cov, c_items):
    cov_ = cov.iloc[c_items, c_items] # matrix slice
    # calculate the inversev-variance portfolio
    ivp = 1./np.diag(cov_)
    ivp/=ivp.sum()
    w_ = ivp.reshape(-1,1)
    c_var = np.dot(np.dot(w_.T, cov_), w_)[0,0]
    return c_var

def get_rec_bipart(cov, sort_ix):
    # compute HRP allocation
    # intialize weights of 1
    w = pd.Series(1, index=sort_ix)
    
    # intialize all items in one cluster
    c_items = [sort_ix]
    while len(c_items) > 0:
        # bisection
        """
        [[3, 6, 0, 9, 2, 4, 13], [5, 12, 8, 10, 7, 1, 11]]
        [[3, 6, 0], [9, 2, 4, 13], [5, 12, 8], [10, 7, 1, 11]]
        [[3], [6, 0], [9, 2], [4, 13], [5], [12, 8], [10, 7], [1, 11]]
        [[6], [0], [9], [2], [4], [13], [12], [8], [10], [7], [1], [11]]
        """
        c_items = [i[int(j):int(k)] for i in c_items for j,k in 
                   ((0,len(i)/2),(len(i)/2,len(i))) if len(i)>1]
        
        # now it has 2
        for i in range(0, len(c_items), 2):
            
            c_items0 = c_items[i] # cluster 1
            c_items1 = c_items[i+1] # cluter 2
            
            c_var0 = get_cluster_var(cov, c_items0)
            c_var1 = get_cluster_var(cov, c_items1)
            print(c_var0)
            print(c_var1)
            
            alpha = 1 - c_var0/(c_var0+c_var1)
            print(alpha)
            if alpha=='NaN':
                w[c_items0] =np.multiply(w[c_items0], 0.00000005)
                w[c_items0] =np.multiply(w[c_items0],1.)
            else:    
                w[c_items0] =np.multiply(w[c_items0], alpha)
                w[c_items0] =np.multiply(w[c_items0],1-alpha)
    return w

In [194]:
get_rec_bipart(dfg, sortix)

  """
  


nan
nan
nan
nan
nan
nan
3.411083608398114e-05
nan
nan
3.218723885241761e-05
nan
nan
nan
nan
nan
4.086849687206887e-05
0.0002062930711555182
0.8346486583726993
1.3324778332800795e-05
nan
nan
0.00014885961864039091
4.1066961258228834e-05
0.2162254555426093
0.00020258932665770334
nan
nan
nan
nan
nan
nan
0.00020771417062285695
nan
0.0010704451826096715
4.2490750036616516e-05
0.03817897220335398
0.0015460008067496608
0.00023805882877272135
0.13343658700232663
1.343399093791344e-05
0.0016390502835082023
0.9918704273646318
3.648806340879166e-05
nan
nan
0.0001817129320018672
0.0008233482407757285
0.8192021173201988
0.00010478628650352075
6.753452507018992e-05
0.39191160053991425
0.00021630366856149845
0.0031952546374346114
0.9365968131978498
0.0051304644167982656
nan
nan
nan
nan
nan
nan
0.0007049370508811661
nan
nan
nan
nan
0.0002711170640984663
0.0008882064054786523
0.7661420033208156
0.0016629280937762452
0.00300442988896368
0.6437110459652293
0.0027533199992750028
4.315676905614033e-05
0.01

23    NaN
10    NaN
18    NaN
21    NaN
17    NaN
12    NaN
13    NaN
11    NaN
16    NaN
14    NaN
15    NaN
20    NaN
19    NaN
22    NaN
4     NaN
6     NaN
5     NaN
8     NaN
7     NaN
9     NaN
3     NaN
1     NaN
0     NaN
2     NaN
27    NaN
32    NaN
29    NaN
26    NaN
24    NaN
25    NaN
33    NaN
28    NaN
30    NaN
31    1.0
dtype: float64

In [190]:
!pip install PyPortfolioOpt

