# Using the ktb1989.py script

The ktb1989 script contains some utlity functions such as `trace()` and `isPosDef()`, but the main function from this script is `makePosDef()` which applies the algorithm.

In [1]:
import ktb1989
import numpy as np  # for example number 2
import pandas as pd  # for example number 2
np.set_printoptions(3, suppress=True, linewidth=100)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

## Example 1: Paper Example

This first example uses the matrix from the paper. The starting matrix is not positive definite.

In [2]:
# example matrix provided
R = np.array(
    [[1.000, 0.477, 0.644, 0.478, 0.651, 0.826],
     [0.477, 1.000, 0.516, 0.233, 0.682, 0.750],
     [0.644, 0.516, 1.000, 0.599, 0.581, 0.742],
     [0.478, 0.233, 0.599, 1.000, 0.741, 0.800],
     [0.651, 0.682, 0.581, 0.741, 1.000, 0.789],
     [0.826, 0.750, 0.742, 0.800, 0.798, 1.000]])
print(f"is matrix positive definite?: {ktb1989.isPosDef(R)}")
R

is matrix positive definite?: False


array([[1.   , 0.477, 0.644, 0.478, 0.651, 0.826],
       [0.477, 1.   , 0.516, 0.233, 0.682, 0.75 ],
       [0.644, 0.516, 1.   , 0.599, 0.581, 0.742],
       [0.478, 0.233, 0.599, 1.   , 0.741, 0.8  ],
       [0.651, 0.682, 0.581, 0.741, 1.   , 0.789],
       [0.826, 0.75 , 0.742, 0.8  , 0.798, 1.   ]])

To apply the algorithm, use the `makePosDef()` function with the first parameter being the matrix you want to make positive definite and the second parameter (*n*) being an integer greater than 0 specifying the top left *n* x *n* area to not be adjusted during the proccess.

In [3]:
G = ktb1989.makePosDef(R, n=4)
print(f"is matrix positive definite?: {ktb1989.isPosDef(G)}")
G

is matrix positive definite?: True


array([[1.   , 0.477, 0.644, 0.478, 0.642, 0.803],
       [0.477, 1.   , 0.516, 0.233, 0.67 , 0.718],
       [0.644, 0.516, 1.   , 0.599, 0.583, 0.746],
       [0.478, 0.233, 0.599, 1.   , 0.727, 0.763],
       [0.642, 0.67 , 0.583, 0.727, 1.   , 0.813],
       [0.803, 0.718, 0.746, 0.763, 0.813, 1.   ]])

Comparing results to the paper, metrics do not exactly match (possibly becuase of the algorithm used), but the matrix is in fact non negative definite.

Note how the residuals of the top left 4x4 are 0.

In [4]:
print(f"e(G): {0.5 * ktb1989.trace(np.matmul(G-R, G-R))}")
G - R

e(G): 0.003663688995357246


array([[ 0.   ,  0.   ,  0.   ,  0.   , -0.009, -0.023],
       [ 0.   ,  0.   ,  0.   ,  0.   , -0.012, -0.032],
       [ 0.   ,  0.   ,  0.   ,  0.   ,  0.002,  0.004],
       [ 0.   ,  0.   ,  0.   ,  0.   , -0.014, -0.037],
       [-0.009, -0.012,  0.002, -0.014,  0.   ,  0.024],
       [-0.023, -0.032,  0.004, -0.037,  0.015,  0.   ]])

## Example 2: Reordering the matrix

This example goes through a potential way to apply the function when the correlation factors you want to preserve are not already in correct spot. This example is a bit clunky, so it might be good to implement some funcationality in the future.

In [5]:
data = {'A': [1,2,3,4,5],
        'B': [2,4,1,3,5],
        'C': [4,5,3,1,2],
        'D': [3,5,1,2,4],
        'E': [2,5,3,1,4]}

R = pd.DataFrame(data,columns=['A','B','C', 'D', 'E']).corr()
R.iloc[0, 1] = R.iloc[1, 0] = 0.9  # make non-psd
print(f"is matrix positive definite?: {ktb1989.isPosDef(R)}")
R

is matrix positive definite?: False


Unnamed: 0,A,B,C,D,E
A,1.0,0.9,-0.8,-0.1,0.0
B,0.9,1.0,-0.1,0.8,0.5
C,-0.8,-0.1,1.0,0.5,0.6
D,-0.1,0.8,0.5,1.0,0.7
E,0.0,0.5,0.6,0.7,1.0


In [6]:
origIdx = R.columns
# reorder to preserve correlation factors between C and E
presIdx = ['C', 'E', 'A', 'B', 'D']  
tmpR = R.copy().loc[presIdx, presIdx]
tmpR = tmpR.to_numpy()
tmpG = ktb1989.makePosDef(tmpR, 2)  # n=2 for C and E
# convert back to dataframe and return to the original order
tmpG = pd.DataFrame(tmpG)
tmpG.index = tmpG.columns = presIdx
G = tmpG.loc[origIdx, origIdx]
print(f"is matrix positive definite?: {ktb1989.isPosDef(G)}")
G

is matrix positive definite?: True


Unnamed: 0,A,B,C,D,E
A,1.0,0.723,-0.754,0.002,-0.008
B,0.723,1.0,-0.151,0.688,0.508
C,-0.754,-0.151,1.0,0.529,0.6
D,0.002,0.688,0.529,1.0,0.695
E,-0.008,0.508,0.6,0.695,1.0


Note how the C vs E correlation factors residuals are 0

In [7]:
G - R  # residuals

Unnamed: 0,A,B,C,D,E
A,0.0,-0.177,0.046,0.102,-0.008
B,-0.177,0.0,-0.051,-0.112,0.008
C,0.046,-0.051,0.0,0.029,0.0
D,0.102,-0.112,0.029,0.0,-0.005
E,-0.008,0.008,0.0,-0.005,-0.0
