In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
class NodeMgr:
    def __init__(self, distanceDf, clusterDf):
        self.distanceDf = distanceDf             
        self.clusterDf = clusterDf
        self.report = {'score':[], 'node':[], 'clusterId':[]}
        
    def appendReport(self, score, nodeName, clusterId):
        self.report['score'].append(np.round(score,2))
        self.report['node'].append(nodeName)
        self.report['clusterId'].append(clusterId)
    def getFinalReport(self):
        finalDf = pd.DataFrame(self.report)
        finalDf.sort_values(by=['clusterId', 'score'], ascending=[True, False])
        return finalDf        
    
    def getDistanceByClusterId(self, nodeName, clusterId):
        distances = pd.Series([], dtype=np.float64)
        distanceDfColumns = self.distanceDf.columns
        clusterNodes = obj.clusterDf[obj.clusterDf['cluster']==clusterId]['node']
        indexClusterNodes = list(set(self.distanceDf.index).intersection(set(clusterNodes)))    
        columnClusterNodes = list(set(self.distanceDf.columns).intersection(set(clusterNodes)))
        
        if nodeName in distanceDfColumns and len(indexClusterNodes) > 0:
            result1 = distances.append(self.distanceDf.loc[indexClusterNodes, nodeName])
            result1 = result1[result1.notna()]            
            if len(result1) > 0:
                distances = distances.append(result1)    
                
        if nodeName in self.distanceDf.index and len(columnClusterNodes) > 0:
            result2 = self.distanceDf.loc[nodeName, columnClusterNodes]
            result2 = result2[result2.notna()]
            if len(result2) > 0:
                distances = distances.append(result2)
            
        return distances
    
    def getScores(self):        
        clusterIds = np.unique(self.clusterDf['cluster'])
        for i in np.arange(0, clusterIds.shape[0]):
            currentClusterId = clusterIds[i]
            nodeNames = self.clusterDf[self.clusterDf['cluster'] == currentClusterId]['node']
            outsideClusterIds = clusterIds[clusterIds != currentClusterId]    
            for nodeName in nodeNames:
                aScore = np.mean(self.getDistanceByClusterId(nodeName, currentClusterId))
                bScores = np.zeros(len(outsideClusterIds))       
                for j in np.arange(0, len(outsideClusterIds)):
                    bScores[j] = np.mean(self.getDistanceByClusterId(nodeName, outsideClusterIds[j]))       
                bScore = np.min(bScores)
                sScore = (bScore - aScore)/np.maximum(aScore,bScore) 
                self.appendReport(sScore, nodeName,currentClusterId)
        return self.getFinalReport()             

In [3]:
proxData = pd.DataFrame(data=np.array([
                       [5.58, np.nan, np.nan, np.nan,np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], 
                       [7.00,6.50, np.nan, np.nan, np.nan,np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], 
                       [7.08,7.00,3.83, np.nan, np.nan, np.nan,np.nan, np.nan, np.nan, np.nan,np.nan], 
                       [4.83,5.08,8.17,5.83, np.nan, np.nan, np.nan,np.nan, np.nan, np.nan,np.nan],
                       [2.17, 5.75, 6.67, 6.92, 4.92,np.nan, np.nan, np.nan,np.nan, np.nan,np.nan],
                       [6.42,5.00,5.58,6.00,4.67,6.42,np.nan, np.nan, np.nan,np.nan, np.nan],
                       [3.42,5.50,6.42,6.42,5.00,3.92,6.17,np.nan, np.nan, np.nan,np.nan],
                       [2.50,4.92,6.25,7.33,4.50,2.25,6.33,2.75,np.nan, np.nan, np.nan],
                       [6.08,6.67,4.25,2.67,6.00,6.17,6.17,6.92,6.17,np.nan, np.nan],
                       [5.25,6.83,4.50,3.75,5.75,5.42,6.08,5.83,6.67,3.67, np.nan],
                       [4.75,3.00,6.08,6.67,5.00,5.58,4.83,6.17,5.67,6.50,6.92]],
                      dtype=np.float64),
                        columns=['BEL', 'BRA', 'CHI', 'CUB', 'EGY','FRA','IND','ISR','USA','USS','YUG'],
                      index=['BRA', 'CHI', 'CUB', 'EGY','FRA','IND','ISR','USA','USS','YUG', 'ZAI'],)

print(proxData)

      BEL   BRA   CHI   CUB   EGY   FRA   IND   ISR   USA   USS   YUG
BRA  5.58   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN
CHI  7.00  6.50   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN
CUB  7.08  7.00  3.83   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN
EGY  4.83  5.08  8.17  5.83   NaN   NaN   NaN   NaN   NaN   NaN   NaN
FRA  2.17  5.75  6.67  6.92  4.92   NaN   NaN   NaN   NaN   NaN   NaN
IND  6.42  5.00  5.58  6.00  4.67  6.42   NaN   NaN   NaN   NaN   NaN
ISR  3.42  5.50  6.42  6.42  5.00  3.92  6.17   NaN   NaN   NaN   NaN
USA  2.50  4.92  6.25  7.33  4.50  2.25  6.33  2.75   NaN   NaN   NaN
USS  6.08  6.67  4.25  2.67  6.00  6.17  6.17  6.92  6.17   NaN   NaN
YUG  5.25  6.83  4.50  3.75  5.75  5.42  6.08  5.83  6.67  3.67   NaN
ZAI  4.75  3.00  6.08  6.67  5.00  5.58  4.83  6.17  5.67  6.50  6.92


In [5]:
clusters = pd.DataFrame({'node':['BEL', 'BRA','EGY', 'FRA', 'ISR','USA','ZAI', 'CHI','CUB','IND','USS', 'YUG'],
                          'cluster':[0,0,0,0,0,0,0,1,1,1,1,1]})
print('******CLUSTER CONFIGURATION*****\n{}'.format(clusters))

obj = NodeMgr(proxData, clusters)
obj.getScores()

#z = '{:<04}'
#output = z.format(str) 

******CLUSTER CONFIGURATION*****
   node  cluster
0   BEL        0
1   BRA        0
2   EGY        0
3   FRA        0
4   ISR        0
5   USA        0
6   ZAI        0
7   CHI        1
8   CUB        1
9   IND        1
10  USS        1
11  YUG        1


Unnamed: 0,score,node,clusterId
0,0.39,BEL,0
1,0.22,BRA,0
2,0.2,EGY,0
3,0.35,FRA,0
4,0.3,ISR,0
5,0.43,USA,0
6,0.19,ZAI,0
7,0.33,CHI,1
8,0.4,CUB,1
9,-0.04,IND,1
