In [3]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [4]:
# Set notebook display
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')

# import data analysis libraries
import os
import numpy as np
import pandas as pd

In [5]:
# Set data path
data_path = os.path.join(os.path.abspath('..'), 'data', 'normalized_data.csv')

if os.path.exists(data_path):
    print('Input file:', data_path)
else:
    print('Please fix the path!')

Input file: /Users/rolf/Downloads/noncomp/rank/data/normalized_data.csv


In [6]:
# import raw data; set first column as index name
normalized_data = pd.read_csv(data_path, index_col = 0)

In [7]:
normalized_data.head()

Unnamed: 0_level_0,EnerCon,RD,Loan,TerInd,PubSpa,WatCon,Engle,Unemp,PopDen,GradeII,SolWas,WasWater,ConWas,PubBus,PasInt
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Beijing,0.893634,1.0,0.55492,1.0,0.466294,0.849516,0.517706,1.0,0.847562,0.328413,0.804689,0.598266,0.579581,0.159141,0.106449
Tianjin,0.817083,0.498335,0.392968,0.300174,0.160638,0.897654,0.417939,0.214286,0.82957,0.354244,0.982734,1.0,0.555371,0.099644,0.04195
Shijiazhuang,0.599517,0.56182,0.356321,0.448893,0.278362,0.963315,0.781277,0.175,0.691098,0.066421,0.922382,0.880058,0.380647,0.080405,0.049464
Qinhuangdao,0.659952,0.558269,0.458958,0.67247,1.0,0.811938,0.879188,0.260714,0.705688,0.590406,0.445588,0.953179,1.0,0.051713,0.026325
Baoding,0.651894,0.567148,0.413456,0.0,0.13877,0.950802,1.0,0.028571,0.379772,0.0,0.781403,0.751156,0.455508,0.186165,0.0353


In [8]:
thresholds_data_path = os.path.join(os.path.abspath('..'), 'data', 'thresholds.csv')

if os.path.exists(thresholds_data_path):
    print('Input file :', thresholds_data_path)
else:
    print('Please fix the path')

Input file : /Users/rolf/Downloads/noncomp/rank/data/thresholds.csv


In [9]:
thresholds = pd.read_csv(thresholds_data_path, index_col = 0)
thresholds

Unnamed: 0,EnerCon,RD,Loan,TerInd,PubSpa,WatCon,Engle,Unemp,PopDen,GradeII,SolWas,WasWater,ConWas,PubBus,PasInt
Preference threshold,0.779009,0.38313,0.41695,0.321251,0.115751,0.804843,0.378181,0.214286,0.702564,0.407749,0.778631,0.59646,0.536584,0.079932,0.043927
Indifference threshold,0.651894,0.21465,0.175335,0.0,0.051134,0.652279,0.0,0.0,0.462638,0.0,0.632029,0.283815,0.481663,0.0,0.0


In [10]:
def outranking_matrix(perf_matrix, threshold_matrix, weight, indif_pref_coeff, weak_pref_coeff):
    '''
    Parameter:
    perf_matrix: entity's performance matrix; DataFrame
    threshold_matrix: first row is perference threshold value of each indicator,
                      second row is indifference threshold value of each indicator
                      DataFrame
    weight: weight of each indicator, list
    indif_pref_coeff: coefficient of indifference, float
    weak_pref_coeff: coefficient of weak preference, float
    
    Return values include intermediate pair-wise comparison matrix: inter_value
    pair-matrix and outranking matrix.
    '''
    # pair-wise comparison
    import itertools
    
    m, n = perf_matrix.shape
    
    # pair-wise matrix of rows for comparison
    pair_matrix = pd.DataFrame(list(itertools.combinations(list(range(m)), 2)))
    
    i, j = pair_matrix.shape
    
    # Define a empty DataFrame for store the intermediate pair-wise comparison value
    inter_value = pd.DataFrame(index = pair_matrix.index, columns = perf_matrix.columns)
    
    # assume that comparison two entities a and b, if a's perform better than b on indicator j, the corresponding
    # element in intermediate pair_wise comparision matrix equals to 2. Similarly, if indiffence, equals to 1,
    # if weak perference, equals to 3. If b perform better than a on indicator j, the corresponding
    # element in intermediate pair_wise comparision matrix equals to 0.
    for i in range(i):
        for cn in range(n):
            if np.absolute(perf_matrix.iloc[pair_matrix.iat[i, 0], cn] - perf_matrix.iloc[pair_matrix.iat[i, 1], cn]) <= threshold_matrix.iloc[1, cn]:
                inter_value.iloc[i, cn] = 1
            elif perf_matrix.iloc[pair_matrix.iat[i, 0], cn] > perf_matrix.iat[pair_matrix.iloc[i, 1], cn] and np.absolute(perf_matrix.iat[pair_matrix.iloc[i, 0], cn] - perf_matrix.iat[pair_matrix.iloc[i, 1], cn]) > threshold_matrix.iloc[0, cn]:
                inter_value.iloc[i, cn] = 2
            elif threshold_matrix.iloc[1, cn] < np.absolute(perf_matrix.iat[pair_matrix.iloc[i, 0], cn] - perf_matrix.iat[pair_matrix.iloc[i, 1], cn]) <= threshold_matrix.iloc[0, cn]:
                inter_value.iloc[i, cn] = 3
            else:
                inter_value.iloc[i, cn] = 0
                
        
    for i in range(i+1):
        for cn in range(n):
            if inter_value.iloc[i, cn] == 1:
                inter_value.iloc[i, cn] = indif_pref_coeff * weight.iloc[0, cn]
            elif inter_value.iloc[i, cn] == 2:
                inter_value.iloc[i, cn] = weight.iloc[0, cn]
            elif inter_value.iloc[i, cn] == 3:
                inter_value.iloc[i, cn] = indif_pref_coeff * weight.iloc[0, cn]
            else:
                inter_value.iloc[i, cn] = 0
                
    pair_matrix['outranking_matrix_element_value'] = inter_value.sum(axis = 1)
    
    # generate outranking matrix
    outranking_matrix = pd.DataFrame(0, index = perf_matrix.index, columns = perf_matrix.index)
    
    for row in range(len(pair_matrix)):
        outranking_matrix.iloc[pair_matrix.iloc[row, 0], pair_matrix.iloc[row, 1]] = pair_matrix.iloc[row, 2]
        outranking_matrix.iloc[pair_matrix.iloc[row, 1], pair_matrix.iloc[row, 0]] = 1 - pair_matrix.iloc[row, 2]
                
    return outranking_matrix, inter_value, pair_matrix 

In [15]:
ind_weight_data_path = os.path.join(os.path.abspath('..'), 'data', 'ind_weight.csv')

if os.path.exists(thresholds_data_path):
    print('Input file :', ind_weight_data_path)
else:
    print('Please fix the path')

Input file : /Users/rolf/Downloads/noncomp/rank/data/ind_weight.csv


In [17]:
ind_weight = pd.read_csv(ind_weight_data_path)

In [18]:
outranking_matrix, inter_value, pair_matrix = outranking_matrix(normalized_data, thresholds, ind_weight, 1/2, 1/2)

TypeError: can't multiply sequence by non-int of type 'float'

In [None]:
outranking