In [12]:
import numpy as np
import pandas as pd

In [13]:
#定义欧式距离
def euclidean_distance(instance1, instance2):
    return np.linalg.norm(instance1 - instance2)

#定义无监督的Relief算法
def unsupervised_relief(X, num_neighbors=3):
    num_instances, num_features = X.shape
    weights = np.zeros(num_features)

    for i in range(num_instances):
        target_instance = X[i, :]

        # 找到最近的邻居
        neighbors_indices = np.argsort([euclidean_distance(target_instance, X[j, :]) for j in range(num_instances) if j != i])[:num_neighbors]

        # 计算特征权重
        for feature_idx in range(num_features):
            weights[feature_idx] += np.sum(target_instance[feature_idx] - X[neighbors_indices, feature_idx])

    return weights


In [14]:
data = pd.read_excel(r"all_samples_clean.xlsx", sheet_name="Sheet1")
data = data.drop(['BASIC_entity_name', 'BASIC_year'], axis=1)

#将非0值转换为1
data = data.applymap(lambda x: 1 if x != 0 else 0)

In [15]:
feature_weights = unsupervised_relief(data.values)


In [9]:
data.columns[np.argsort(feature_weights)[::-1]]

Index(['DILEMMAAB_FREECASHFLOW', 'FINANCINGAB_IDLEFUNDS',
       'SURPLUSAB_REVENUESTRUCTURE', 'RINVESTAB_COMBINEDPROFIT',
       'SURPLUSAB_PROFITSTRUCTURE', 'REVENUEAB_TAXREVENUE',
       'CASHAB_LIMITEDCASH', 'FINANCINGAB_HIGHINTEREST', 'COSTAB_SHOPPINGCASH',
       'RPARTYAB_VARNISHOCCUPATION', 'ASSETAB_STOCK',
       'REVENUEAB_REVENUEGROWTH', 'INVESTMENTAB_ASSETDISPOSAL',
       'RINVESTAB_GROUP', 'DILEMMAAB_OPCASH', 'REVENUEAB_SELLINGEXPENSES',
       'ASSETAB_PREPAIDQUALITY', 'SURPLUSAB_ACCRUALS', 'DEBTAB_DEBTSTRUCTURE',
       'REVENUEAB_SELLONCREDITRATIO', 'REVENUEAB_COSTREVENUE',
       'REVENUEAB_CASHREVENUE', 'RINVESTAB_GOODWILLQUALITY',
       'REVENUEAB_SELLONCREDITQUALITY', 'CASHAB_DEPOSIT',
       'DILEMMAAB_NEGAREVENUE', 'ASSETAB_GOODWILLQUALITY',
       'ASSETAB_SOFTASSETS', 'CASHAB_OPERATINGCASH', 'COSTAB_PROFITGROWTH',
       'COSTAB_PREPAIDEXPENSES', 'INVESTMENTAB_INVESTSTRATEGY',
       'COSTAB_PERIODCOST', 'DEBTAB_FINANCINGSTRUCTURE',
       'INVESTMENTAB_CONSTR

In [10]:
feature_weights

array([ 634.,  141.,  854.,  619., 1009.,  648., 1043.,  591., -102.,
        159.,  506.,  562.,  569., 1025.,  836.,  482., 1267.,  647.,
        940.,  319., 1026., 1184.,  962.,  463.,  530.,  726.,  768.,
        976.,  930.,  679.,  789., 1084., 1105.,  707.,  960., -407.,
       1019.,  348.,  845., 1093., 1107.])