In [1]:
import warnings
import os
import pandas as pd
from pathlib import Path
# Silence every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Document label; passed as the `filename` argument of `table_translate` below.
doc_name = "熵权-Topsis分析阶段表格数据"
# Folder named 'assert' directly under the current working directory.
base_path = Path.cwd() / 'assert'

# Optional filter (disabled): keep only firms with a higher degree of
# digitalization. If re-enabled, it has to run after the CSV is loaded and
# before the digitalization columns are dropped below.
# dataset = dataset[(dataset["wufei"]>0.2)&(dataset["zhaochenyv"]>0.5)&(dataset["yuanchun"]>0.5)&(dataset["lishouxi"]>0.5)&(dataset["csmar"]>10)]

# Load the master table (stock codes kept as strings), keep the rows whose
# period-end value equals 2023, renumber the rows, then discard the period
# column together with the five digitalization columns.
dataset = (
    pd.read_csv('./data/制造企业绩效评价数据总表.csv', dtype={"股票代码": 'object'})
    .loc[lambda frame: frame['截止日期'] == 2023]
    .reset_index(drop=True)
    .drop(columns=["截止日期", "wufei", "zhaochenyv", "yuanchun", "lishouxi", "csmar"])
)
print(f"筛选保留2023年数据{dataset.shape[0]}个。")

筛选保留2023年数据1515个。


In [2]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from utils.config import table_translate
from utils.config import EvaluationIndicatorTable, pos_indicators


# Operate on an independent deep copy so `dataset` itself is not modified here.
data = dataset.copy(deep=True)
# Build a DataFrame from the indicator-system definition.
ind_table = pd.DataFrame(EvaluationIndicatorTable)
# Hand the indicator table to the project helper (original note: "calculation
# formula"). Presumably this exports the table to the document named by
# `doc_name` -- TODO confirm against utils.config.table_translate.
table_translate(ind_table, filename=doc_name, table_name="评价指标体系表")
# Indicator reversal: replace each listed column by (column maximum - value).
# NOTE(review): the list is called `pos_indicators`, yet reversal is normally
# applied to negative (cost-type) indicators -- confirm the list's meaning.
for name in pos_indicators:
    max_ = data[name].max()
    data[name] = max_ - data[name]

# Identifier / categorical columns, keyed by the stock short name.
un_unit = data.loc[:, ["股票代码", "股票简称", '行业代码', '所属省份']].set_index("股票简称")
# Numeric indicator matrix: same key, only the columns named in the indicator
# table's "指标层" column, cast to float.
data = data.set_index("股票简称").loc[:, ind_table["指标层"].tolist()].astype('float')
# Min-max standardization of every indicator column (MinMaxScaler with its
# default settings).
scaler = MinMaxScaler()
scale = scaler.fit_transform(data)
# Tiny constant that keeps the division and the logarithm below away from zero.
epsilon = 1e-10
# p = number of rows (samples), i = number of columns (indicators).
p, i = scale.shape
# Share of each sample within its indicator column.
temp = np.divide(scale, np.sum(scale, axis=0) + epsilon)
# Information entropy per indicator, scaled by 1 / ln(p).
E = (-1 / np.log(p)) * np.sum(temp * np.log(temp + epsilon), axis=0)
# Entropy weights: (1 - E) normalized so the weights sum to one.
w = (1 - E) / np.sum(1 - E)
# TOPSIS: weight the standardized indicator matrix with the entropy weights.
temp = scale * w
# Positive / negative ideal solutions: per-indicator maximum and minimum of
# the weighted matrix.
_max = temp.max(axis=0)
_min = temp.min(axis=0)
# Euclidean distance of every sample to the positive and negative ideal
# solutions. The squared differences must be summed per row BEFORE taking the
# square root; applying sqrt element-wise first collapses to |x| and yields a
# Manhattan distance instead.
dax = np.sqrt(((temp - _max) ** 2).sum(axis=1))
din = np.sqrt(((temp - _min) ** 2).sum(axis=1))
# Relative closeness to the ideal solution: larger means closer to the
# positive ideal and farther from the negative ideal.
score = din / (dax + din)
# Extra term on top of the TOPSIS closeness: the unweighted row sum of the
# standardized indicators, min-max rescaled into [0, 0.05]. Because of this
# term the final score can slightly exceed 1.
s = scale.sum(axis=1)
s = (s - s.min()) / (s.max() - s.min()) * 0.05
# Final score as a Series aligned with the indicator table's index.
score = pd.Series(score + s, index=data.index, name='score')
# Attach the score to the indicator table by index, then turn the index
# (the stock short name) back into an ordinary column.
data = data.merge(score, left_index=True, right_index=True, how='outer').reset_index()
# Join the identifier columns back on the stock short name, order the rows by
# stock code and renumber them.
data = (
    un_unit.merge(data, on=["股票简称"], how='outer')
    .sort_values(['股票代码'])
    .reset_index(drop=True)
)
# Persist the scored table without the positional index.
data.to_csv('./data/dataset.csv', index=False)