In [1]:
import warnings
import os
import pandas as pd
from pathlib import Path
from utils.config import config, table_translate, EvaluationIndicatorTable,tMp
warnings.filterwarnings('ignore')

# Ownership-type codes -> display labels; any other code is shown as "其他".
sshshs = {
    "1":"国企",
    "2":"民营",
    "3":"外资"
}
doc_name = "熵权-Topsis分析阶段表格数据"
base_path = Path(os.path.abspath('.')) / 'assert'
# Industry code -> industry name lookup taken from the project configuration.
works = dict(config['行业代码'].items())

# Stock codes are read as strings (dtype 'object') rather than parsed as numbers.
dataset = pd.read_csv('./data/制造企业绩效评价数据总表.csv', dtype={"股票代码": 'object'})

# Attach readable industry / ownership labels.
# An industry code missing from `works` raises KeyError (fail loudly).
dataset["行业名称"] = dataset["行业代码"].apply(lambda x: works[str(x)])
dataset["股权性质"] = dataset["股权性质"].apply(lambda x: sshshs.get(str(x), "其他"))

# Reverse the inventory-turnover indicator (max - x).
_max = dataset['存货周转率(%)'].max()
dataset['存货周转率(%)'] = _max - dataset['存货周转率(%)']
# Rescale two indicators (presumably unit normalisation - TODO confirm the source units).
dataset['EBITDA率(%)']  = dataset['EBITDA率(%)'] /1e8
dataset['员工收入增长率(%)']  = dataset['员工收入增长率(%)'] /100

# Determine the evaluation year(s) from the data itself.
years = sorted(set(dataset["截止日期"].tolist()))
if len(years)==1:
    # Use the only year present instead of assuming it is 2023.
    year = years[0]
    matrix = dataset[dataset['截止日期']==year].reset_index(drop=True).copy(deep=True)
    print('共获得{0}年, {1}个指标下{2}个样本企业数据'.format(year,matrix.shape[1], matrix.shape[0]))
else:
    # Panel data: index every observation by (stock code, period end).
    dataset.sort_values(["股票代码",'截止日期'], inplace=True)
    matrix = dataset.set_index(["股票代码",'截止日期']).copy(deep=True)
    start, end = years[0], years[-1]
    # The enterprise count is taken from the last year of the panel.
    print(f'共获得{start}-{end}年，{matrix.shape[1]}个指标下{dataset[dataset["截止日期"]==end].shape[0]}家企业{matrix.shape[0]}个样本企业数据')

共获得2018-2023年，41个指标下1436家企业8616个样本企业数据


In [2]:
# Sample structure analysis: keep only the identifying and grouping columns.
_structure_columns = ["股票代码","股票简称", "截止日期","行业名称","所属省份","股权性质"]
un_set = dataset[_structure_columns]

def consum(ind):
    """Export a year-by-year count of sample observations grouped by `ind`.

    Reads the module-level `un_set` frame, counts non-null "股票代码" values for
    every (`ind`, "截止日期") pair, pivots the years into columns, appends a
    "合计" row total and hands the table to `table_translate`.

    Every category and every year present in `un_set` is kept; combinations
    with no observations are reported as 0.

    Parameters
    ----------
    ind : str
        Name of the grouping column in `un_set` (e.g. "行业名称").
    """
    counts = (
        un_set.groupby([ind, "截止日期"])["股票代码"]
        .count()
        .unstack("截止日期", fill_value=0)
        .sort_index(axis=1)
    )
    # Drop the columns-axis label so only the year values appear as headers.
    counts.columns.name = None
    # Total is computed before the column is added, so it sums year columns only.
    counts["合计"] = counts.sum(axis=1)
    result = counts.reset_index()
    table_translate(result, f"目标样本分{ind}逐年份变化情况表", doc_name)
# Export one structure table per grouping dimension, then persist the raw subset.
for _dimension in ("行业名称", "所属省份", "股权性质"):
    consum(_dimension)
un_set.to_csv('./data/un_unit.csv')

In [3]:
# Statistical features: min-max normalisation preview and descriptive statistics
import numpy as np
from sklearn.preprocessing import MinMaxScaler

ind_table = pd.DataFrame(EvaluationIndicatorTable)
# Export the evaluation indicator system table.
table_translate(ind_table, filename=doc_name, table_name="评价指标体系表")
# Indicator matrix: the columns named in the "指标层" column, cast to float.
metrix = matrix[ind_table["指标层"].tolist()].astype('float')
# Short labels X1..Xn, one per indicator column.
columns = [f"X{i+1}" for i in range(len(metrix.columns))]
# Min-max (extreme value) standardisation.
scaler = MinMaxScaler()
scale = scaler.fit_transform(metrix)

# Preview table: first 25 rows, then a row of "..." to signal truncation.
scaled_preview = pd.DataFrame(scale, index=matrix.index, columns=columns).reset_index()
scaled_preview = scaled_preview.iloc[:25].round(3)
scaled_preview.loc[scaled_preview.shape[0]] = ['...'] * scaled_preview.shape[1]
# The "..." column must have as many rows as the preview; a fixed length would
# leave NaN in the remaining rows after the column-wise concat.
ellipsis_col = pd.DataFrame({'...': ['...'] * scaled_preview.shape[0]})
scaled_preview = pd.concat(
    [scaled_preview.iloc[:, :5], ellipsis_col, scaled_preview.iloc[:, -3:]], axis=1
)
table_translate(scaled_preview, filename=doc_name, table_name="数据处理结果表")

# Descriptive statistics, one row per indicator, labelled X1..Xn.
desc_table = metrix.describe().T.round(3).reset_index(drop=True)
desc_table.index = columns
desc_table.reset_index(inplace=True)
desc_table["count"] = desc_table["count"].astype(int)
table_translate(desc_table, filename=doc_name, table_name="数据样本描述性特征")

In [4]:
# Entropy-weight TOPSIS

epsilon = 1e-10  # keeps the division and the logarithm away from zero
n_samples = scale.shape[0]
# Share of each sample within every indicator column.
proportion = scale / (scale.sum(axis=0)+epsilon)
# Entropy of each indicator, scaled by 1 / ln(number of samples).
E = - 1 / np.log(n_samples) * (proportion*np.log(proportion+epsilon)).sum(axis=0)
# Entropy weights, normalised to sum to 1.
w = (1-E)/(1 - E).sum()

# TOPSIS on the weighted normalised matrix.
weighted = scale * w
_max = weighted.max(axis=0)  # positive ideal solution
_min = weighted.min(axis=0)  # negative ideal solution
# Euclidean distance to the positive / negative ideal solutions:
# the square root is taken AFTER summing the squared differences per row.
dax = np.sqrt(((weighted - _max)**2).sum(axis=1))
din = np.sqrt(((weighted - _min)**2).sum(axis=1))
score = din/(dax+din)

# Extra term: row sum of the normalised indicators rescaled to [0, 0.1].
s = scale.sum(axis=1)
s = (s-s.min())/(s.max()-s.min())*0.1
# NOTE (original author): unsure whether the result is still correct with this term added.
score = pd.Series(score + s, index=matrix.index, name='score')
data = pd.merge(matrix, score, left_index=True, right_index=True, how='outer')

data.sort_index(inplace=True)
# NOTE(review): index=False drops the index; when `matrix` is indexed by
# (股票代码, 截止日期) those keys are not written to the file - confirm this is intended.
data.to_csv('./data/dataset.csv', index=False)

# Ranking table for the latest year present in the data: top 30 by score.
ranked = data.reset_index()
latest_year = ranked["截止日期"].max()
ranked = ranked[ranked["截止日期"]==latest_year].sort_values(["score"], ascending=False)

top = ranked.iloc[:30].round(3)
top = top[["股票简称", "净资产收益率(%)", "资产报酬率(%)","营业收入增长率(%)","总资产周转率(%)","社会责任报告质量","员工收入增长率(%)","数字化供应链覆盖度","数字化战略导向持续性","score"]]
# Invert tMp (value -> key) and use it to rename the columns.
r_map = {value:key for key,value in tMp.items()}
top = top.rename(columns=r_map).reset_index(drop=True)
# Trailing "..." row signals that the table is truncated.
top.loc[top.shape[0]] = ['...'] * top.shape[1]
# The "..." column is sized from the table so no row is left as NaN.
ellipsis_col = pd.DataFrame({'...': ['...'] * top.shape[0]})
temp = pd.concat([top.iloc[:,:4], ellipsis_col, top.iloc[:,-5:]], axis=1)
table_translate(temp, filename=doc_name, table_name="熵权TOPSIS评价结果")
