In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Raw per-sport scores: one list per criterion, all lists aligned by position
# with the "Sport" list (entry i of every list belongs to sport i).
# NOTE(review): "Equastrian-Jumping" looks like a misspelling of "Equestrian";
# it is kept verbatim because the label is data and may be matched elsewhere.
data = dict(
    Sport=[
        "Swimming", "Athletics", "Baseball", "Softball", "Breaking", "Cricket",
        "Cycling-BMX Freestyle", "Cycling-Track", "Equastrian-Jumping",
        "Flag Football", "Football", "Gymnastics-Artistic",
        "Karate", "Lacrosse-Sixes", "Rowing-Coastal",
        "Sailing", "Skateboarding", "Squash",
    ],
    Safety_Fair_Play=[
        91.5, 56.5, 79, 75.5, 87, 81,
        13, 88, 98.5, 100, 50, 79,
        28, 95, 92, 87, 21, 98,
    ],
    Gender_Equity=[
        91.44197, 97.32408, 76.92308, 76.92308, 96.9697, 88.88889,
        100, 99.57447, 34.28571, 83.63636, 87.11434, 98.94737,
        95.12195, 80, 98.9858, 100, 100, 100,
    ],
    Sustainability=[
        93.75, 80.0, 52.5, 52.5, 75.0, 27.5,
        75.0, 85.0, 100.0, 47.5, 83.75, 75.0,
        15.0, 47.5, 60.0, 80.0, 85.0, 3.75,
    ],
    Inclusivity=[
        93.103, 98.522, 9.852, 8.867, 7.881, 6.896,
        8.374, 17.241, 24.138, 15.763, 11.33, 24.631,
        33.498, 4.433, 17.241, 32.02, 11.33, 28.079,
    ],
    Relevance_Innovation=[
        93.75, 85, 75.74, 17.13, 17.13, 17.04,
        17.04, 17.04, 14.91, 14.91, 14.91, 13.63,
        13.63, 13.33, 13.33, 11.49, 11.11, 2.43,
    ],
    Popularity_Accessibility=[
        32.3013, 73.9318, 64.6017, 58.7433, 42.1427, 45.1777,
        37.1283, 20.4252, 40.5952, 30.7501, 53.5431, 28.2461,
        64.7109, 42.6731, 36.4481, 51.5331, 44.1441, 51.2571,
    ],
)

# Build the working DataFrame: one row per sport, one column per dict key.
df = pd.DataFrame.from_dict(data)

# Log-transform every numeric criterion, then min-max scale the results.
numerical_columns = [
    "Safety_Fair_Play",
    "Gender_Equity",
    "Sustainability",
    "Inclusivity",
    "Relevance_Innovation",
    "Popularity_Accessibility",
]

# Names of the derived columns, in the same order as numerical_columns.
log_columns = [f"{name}_Log" for name in numerical_columns]

# Log transform; the +1 shift avoids log(0) for a zero score.
for col in numerical_columns:
    df[col + "_Log"] = np.log(df[col].add(1))

# Min-max scaling, fitted on the log columns themselves. The scaled values
# overwrite the "*_Log" columns in place, so from here on those columns hold
# scaled log values rather than raw logs.
scaler = MinMaxScaler()
df[log_columns] = scaler.fit_transform(df[log_columns])

# Result view: the sport label plus the processed columns only.
df_processed = df[["Sport", *log_columns]]
df_processed


Unnamed: 0,Sport,Safety_Fair_Play_Log,Gender_Equity_Log,Sustainability_Log,Inclusivity_Log,Relevance_Innovation_Log,Popularity_Accessibility_Log
0,Swimming,0.955512,0.915808,0.979104,0.980746,1.0,0.352256
1,Athletics,0.71492,0.974467,0.927814,1.0,0.970803,1.0
2,Baseball,0.882041,0.75334,0.792135,0.237925,0.936475,0.893789
3,Softball,0.859402,0.75334,0.792135,0.205202,0.501708,0.819074
4,Breaking,0.930274,0.971034,0.906971,0.168996,0.501708,0.559057
5,Cricket,0.894537,0.889177,0.586122,0.128569,0.500208,0.613357
6,Cycling-BMX Freestyle,0.0,1.0,0.906971,0.187576,0.500208,0.460371
7,Cycling-Track,0.935992,0.995985,0.947408,0.416516,0.500208,0.0
8,Equastrian-Jumping,0.992428,0.0,1.0,0.526805,0.462349,0.529881
9,Flag Football,1.0,0.831923,0.760038,0.387457,0.462349,0.314157
