In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

In [4]:
# Read the hotel review table from the CSV file in the working directory.
data = pd.read_csv("sukhumvit_hotel_Nclean.csv")

# Show the first five rows as a quick check of the columns and values.
data.head(5)

Unnamed: 0,Check-in Process,Convenience,Cost-effectiveness,Dining,Facilities & Environment,Guest Impressions,Hygiene,Location,Room,Service & Staff,Satisfaction rating
0,1,2,1,0,4,2,0,2,1,1,4.7
1,0,0,0,1,3,1,0,0,1,0,5.0
2,0,0,0,0,0,3,0,1,0,0,5.0
3,0,1,0,0,2,0,0,0,0,1,5.0
4,0,0,0,0,1,1,0,1,1,0,4.7


In [6]:
# Target: the 'Satisfaction rating' column.
y = data['Satisfaction rating']
# Features: every remaining column of the table.
X = data.drop('Satisfaction rating', axis='columns')

In [30]:
# Standardize features and target with separate scalers so that each can be
# inverted independently later on.
# NOTE(review): both scalers are fit on every row before the cross-validation
# split, so held-out folds contribute to the scaling statistics — confirm that
# this is acceptable for the reported scores.
scaler_X, scaler_y = StandardScaler(), StandardScaler()
X_scaled = scaler_X.fit(X).transform(X)
# StandardScaler expects a 2-D input: reshape the target to a single column,
# scale it, then flatten back to 1-D for the regressor.
y_column = y.to_numpy().reshape(-1, 1)
y_scaled = scaler_y.fit_transform(y_column).ravel()

In [58]:
# Cross-validation splitter (not a single train/test split): 10 folds, rows
# shuffled with a fixed seed so the fold assignment is reproducible.
# KFold is provided by sklearn.model_selection and has to be imported in the
# notebook's import cell for this cell to run on a fresh kernel.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [80]:
# Configure the MLP regressor: a single hidden layer of 9 tanh units, trained
# with the L-BFGS solver, regularization strength alpha=0.001, and a fixed
# seed for reproducible weight initialization.
# The estimator is only constructed here. It is fitted inside the
# cross-validation loop, where X_train / y_train are first defined; fitting in
# this cell would raise NameError on a fresh kernel.
mlp = MLPRegressor(hidden_layer_sizes=(9,), activation='tanh', solver='lbfgs',
                   alpha=0.001, max_iter=10000, random_state=42)

In [82]:
# Per-fold MSE and R2 from k-fold cross-validation.
# NOTE(review): y_scaled is the standardized target, so the MSE values are in
# standardized units rather than in rating points.
mse_scores, r2_scores = [], []

for train_index, test_index in kf.split(X_scaled):
    # Slice the scaled arrays into this fold's training and held-out parts.
    X_train, y_train = X_scaled[train_index], y_scaled[train_index]
    X_test, y_test = X_scaled[test_index], y_scaled[test_index]

    # Refit the shared estimator on the training part, then predict the
    # held-out part; after the loop `mlp` holds the last fold's fit.
    y_pred = mlp.fit(X_train, y_train).predict(X_test)

    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    mse_scores += [mse]
    r2_scores += [r2]

In [84]:

# Average the per-fold metrics collected by the cross-validation loop.
mean_mse, mean_r2 = np.mean(mse_scores), np.mean(r2_scores)

# Display both averages as the cell output.
(mean_mse, mean_r2)

(0.9297268715234941, 0.06834408849399844)

In [86]:

# Predict on X_test with the current mlp. Both names are left over from the
# last iteration of the cross-validation loop, so this covers only that final
# fold. NOTE(review): y_pred is not used by the cells visible here.
y_pred = mlp.predict(X_test)

In [92]:

# Weight-based importance heuristic for the MLP inputs.
# mlp.coefs_[0] is the first weight matrix (input -> hidden layer); take the
# absolute value of every weight and add them up per input feature (axis=1).
# NOTE(review): `mlp` here is whatever the last fit left behind, i.e. the model
# from the final cross-validation fold — confirm that is the intended model.
input_layer_weights = np.absolute(mlp.coefs_[0])
feature_importance = np.sum(input_layer_weights, axis=1)

# One row per feature, plus each feature's share of the total importance.
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': feature_importance}
).assign(Proportion=lambda frame: frame['Importance'] / frame['Importance'].sum())

# Rank by importance, largest first, and keep the top 10 rows.
top_10_features = feature_importance_df.sort_values('Importance', ascending=False).iloc[:10]

# Display the top 10 features as the cell output.
top_10_features

Unnamed: 0,Feature,Importance,Proportion
4,Facilities & Environment,53.512895,0.169152
0,Check-in Process,39.850938,0.125967
1,Convenience,37.802384,0.119492
2,Cost-effectiveness,35.509547,0.112244
9,Service & Staff,33.311728,0.105297
5,Guest Impressions,30.936512,0.097789
8,Room,25.093578,0.07932
7,Location,20.61569,0.065165
3,Dining,20.550771,0.06496
6,Hygiene,19.175471,0.060613
