In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import numpy as np

In [2]:
# Load the merged score table from the project-relative CSV file.
data = pd.read_csv(filepath_or_buffer='./csv/merged_score.csv')

In [3]:
# Prepare the data: the three rating columns are the features,
# and the 'rating' column is the regression target.
feature_columns = ['director_rating', 'author_rating', 'actor_rating']
X = data[feature_columns]
y = data['rating']

In [None]:
# Preview the first rows of the feature frame.
# `head` must be *called*: printing the bare attribute `X.head` shows the
# bound-method repr instead of the intended five-row preview.
print(X.head())

In [4]:
# Split into training and test sets: 25% of the rows are held out,
# and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Build the random forest model (100 trees, seeded) and fit it on the
# training split; `fit` returns the estimator itself, so it can be chained.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out features.
y_pred = rf.predict(X_test)

# Compute several error metrics on the test split.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from MSE rather than recomputed

# MAPE: mean of |(actual - predicted) / actual|, expressed as a percentage.
# NOTE(review): a target value of exactly 0 in y_test would put inf/NaN terms
# into this ratio — confirm that 'rating' is never 0.
relative_errors = (y_test - y_pred) / y_test
mape = np.mean(np.abs(relative_errors)) * 100

# Print the error metrics.
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Absolute Percentage Error (MAPE): {mape}%')
print(f'R-squared: {r2}')

Mean Squared Error (MSE): 0.41256655676459486
Root Mean Squared Error (RMSE): 0.6423134412143302
Mean Absolute Error (MAE): 0.46699040693692406
Mean Absolute Percentage Error (MAPE): 7.182090721402151%
R-squared: 0.6554630196226434
