In [1]:
# Attach Google Drive to the Colab runtime's filesystem at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr

# Read the training table from the current working directory.
df = pd.read_csv('./train.csv')

# Regression target: the 'Inhibition' column.
y = df['Inhibition']

# Feature matrix: every remaining column except 'Inhibition' and 'ID'.
X = df.drop(['Inhibition', 'ID'], axis='columns')

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a seeded 100-tree random forest on the training portion
# (n_jobs=-1 lets scikit-learn use every available core).
# fit() returns the estimator itself, so `rf` is the fitted model.
rf = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1).fit(X_train, y_train)

# Predictions for the held-out rows, scored further below.
y_pred = rf.predict(X_test)

# Score component A: RMSE on the hold-out split, normalized by the spread
# (max - min) of the hold-out targets.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
y_range = y_test.max() - y_test.min()
if y_range > 0:
    norm_rmse = rmse / y_range
else:
    # A constant hold-out target has zero spread, so the ratio is undefined.
    # Without this guard a perfect prediction gives 0/0 = NaN, which would
    # propagate through min() into the final score. Report zero error when
    # the predictions match exactly, otherwise the worst case (capped at 1
    # in the score below).
    norm_rmse = 0.0 if rmse == 0 else np.inf

# Score component B: Pearson correlation between targets and predictions.
pearson_corr, _ = pearsonr(y_test, y_pred)
if np.isnan(pearson_corr):
    # pearsonr returns NaN when either input is constant (for example, a model
    # that predicts a single value). Treat that as "no linear relationship"
    # so the final score stays a finite number.
    pearson_corr = 0.0
# NOTE(review): B is used unclipped, so a negative correlation lowers the
# score below what component A alone would give. If the official metric
# clips B to [0, 1], apply np.clip here - confirm against the task rules.

# Final score: equal-weight blend of (1 - A), with A capped at 1, and B.
score = 0.5 * (1 - min(norm_rmse, 1)) + 0.5 * pearson_corr

print(f'Normalized RMSE: {norm_rmse:.4f}')
print(f'Pearson Correlation: {pearson_corr:.4f}')
print(f'Score: {score:.4f}')

Normalized RMSE: 0.2592
Pearson Correlation: 0.2217
Score: 0.4813
