In [2]:
import pandas as pd

from category_encoders import OneHotEncoder
from pathlib import Path
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeRegressor

from src.load_datasets import load_dataset

from src.evaluate_regression import get_rankings, average_spearman, custom_train_test_split

# ---- configuration
# Columns handed to the split and to get_rankings as `factors`; in the rankings
# table each combination of their values shows up as one column.
factors = ["dataset", "model", "tuning", "scoring"]
# Column handed to get_rankings as `new_index`; the rankings are indexed by it.
new_index = "encoder"
# Regression target, and the column name under which its predictions are stored.
target = "cv_score"
target_pred = f"{target}_pred"
# Fixed seed for the tree: scikit-learn permutes the features at every split, so
# equally good splits would otherwise be resolved differently from run to run
# and the printed score would not be reproducible.
SEED = 0

DATA_DIR = Path("../../data/raw")

# ---- load the data and split it into train / test parts
df_train = load_dataset(DATA_DIR / "dataset_train.csv")
X_train, X_test, y_train, y_test = custom_train_test_split(df_train, factors, target)

# ---- fit a baseline pipeline and predict on the test part
dummy_pipe = Pipeline([
    ("encoder", OneHotEncoder()),
    ("model", DecisionTreeRegressor(random_state=SEED)),
])
dummy_pipe.fit(X_train, y_train)
# Reuse the index of y_test so that the concat below aligns predictions row by row.
y_pred = pd.Series(dummy_pipe.predict(X_test), index=y_test.index, name=target_pred)
df_pred = pd.concat([X_test, y_test, y_pred], axis=1)

# ---- convert true and predicted scores to rankings and compare them
rankings_test = get_rankings(df_pred, factors=factors, new_index=new_index, target=target)
rankings_pred = get_rankings(df_pred, factors=factors, new_index=new_index, target=target_pred)
print(average_spearman(rankings_test, rankings_pred))

Loading data ...
0.5006719994115977


In [9]:
# Inspect the test-split rows with the true and predicted cv_score side by side.
df_pred

Unnamed: 0,dataset,model,tuning,scoring,encoder,cv_score,cv_score_pred
0,43098,KNC,full,AUC,BE,0.701196,0.693207
1,43098,KNC,full,AUC,BUCV10RGLMME,0.849776,0.858937
2,43098,KNC,full,AUC,BUCV10TE,0.846670,0.844146
3,43098,KNC,full,AUC,BUCV2RGLMME,0.853203,0.860167
4,43098,KNC,full,AUC,BUCV2TE,0.866123,0.859091
...,...,...,...,...,...,...,...
9060,43897,LGBMC,no,ACC,PBTE01,1.000000,1.000000
9061,43897,LGBMC,no,ACC,RGLMME,1.000000,1.000000
9062,43897,LGBMC,no,ACC,SE,1.000000,1.000000
9063,43897,LGBMC,no,ACC,TE,1.000000,1.000000


In [8]:
# Inspect the rankings computed from the true cv_score column of df_pred.
rankings_test

Unnamed: 0_level_0,3,3,3,3,3,3,3,29,29,29,...,43900,43900,43900,43922,43922,43922,43922,43922,43922,43922
Unnamed: 0_level_1,DTC,DTC,DTC,KNC,LGBMC,LR,LR,DTC,DTC,KNC,...,LGBMC,LR,LR,DTC,KNC,KNC,KNC,LGBMC,SVC,SVC
Unnamed: 0_level_2,full,no,no,full,no,full,no,full,model,full,...,no,model,no,full,full,full,no,no,no,no
Unnamed: 0_level_3,AUC,ACC,AUC,F1,F1,F1,F1,ACC,ACC,F1,...,ACC,ACC,AUC,AUC,ACC,F1,ACC,F1,ACC,F1
encoder,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4
BE,1,1,1,22,11,17,5,4,6,7,...,9,3.0,28,10,0,0,0,2,4,4
BUCV10RGLMME,1,1,1,17,3,23,23,7,9,5,...,18,19.0,3,14,17,18,14,13,21,21
BUCV10TE,1,1,1,10,1,25,24,6,10,9,...,19,25.0,1,12,12,12,13,11,12,12
BUCV2RGLMME,1,1,1,16,14,14,13,9,8,13,...,21,14.0,22,15,8,8,9,16,7,8
BUCV2TE,1,1,1,11,7,16,6,8,8,11,...,17,15.0,17,12,9,9,8,12,8,7
BUCV5RGLMME,1,1,1,15,6,24,8,1,8,8,...,20,16.0,14,14,14,14,14,13,14,15
BUCV5TE,1,1,1,9,10,20,3,7,10,2,...,20,23.0,9,13,11,11,10,13,9,9
CBE,3,4,3,29,13,28,28,16,12,12,...,5,1.0,16,20,22,25,19,14,1,0
CE,1,1,1,28,4,21,25,3,4,4,...,0,7.0,30,7,5,2,5,3,0,2
CV10RGLMME,1,1,1,19,22,5,12,0,0,25,...,7,11.0,27,18,19,22,14,6,23,23
