In [1]:
from openml import tasks

import pandas as pd

from keywords import oml_task_ids

In [2]:
# Fetch all benchmark tasks listed in `oml_task_ids` in a single call.
oml_tasks = tasks.get_tasks(oml_task_ids)

# Resolve each task to the dataset it is defined on (same order as the tasks).
oml_datasets = [
    task.get_dataset()
    for task in oml_tasks
]



In [3]:
# Load every dataset once. Each entry unpacks to
# (name, X, y, categorical_indicator, attribute_names); no target is passed to
# get_data(), so the target column is still part of X (hence it is excluded
# explicitly when counting features below).
oml_data = [(oml_dataset.name, *oml_dataset.get_data()) for oml_dataset in oml_datasets]


def _summarise_dataset(oml_task_id, target_name, name, X, categorical_indicator, attribute_names):
    """Return one summary row: (id, name, n, p, # na rows, # categorical columns).

    The target column is identified by name instead of being assumed to be the
    last column. Slicing with ``[:-1]`` counts a nominal target as a categorical
    feature whenever the target is not stored last in the dataset.
    """
    # True for every column that is an input feature, False for the target.
    is_feature = [attribute_name != target_name for attribute_name in attribute_names]
    return (
        oml_task_id,
        name,
        X.shape[0],
        sum(is_feature),
        # Rows with at least one missing value.
        int(X.isnull().any(axis=1).sum()),
        sum(
            bool(is_categorical)
            for is_categorical, keep in zip(categorical_indicator, is_feature)
            if keep
        ),
    )


df_datasets = pd.DataFrame(
    data=[
        _summarise_dataset(
            oml_task_id, oml_task.target_name, name, X, categorical_indicator, attribute_names
        )
        for oml_task_id, oml_task, (name, X, _, categorical_indicator, attribute_names)
        in zip(oml_task_ids, oml_tasks, oml_data)
    ],
    columns=['id', 'name', 'n', 'p', '# na rows', '# categorical columns'],
).sort_values(by=['id'], axis=0).reset_index(drop=True)

# escape=True so LaTeX special characters ('#' in the headers, '_' or '&' in
# dataset names) are emitted escaped and the table compiles when pasted.
print(df_datasets.to_latex(index=False, escape=True))
df_datasets

\begin{tabular}{rlrrrr}
\toprule
id & name & n & p & # na rows & # categorical columns \\
\midrule
37 & diabetes & 768 & 8 & 0 & 0 \\
43 & spambase & 4601 & 57 & 0 & 0 \\
3903 & pc3 & 1563 & 37 & 0 & 0 \\
3904 & jm1 & 10885 & 21 & 5 & 0 \\
3913 & kc2 & 522 & 21 & 0 & 0 \\
3918 & pc1 & 1109 & 21 & 0 & 0 \\
9946 & wdbc & 569 & 30 & 0 & 0 \\
10093 & banknote-authentication & 1372 & 4 & 0 & 0 \\
146819 & climate-model-simulation-crashes & 540 & 18 & 0 & 0 \\
146820 & wilt & 4839 & 5 & 0 & 0 \\
167120 & numerai28.6 & 96320 & 21 & 0 & 0 \\
168350 & phoneme & 5404 & 5 & 0 & 0 \\
189922 & gina & 3153 & 970 & 0 & 1 \\
190137 & ozone-level-8hr & 2534 & 72 & 0 & 0 \\
190392 & madeline & 3140 & 259 & 0 & 1 \\
190410 & philippine & 5832 & 308 & 0 & 1 \\
359955 & blood-transfusion-service-center & 748 & 4 & 0 & 0 \\
359962 & kc1 & 2109 & 21 & 0 & 0 \\
359972 & sylvine & 5124 & 20 & 0 & 1 \\
359975 & Satellite & 5100 & 36 & 0 & 0 \\
\bottomrule
\end{tabular}



Unnamed: 0,id,name,n,p,# na rows,# categorical columns
0,37,diabetes,768,8,0,0
1,43,spambase,4601,57,0,0
2,3903,pc3,1563,37,0,0
3,3904,jm1,10885,21,5,0
4,3913,kc2,522,21,0,0
5,3918,pc1,1109,21,0,0
6,9946,wdbc,569,30,0,0
7,10093,banknote-authentication,1372,4,0,0
8,146819,climate-model-simulation-crashes,540,18,0,0
9,146820,wilt,4839,5,0,0
