In [48]:
"""Python script to run experiment and record the performance."""
import argparse
from pathlib import Path
import torch
from sklearn.metrics import accuracy_score
import json
from statsmodels.stats.proportion import proportion_confint

from tableshift import get_dataset
from tableshift.models.training import train
from tableshift.models.utils import get_estimator
from tableshift.models.default_hparams import get_default_config
from experiments_causal.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_val_score


# Experiment with all features

In [49]:
# Run configuration for the full-feature experiment.
# `model` and `experiment` are the identifiers handed to the tableshift helpers
# (get_default_config / get_estimator and get_dataset) in the following cells.
model = "xgb"
experiment = "assistments"
# Both directories point at the same scratch folder. `cache_dir` is passed to
# get_dataset; `save_dir` is not referenced in the visible cells.
cache_dir = save_dir = "../tmp"

In [50]:
# Load the dataset object for the configured experiment, passing `cache_dir`
# through to the loader.
dset = get_dataset(experiment, cache_dir)
# Default hyperparameter configuration for this model / dataset pair.
config = get_default_config(model, dset)
# NOTE(review): this estimator is rebuilt from the same config in the training
# cell before it is ever used, so this instantiation looks redundant — confirm
# get_estimator has no required side effects before removing it.
estimator = get_estimator(model, **config)

skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)


Train on in-domain train set

In [51]:
# Build a new estimator from the default config, then train it on `dset`.
# `train` returns the estimator that the evaluation cells below predict with.
# NOTE(review): which split is used for fitting is decided inside `train`
# (not visible here); the notebook text says the in-domain training set.
estimator = get_estimator(model, **config)
estimator = train(estimator, dset, config=config)

Evaluate the model trained on the in-domain training set on the in-domain (ID) test set.

In [52]:
# Fetch the "id_test" split. get_pandas returns four items; only the first two
# (bound here as features and labels) are used, the remaining two are discarded.
X_te, y_te, _, _ = dset.get_pandas("id_test")
# Predict with the trained estimator and score plain accuracy against the labels.
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_true=y_te, y_pred=yhat_te)
# Bare expression so the notebook displays the accuracy as the cell output.
acc

0.9404725283794633

Evaluate the model trained on the in-domain training set on the out-of-domain (OOD) test set.

In [53]:
# Rebind X_te / y_te to the "ood_test" split; this overwrites the "id_test"
# split loaded in the previous evaluation cell.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# Display the feature columns of the split as the cell output.
X_te.columns

Index(['hint_count', 'attempt_count', 'ms_first_response', 'position',
       'overlap_time', 'Average_confidence(FRUSTRATED)',
       'Average_confidence(CONFUSED)', 'Average_confidence(CONCENTRATING)',
       'Average_confidence(BORED)', 'problem_type_algebra',
       'problem_type_fill_in_1', 'problem_type_choose_1',
       'problem_type_choose_n', 'problem_type_rank',
       'problem_type_open_response', 'tutor_mode_tutor', 'type_LinearSection',
       'type_MasterySection', 'type_RandomChildOrderSection',
       'type_RandomIterateSection', 'type_PlacementsSection',
       'type_ChooseConditionSection', 'type_NumericLimitSection',
       'bottom_hint', 'skill_id', 'first_action'],
      dtype='object')

In [54]:
# Score the already-trained estimator on the split currently held in X_te / y_te
# (the "ood_test" split loaded in the preceding cell).
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_true=y_te, y_pred=yhat_te)
# Bare expression so the notebook displays the accuracy as the cell output.
acc

0.5839454354669464

In [55]:
# Confidence interval for the accuracy computed in the previous cell.
# Number of evaluated examples.
nobs = len(y_te)
# `acc` is a float ratio, so nobs * acc can land a hair off the true integer
# number of correct predictions (e.g. 556.9999999999999). Round it back to the
# exact success count, which is what the Clopper-Pearson interval is defined on.
count = int(round(nobs * acc))
# beta : Clopper-Pearson interval based on Beta distribution
# alpha=0.05 is the significance level passed to statsmodels.
acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
# Bare expression so the notebook displays the (lower, upper) bounds.
acc_conf

(0.5614396708422, 0.6061924645119001)

Perform 5-fold cross-validation on the OOD test set.

In [56]:
# Cross-validate directly on the "ood_test" split.
# Reload the split so this cell does not depend on which split X_te / y_te
# currently hold.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# NOTE: `config` and `estimator` are rebound here; after this cell `estimator`
# is a newly constructed one, not the model trained above.
config = get_default_config(model, dset)
estimator = get_estimator(model, **config)
# cv=5 requests five folds; the per-fold scores are displayed as the cell output.
cross_val_score(estimator, X_te, y_te, cv=5)

array([0.7513089 , 0.7664042 , 0.77952756, 0.75853018, 0.75328084])

# Experiments with a random feature subset (no. 228)

In [57]:
# Run configuration for the random-subset experiment.
# `model` and `experiment` are the identifiers handed to the tableshift helpers
# (get_default_config / get_estimator and get_dataset) in the following cells.
model = "xgb"
experiment = "assistments_random_test_228"
# Both directories point at the same scratch folder. `cache_dir` is passed to
# get_dataset; `save_dir` is not referenced in the visible cells.
cache_dir = save_dir = "../tmp"

In [58]:
# Load the dataset object for the configured experiment, passing `cache_dir`
# through to the loader. This replaces the `dset` of the previous experiment.
dset = get_dataset(experiment, cache_dir)
# Default hyperparameter configuration for this model / dataset pair.
config = get_default_config(model, dset)
# NOTE(review): this estimator is rebuilt from the same config in the training
# cell before it is ever used, so this instantiation looks redundant — confirm
# get_estimator has no required side effects before removing it.
estimator = get_estimator(model, **config)

skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)


Train on in-domain train set

In [59]:
# Build a new estimator from the default config, then train it on `dset`.
# `train` returns the estimator that the evaluation cells below predict with.
# NOTE(review): which split is used for fitting is decided inside `train`
# (not visible here); the notebook text says the in-domain training set.
estimator = get_estimator(model, **config)
estimator = train(estimator, dset, config=config)

Evaluate the model trained on the in-domain training set on the in-domain (ID) test set.

In [60]:
# Fetch the "id_test" split. get_pandas returns four items; only the first two
# (bound here as features and labels) are used, the remaining two are discarded.
X_te, y_te, _, _ = dset.get_pandas("id_test")
# Predict with the trained estimator and score plain accuracy against the labels.
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_true=y_te, y_pred=yhat_te)
# Bare expression so the notebook displays the accuracy as the cell output.
acc

0.9259141594399299

Evaluate the model trained on the in-domain training set on the out-of-domain (OOD) test set.

In [61]:
# Rebind X_te / y_te to the "ood_test" split; this overwrites the "id_test"
# split loaded in the previous evaluation cell.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# Display the feature columns of the split as the cell output.
X_te.columns

Index(['hint_count', 'attempt_count', 'ms_first_response', 'overlap_time',
       'Average_confidence(FRUSTRATED)', 'Average_confidence(CONFUSED)',
       'Average_confidence(CONCENTRATING)', 'problem_type_choose_1',
       'problem_type_algebra', 'problem_type_fill_in_1', 'problem_type_rank',
       'problem_type_choose_n', 'problem_type_open_response', 'bottom_hint',
       'first_action'],
      dtype='object')

In [62]:
# Score the already-trained estimator on the split currently held in X_te / y_te
# (the "ood_test" split loaded in the preceding cell).
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_true=y_te, y_pred=yhat_te)
# Bare expression so the notebook displays the accuracy as the cell output.
acc

0.9100055897149245

In [63]:
# Confidence interval for the accuracy computed in the previous cell.
# Number of evaluated examples.
nobs = len(y_te)
# `acc` is a float ratio, so nobs * acc can land a hair off the true integer
# number of correct predictions (e.g. 556.9999999999999). Round it back to the
# exact success count, which is what the Clopper-Pearson interval is defined on.
count = int(round(nobs * acc))
# beta : Clopper-Pearson interval based on Beta distribution
# alpha=0.05 is the significance level passed to statsmodels.
acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
# Bare expression so the notebook displays the (lower, upper) bounds.
acc_conf

(0.9048577313117038, 0.9149619549378666)

Perform 5-fold cross-validation on the OOD test set.

In [64]:
# Cross-validate directly on the "ood_test" split.
# Reload the split so this cell does not depend on which split X_te / y_te
# currently hold.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# NOTE: `config` and `estimator` are rebound here; after this cell `estimator`
# is a newly constructed one, not the model trained above.
config = get_default_config(model, dset)
estimator = get_estimator(model, **config)
# cv=5 requests five folds; the per-fold scores are displayed as the cell output.
cross_val_score(estimator, X_te, y_te, cv=5)

array([0.9249501 , 0.92255489, 0.92694611, 0.92731629, 0.91733227])