In [2]:
"""Python script to run experiment and record the performance."""
from pathlib import Path
from sklearn.metrics import accuracy_score
from statsmodels.stats.proportion import proportion_confint


from tableshift import get_dataset
from tableshift.models.training import train
from tableshift.models.utils import get_estimator
from tableshift.models.default_hparams import get_default_config
from sklearn.model_selection import cross_val_score

import xgboost as xgb


  from .autonotebook import tqdm as notebook_tqdm


# Experiment with all features

In [None]:
# Settings for the run on the "assistments" experiment.
experiment = "assistments"
model = "xgb"
# The cache and save locations point at the same directory.
cache_dir = save_dir = "../tmp"

In [3]:
# Build the dataset object for the configured experiment.
# cache_dir is passed as the second argument (presumably the on-disk cache location — confirm against tableshift).
dset = get_dataset(experiment, cache_dir)

Train on in-domain train set

In [4]:
# Pull features and labels of the "train" split; the last two returned items are not needed here.
X_train, y_train, _, _ = dset.get_pandas("train")
# Fit an XGBoost classifier with library-default hyperparameters (fit returns the estimator itself).
estimator = xgb.XGBClassifier().fit(X_train, y_train)

Evaluate the model trained on the in-domain train set on the in-domain (ID) test set.

In [5]:
# Predict with `estimator` on the "id_test" split and report plain accuracy.
X_te, y_te, _, _ = dset.get_pandas("id_test")
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.9404725283794633

Evaluate the model trained on the in-domain train set on the out-of-domain (OOD) test set.

In [6]:
# Load the "ood_test" split (overwrites X_te / y_te from the previous evaluation cell)
# and display the feature columns it contains.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
X_te.columns

Index(['hint_count', 'attempt_count', 'ms_first_response', 'position',
       'overlap_time', 'Average_confidence(FRUSTRATED)',
       'Average_confidence(CONFUSED)', 'Average_confidence(CONCENTRATING)',
       'Average_confidence(BORED)', 'problem_type_algebra',
       'problem_type_fill_in_1', 'problem_type_choose_1',
       'problem_type_choose_n', 'problem_type_rank',
       'problem_type_open_response', 'tutor_mode_tutor', 'type_LinearSection',
       'type_MasterySection', 'type_RandomChildOrderSection',
       'type_RandomIterateSection', 'type_PlacementsSection',
       'type_ChooseConditionSection', 'type_NumericLimitSection',
       'bottom_hint', 'skill_id', 'first_action'],
      dtype='object')

In [7]:
# Score `estimator` on whatever split X_te / y_te currently hold and show the accuracy.
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.5839454354669464

In [8]:
# 95% confidence interval for the accuracy stored in `acc`, computed over the rows of `y_te`.
nobs = len(y_te)
# acc is correct/nobs, so nobs * acc can be a hair off the true integer number of
# correct predictions because of float rounding; snap it back to an integer count.
count = int(round(nobs * acc))
# beta : Clopper-Pearson interval based on Beta distribution
acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
acc_conf

(0.5614396708422, 0.6061924645119001)

Perform cross-validation on ood test set

In [9]:
# 5-fold cross-validation of a fresh, default XGBoost classifier using only the "ood_test" split.
# Note: this rebinds `estimator` to an unfitted classifier.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
estimator = xgb.XGBClassifier()
cross_val_score(estimator, X=X_te, y=y_te, cv=5)

array([0.7513089 , 0.7664042 , 0.77952756, 0.75853018, 0.75328084])

# Experiments with a random feature subset (no. 228)

In [10]:
# Settings for the run on the "assistments_random_test_228" experiment.
experiment = "assistments_random_test_228"
model = "xgb"
# The cache and save locations point at the same directory.
cache_dir = save_dir = "../tmp"

In [11]:
# Rebuild `dset` for the newly configured experiment (replaces the previous dataset object).
dset = get_dataset(experiment, cache_dir)

skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)


Train on in-domain train set

In [12]:
# Pull features and labels of the "train" split; the last two returned items are not needed here.
X_train, y_train, _, _ = dset.get_pandas("train")
# Fit an XGBoost classifier with library-default hyperparameters (fit returns the estimator itself).
estimator = xgb.XGBClassifier().fit(X_train, y_train)

Evaluate the model trained on the in-domain train set on the in-domain (ID) test set.

In [13]:
# Predict with `estimator` on the "id_test" split and report plain accuracy.
X_te, y_te, _, _ = dset.get_pandas("id_test")
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.9259141594399299

Evaluate the model trained on the in-domain train set on the out-of-domain (OOD) test set.

In [14]:
# Load the "ood_test" split (overwrites X_te / y_te from the previous evaluation cell)
# and display the feature columns it contains.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
X_te.columns

Index(['hint_count', 'attempt_count', 'ms_first_response', 'overlap_time',
       'Average_confidence(FRUSTRATED)', 'Average_confidence(CONFUSED)',
       'Average_confidence(CONCENTRATING)', 'problem_type_choose_1',
       'problem_type_algebra', 'problem_type_fill_in_1', 'problem_type_rank',
       'problem_type_choose_n', 'problem_type_open_response', 'bottom_hint',
       'first_action'],
      dtype='object')

In [15]:
# Score `estimator` on whatever split X_te / y_te currently hold and show the accuracy.
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.9100055897149245

In [16]:
# 95% confidence interval for the accuracy stored in `acc`, computed over the rows of `y_te`.
nobs = len(y_te)
# acc is correct/nobs, so nobs * acc can be a hair off the true integer number of
# correct predictions because of float rounding; snap it back to an integer count.
count = int(round(nobs * acc))
# beta : Clopper-Pearson interval based on Beta distribution
acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
acc_conf

(0.9048577313117038, 0.9149619549378666)

Perform cross-validation on ood test set

In [17]:
# 5-fold cross-validation of a fresh, default XGBoost classifier using only the "ood_test" split.
# Note: this rebinds `estimator` to an unfitted classifier.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# Single assignment; the original chained `estimator = estimator = ...` was redundant.
estimator = xgb.XGBClassifier()
cross_val_score(estimator, X_te, y_te, cv=5)

array([0.9249501 , 0.92255489, 0.92694611, 0.92731629, 0.91733227])

# Experiments with all features but skill

In [18]:
# Settings for the run on the "assistments_but_skill" experiment, followed by the dataset build.
experiment = "assistments_but_skill"
model = "xgb"
# The cache and save locations point at the same directory.
cache_dir = save_dir = "../tmp"
dset = get_dataset(experiment, cache_dir)

skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)


Train on in-domain train set

In [19]:
# Pull features and labels of the "train" split; the last two returned items are not needed here.
X_train, y_train, _, _ = dset.get_pandas("train")
# Default-hyperparameter XGBoost classifier; the fit call is left as the cell's
# final expression so the notebook displays the fitted estimator.
estimator = xgb.XGBClassifier()
estimator.fit(X_train, y_train)

Evaluate the model trained on the in-domain train set on the in-domain (ID) test set.

In [20]:
# Predict with `estimator` on the "id_test" split and report plain accuracy.
X_te, y_te, _, _ = dset.get_pandas("id_test")
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.9350627861531258

Evaluate the model trained on the in-domain train set on the out-of-domain (OOD) test set.

In [21]:
# Load the "ood_test" split (overwrites X_te / y_te from the previous evaluation cell)
# and display the feature columns it contains.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
X_te.columns

Index(['hint_count', 'attempt_count', 'ms_first_response', 'position',
       'overlap_time', 'Average_confidence(FRUSTRATED)',
       'Average_confidence(CONFUSED)', 'Average_confidence(CONCENTRATING)',
       'Average_confidence(BORED)', 'problem_type_choose_1',
       'problem_type_algebra', 'problem_type_fill_in_1', 'problem_type_rank',
       'problem_type_choose_n', 'problem_type_open_response',
       'tutor_mode_tutor', 'tutor_mode_test', 'type_LinearSection',
       'type_MasterySection', 'type_RandomChildOrderSection',
       'type_RandomIterateSection', 'type_PlacementsSection',
       'type_ChooseConditionSection', 'type_NumericLimitSection',
       'bottom_hint', 'first_action'],
      dtype='object')

In [22]:
# Score `estimator` on whatever split X_te / y_te currently hold and show the accuracy.
yhat_te = estimator.predict(X_te)
acc = accuracy_score(y_te, yhat_te)
acc

0.9101652958556257

In [23]:
# 95% confidence interval for the accuracy stored in `acc`, computed over the rows of `y_te`.
nobs = len(y_te)
# acc is correct/nobs, so nobs * acc can be a hair off the true integer number of
# correct predictions because of float rounding; snap it back to an integer count.
count = int(round(nobs * acc))
# beta : Clopper-Pearson interval based on Beta distribution
acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
acc_conf

(0.9050214080347108, 0.915117614315424)

Perform cross-validation on ood test set

In [24]:
# 5-fold cross-validation of a fresh, default XGBoost classifier using only the "ood_test" split.
# Note: this rebinds `estimator` to an unfitted classifier.
X_te, y_te, _, _ = dset.get_pandas("ood_test")
# Single assignment; the original chained `estimator = estimator = ...` was redundant.
estimator = xgb.XGBClassifier()
cross_val_score(estimator, X_te, y_te, cv=5)

array([0.93293413, 0.93413174, 0.93932136, 0.93690096, 0.92492013])

# Experiments for 100 random subsets

In [4]:
import json
from lightgbm import LGBMClassifier

# Settings shared by every run; hoisted out of the loop because they never change.
model = "lightgbm"
cache_dir = "../tmp"
save_dir = Path("../tmp")
test_splits = ["id_test", "ood_test", "validation"]

# Train and evaluate one default LightGBM classifier per "assistments_random_test_<index>"
# experiment and store the metrics as JSON under save_dir/<experiment>/.
# NOTE(review): the section heading mentions 100 random subsets but this loop covers
# 500 indices — confirm which number is intended.
for index in range(500):
    experiment = f"assistments_random_test_{index}"
    dset = get_dataset(experiment, cache_dir)

    estimator = LGBMClassifier()
    X_train, y_train, _, _ = dset.get_pandas("train")
    estimator.fit(X_train, y_train)

    evaluation = {}
    for test_split in test_splits:
        # Fetch predictions and labels for a sklearn model.
        X_te, y_te, _, _ = dset.get_pandas(test_split)
        X_te = X_te.astype(float)
        yhat_te = estimator.predict(X_te)

        # Calculate accuracy
        acc = accuracy_score(y_true=y_te, y_pred=yhat_te)
        evaluation[test_split] = acc
        nobs = len(y_te)
        # nobs * acc can be a hair off the integer number of correct predictions
        # because of float rounding; snap it back to an integer count.
        count = int(round(nobs * acc))
        # beta : Clopper-Pearson interval based on Beta distribution
        acc_conf = proportion_confint(count, nobs, alpha=0.05, method="beta")
        evaluation[test_split + "_conf"] = acc_conf
        print(f"training completed! {test_split} accuracy: {acc:.4f}")

    # Write the results once per experiment, after every split has been scored
    # (previously the file was rewritten inside the split loop on each pass).
    evaluation["features"] = dset.predictors
    SAVE_DIR_EXP = save_dir / experiment
    # parents=True so a missing save_dir does not abort the run.
    SAVE_DIR_EXP.mkdir(parents=True, exist_ok=True)
    with open(SAVE_DIR_EXP / f"{model}_eval.json", "w") as f:
        # Use json.dump to write the dictionary into the file
        json.dump(evaluation, f)

skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)
training completed! id_test accuracy: 0.9253
training completed! ood_test accuracy: 0.9100
training completed! validation accuracy: 0.9257
skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)
training completed! id_test accuracy: 0.7762
training completed! ood_test accuracy: 0.7323
training completed! validation accuracy: 0.7751
skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)
training completed! id_test accuracy: 0.8779
training completed! ood_test accuracy: 0.4460
training completed! validation accuracy: 0.8778
skillbuilder-data-2009-2010.zip: Skipping, found more recently modified local copy (use --force to force download)
training completed! id_test accuracy: 0.9324
training completed! ood_test accuracy: 0.9099
training completed! validation a