In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
%reload_ext autoreload
%autoreload 2

import sys

# Make the parent directory importable so the project imports below resolve.
# Guarded so that re-running this cell does not pile up duplicate entries
# in sys.path.
if '../' not in sys.path:
    sys.path.append('../')

from shared.progress import ProgressEstimator
from shared.progsnap import ProgSnap2Dataset, PS2, EventType
from shared.database import CSVDataProvider, SQLiteDataProvider
from shared.data import SQLiteLogger

In [None]:
from configs import config_PCRS, config_iSnap, config_CWO

# Pre-declare the names the selected config is expected to supply, so static
# analysers such as Pylance treat them as defined. The placeholders are
# overwritten by the config injection below.
submit_columns = None
test_problem_id = None
problem_id_column = None
code_column = None
data_folder = None
database = None

# Choose the config you want to use (config_PCRS, config_iSnap or config_CWO).
# globals() is used instead of locals(): writing through locals() is only
# dependable at module scope, while globals() always targets the namespace
# the rest of the notebook reads from.
globals().update(config_PCRS)

In [None]:
# List problem IDs, in case you want to select a subset.
# The dataset is opened through a CSVDataProvider pointed at `data_folder`;
# the IDs are the distinct values of `problem_id_column` in its main table.
dataset = ProgSnap2Dataset(CSVDataProvider(data_folder))
problem_ids = dataset.get_main_table()[problem_id_column].unique()
# Bare expression so the notebook displays the IDs as the cell output.
problem_ids

In [None]:
# Optionally, select a subset of problem_ids: keep only the first N_PROBLEMS.
# Set N_PROBLEMS to None to keep every problem (a slice with a None stop
# returns the whole sequence).
N_PROBLEMS = 5
problem_ids = problem_ids[:N_PROBLEMS]
problem_ids

In [None]:
# Create the two shared objects the remaining cells rely on:
#   dataset - passed to the builder inside build_problem
#   logger  - SQLiteLogger created from `database`; models are stored and
#             fetched through it
dataset = ProgSnap2Dataset(CSVDataProvider(data_folder))
logger = SQLiteLogger(database)

In [None]:
# Optional: clear the existing models before rebuilding.
# NOTE(review): presumably this empties the "Models" table behind `logger`,
# so previously stored models would be lost -- skip this cell to keep them.
logger.clear_table("Models")

In [None]:
from shared.preprocess import SimpleAIFBuilder

def build_problem(problem_id):
    """Train the models for one problem and store them through `logger`.

    A SimpleAIFBuilder is created for `problem_id` (using the notebook-level
    `code_column` and `problem_id_column`) and built from the notebook-level
    `dataset`. The trained progress model and classifier are then handed to
    `logger.set_models`, together with a count of unique training entries.

    Args:
        problem_id: Identifier of the problem to build models for.

    Returns:
        None. The results are passed to `logger.set_models`.
    """
    aif_builder = SimpleAIFBuilder(
        problem_id,
        code_column=code_column,
        problem_id_column=problem_id_column,
    )
    aif_builder.build(dataset)

    # Fetch the classifier first, then the progress model, in case the
    # getters have side effects that depend on call order.
    trained_classifier = aif_builder.get_trained_classifier()
    progress_model = aif_builder.get_trained_progress_model()

    # Number of distinct X_train entries selected by y_train
    # (presumably the distinct correct submissions -- TODO confirm).
    selected_entries = aif_builder.X_train[aif_builder.y_train]
    n_unique_selected = int(selected_entries.unique().size)

    logger.set_models(
        problem_id, progress_model, trained_classifier, n_unique_selected
    )

In [None]:
# Build and store the models for every selected problem, one at a time.
# A message is printed before and after each build to show progress; an
# exception raised by build_problem stops the loop at that problem.
for problem_id in problem_ids:
    print(f"Building problem {problem_id}")
    build_problem(problem_id)
    print(f"Built problem {problem_id}")

In [None]:
# Test to confirm you can get a specific model: fetch the stored pair for
# `test_problem_id` through the logger and unpack it.
progress, classifier = logger.get_models(test_problem_id)
# Display the first element of the pair as the cell output.
progress