### Initializing Raw Datasets

In [None]:
# Install the Kaggle CLI with %pip so it lands in the environment of the
# running kernel (consistent with the other install cell of this notebook).
%pip install kaggle

# Fetch the ARCADE sources and keep only the `arcade_nl2code` package directory.
# A shallow clone is sufficient: the checkout is deleted at the end of this cell.
!git clone --depth 1 https://github.com/google-research/arcade-nl2code
!mv arcade-nl2code/arcade_nl2code arcade_nl2code

# Download the dataset archive into the dataset directory of the package.
# NOTE(review): the kaggle CLI presumably needs API credentials to be configured
# beforehand - confirm for the target environment.
!kaggle datasets download \
    -d googleai/arcade-nl2code-dataset \
    -p arcade_nl2code/annotated_dataset/dataset/ --force

# Extract the archive next to where it was downloaded.
!unzip arcade_nl2code/annotated_dataset/dataset/arcade-nl2code-dataset.zip \
    -d arcade_nl2code/annotated_dataset/dataset/

# Remove what is left of the clone.
!rm -rf arcade-nl2code

In [None]:
# Python dependencies: the requirements file shipped with the evaluation
# package, followed by a set of additional packages.
%pip install -r arcade_nl2code/evaluation/requirements.txt
%pip install seqio diff_match_patch ipython_genutils cloudpickle statsmodels openpyxl
# `faketime` is invoked by the prompt-generation cell to run the generator
# under a fixed date/time.
!sudo apt-get install -y faketime

In [None]:
# Run build_existing_tasks_split.py from inside annotated_dataset/.
# PYTHONPATH=../../ resolves to the notebook's working directory, i.e. the
# directory that contains the `arcade_nl2code` package.
# NOTE(review): presumably this produces dataset/existing_tasks/dataset.json,
# which later cells read - confirm against the script.
!cd arcade_nl2code/annotated_dataset && PYTHONPATH=../../ python build_existing_tasks_split.py

In [None]:
# Run build_new_tasks_split.py from inside annotated_dataset/.
# PYTHONPATH=../../ resolves to the notebook's working directory, i.e. the
# directory that contains the `arcade_nl2code` package.
# NOTE(review): presumably this produces dataset/new_tasks/dataset.json,
# which later cells read - confirm against the script.
!cd arcade_nl2code/annotated_dataset && PYTHONPATH=../../ python build_new_tasks_split.py

In [None]:
# Patch the task code embedded in both dataset.json files, in place (sed -i):
#   * `np.bool` -> builtin `bool`. The dot is escaped and a GNU sed word
#     boundary is required so that longer names such as `np.bool_` are left
#     untouched instead of being turned into the undefined name `bool_`.
#   * the `stats.mode(genres)` unpacking -> pandas `value_counts().idxmax()`.
#   * seaborn calls: positional column arguments -> explicit `x=` / `y=` keywords.
#   * occurrences of `al_results_2020` and `general_english_r` are renamed.
# All expressions are applied to the existing_tasks and new_tasks datasets.
!sed -i \
    -e 's/np\.bool\b/bool/g' \
    -e 's/item, _ = stats.mode(genres)/item = pd.Series(genres).value_counts().idxmax()/g' \
    -e "s/sns.lmplot('Fat', 'Energy'/sns.lmplot(x='Fat',y='Energy'/g" \
    -e "s/sns.barplot('Ingredient', 'Fat'/sns.barplot(x='Ingredient', y='Fat'/g" \
    -e "s/sns.boxplot('Ingredient', 'Fat'/sns.boxplot(x='Ingredient', y='Fat'/g" \
    -e "s/sns.regplot('Fat', y='Energy'/sns.regplot(x='Fat', y='Energy'/g" \
    -e "s/sns.jointplot('Fat','Energy'/sns.jointplot(x='Fat', y='Energy'/g" \
    -e "s/al_results_2020/2020_al_data_kaggle_upload_new_old_syllabi/g" \
    -e "s/general_english_r/ge_r/g" \
    ./arcade_nl2code/annotated_dataset/dataset/existing_tasks/dataset.json \
    ./arcade_nl2code/annotated_dataset/dataset/new_tasks/dataset.json

In [None]:
import json
import os

# For every annotated notebook in both splits, write a notebook-style JSON file
# ({"cells": [...]}) to <split>/artifacts/<notebook_name>. The cells are the
# context cells recorded on the notebook's last turn, followed by that turn's
# intent (as a markdown cell) and its code (as a code cell).
basedir = 'arcade_nl2code/annotated_dataset/dataset'

# Fields attached to every emitted cell. They are merged last, so they replace
# same-named keys already present on a cell (e.g. existing outputs are cleared).
meta = {"execution_count": None, "metadata": {}, "outputs": []}

for d in ["existing_tasks", "new_tasks"]:
    datafile = f'{basedir}/{d}/dataset.json'
    # Explicit UTF-8 so that loading does not depend on the locale's default encoding.
    with open(datafile, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for notebook in data:
        fpath = f'{basedir}/{d}/artifacts/{notebook["notebook_name"]}'
        last_turn = notebook['turns'][-1]
        last_intent = last_turn["turn"]["intent"]["value"]
        last_code = last_turn["turn"]["code"]["value"]
        # Build a fresh list instead of extending the loaded context cells in
        # place, so re-running the loop body never appends the last turn twice.
        cells = list(last_turn["metadata"]["context_cells"]) + [
            {"cell_type": "markdown", "source": last_intent},
            {"cell_type": "code", "source": last_code},
        ]
        # {**c, **meta} is equivalent to `c | meta` but also works before Python 3.9.
        contents = {"cells": [{**c, **meta} for c in cells]}
        print(f'Saving notebook {notebook["notebook_name"]}')
        # notebook_name may contain sub-directories; make sure they exist.
        os.makedirs(os.path.dirname(fpath), exist_ok=True)
        with open(fpath, 'w', encoding='utf-8') as f:
            json.dump(contents, f, indent=4)

### Generating Prompt Datasets

In [None]:
# Prompt size budget. It is passed to --max_prompt_size and is also embedded in
# the truncate-metadata file name (the `maxp<N>` part), so both stay in sync.
%env MAX_PROMPT_SIZE=900
%env DATASET_ROOT=arcade_nl2code/annotated_dataset/dataset
# Generate schema-augmented prompts for the existing_tasks split. The generator
# runs under `faketime`, i.e. with the clock pinned to the given timestamp.
!faketime "2022-12-10 12:00:00" python -m arcade_nl2code.annotated_dataset.generate_schema_augmented_prompts \
    --dataset ${DATASET_ROOT}/existing_tasks/dataset.json \
    --output_folder ${DATASET_ROOT}/existing_tasks/derived_datasets/ \
    --runtime_artifacts_root ${DATASET_ROOT}/existing_tasks/artifacts/ \
    --schema_representation_method "originating_dfs.header_description.after_variable_cell" \
    --max_prompt_size ${MAX_PROMPT_SIZE} \
    --truncate_metadata_path ${DATASET_ROOT}/existing_tasks/derived_datasets/dataset.schema.originating_dfs.header_description.after_variable_cell.maxp${MAX_PROMPT_SIZE}.maxp_no_prefix-1.maxctxcell-1.truncate_metadata.json \
    --ignore_errors


### Running Evaluation 

In [None]:
# Produce a dummy prediction file from the new_tasks prompt dataset and store it
# under evaluation/test_data/, which the evaluation container mounts as /data.
# NOTE(review): the --input file lives under new_tasks/derived_datasets/, while
# the prompt-generation cell of this notebook only targets existing_tasks -
# confirm that this file is produced by an earlier step.
!PYTHONPATH=. python arcade_nl2code/evaluation/scripts/get_dummy_prediction.py \
    --input arcade_nl2code/annotated_dataset/dataset/new_tasks/derived_datasets/dataset.schema.originating_dfs.header_description.after_variable_cell.maxp900.maxp_no_prefix-1.maxctxcell-1.json \
    --output arcade_nl2code/evaluation/test_data/dummy_prediction.json

In [None]:
# Build the Docker image from arcade_nl2code/evaluation/ and tag it
# `notebook_evaluator`; the evaluation cell runs `notebook_evaluator:latest`.
!cd arcade_nl2code/evaluation/ && docker build -t notebook_evaluator .

In [None]:
# Run arcade_nl2code.evaluation.execution_evaluation_main inside the
# `notebook_evaluator` image on the dummy prediction file.
# Bind mounts (host -> container):
#   evaluation/test_data/                       -> /data       (prediction file in, --output_path out)
#   annotated_dataset/dataset/new_tasks/artifacts -> /artifacts  (--runtime_artifact_root)
# NOTE(review): --timeout 180 and --num_workers 20 are hard-coded; the timeout
# unit and a sensible worker count for the host should be confirmed.
%env PROJECT_ROOT=arcade_nl2code
!docker run -it --shm-size=4g \
  --mount type=bind,source=$PWD/${PROJECT_ROOT}/evaluation/test_data/,target=/data \
  --mount type=bind,source=$PWD/${PROJECT_ROOT}/annotated_dataset/dataset/new_tasks/artifacts,target=/artifacts \
  -w / \
  --entrypoint /opt/conda/bin/python \
  notebook_evaluator:latest \
  -m arcade_nl2code.evaluation.execution_evaluation_main \
  --prediction_file /data/dummy_prediction.json \
  --output_path /data/ \
  --runtime_artifact_root /artifacts \
  --lm_output_postprocessor extract_first_cell_block  \
  --split_episode \
  --noreuse_state \
  --timeout 180 \
  --num_workers 20