In [None]:
# Directory under which the neural-swipe-typing repository is cloned and used.
ROOT_DIR = "/kaggle/working"
# Argument handed to `git checkout` inside the cloned repository
# (presumably a branch name, since `git pull` is run right after it).
GIT_CHECKOUT_ARG = "refactor-model-module"

In [None]:
# Clone repo and download dataset

import os

if not os.path.exists(f"{ROOT_DIR}/neural-swipe-typing"):
    !git clone https://github.com/proshian/neural-swipe-typing.git {ROOT_DIR}/neural-swipe-typing
    %cd {ROOT_DIR}/neural-swipe-typing
    !git checkout {GIT_CHECKOUT_ARG}

if not os.path.exists(f"{ROOT_DIR}/neural-swipe-typing/data/data_preprocessed/valid.jsonl"):
    %cd {ROOT_DIR}/neural-swipe-typing/src
    !python ./data_obtaining_and_preprocessing/download_dataset_preprocessed.py
    %cd {ROOT_DIR}/neural-swipe-typing

In [None]:
# Work from the repository root: later cells use paths relative to it
# (e.g. configs/train.json).
%cd {ROOT_DIR}/neural-swipe-typing

In [None]:
# Restore the tracked configs/train.json first, so that local changes to it
# do not cause "!git checkout {GIT_CHECKOUT_ARG}" to be aborted
# (a later cell of this notebook overwrites that file).
# NOTE(review): configs/feature_extractor/<name>.json is overwritten by this
# notebook as well but is not restored here — confirm it cannot block the
# checkout or the pull.
!git checkout configs/train.json
!git checkout {GIT_CHECKOUT_ARG}
# Update the checked-out ref from its remote.
!git pull

In [None]:
# List the files in checkpoints/epoch_end, the directory CKPT_OF_CHOICE is resolved against.
!ls  {ROOT_DIR}/neural-swipe-typing/checkpoints/epoch_end  

In [None]:
# !!! Note: package version differ from {ROOT_DIR}/neural-swipe-typing/requirements/requirements.txt
!pip install lightning

In [None]:
# File name of a checkpoint inside checkpoints/epoch_end; it is used to build
# "path_to_continue_checkpoint" in the training config. When left as None
# (or any falsy value) that config entry is set to None.
CKPT_OF_CHOICE = None

In [None]:
# Base name (without ".json") of the feature-extractor config file. It is used
# both for the file written at the end of this cell and for the path stored
# in the training config, so the two always stay in sync.
FEATURE_EXTRACTOR_NAME = "traj_and_nearest"


import json


# Training configuration. It is serialized to configs/train.json below, which
# is the file passed to `python -m src.train --train_config ...`.
# Relative paths are written as-is; the training command is run from the
# repository root.
train_config = {
    "num_classes": 35,
    "max_out_seq_len": 35,
    "grid_name": "default",
    "grids_path": "./data/data_preprocessed/gridname_to_grid.json",
    "trajectory_features_statistics_path": "./data/data_preprocessed/trajectory_features_statistics.json",
    "bounding_boxes_path": "./data/data_preprocessed/key_bounding_boxes.json",
    "keyboard_tokenizer_path": "./tokenizers/keyboard/ru.json",
    # Derived from FEATURE_EXTRACTOR_NAME instead of being hard-coded, so that
    # renaming the extractor cannot leave this pointing at a stale file.
    "swipe_feature_extractor_factory_config_path": f"./configs/feature_extractor/{FEATURE_EXTRACTOR_NAME}.json",
    "swipe_point_embedder_config_path": "./configs/swipe_point_embedder/separate_traj_and_nearest__6_coord.json",
    "dataset_paths": {
        "train": "./data/data_preprocessed/train_filtered.jsonl",
        "val": "./data/data_preprocessed/valid.jsonl"
    },
    "dataloader_num_workers": 4,
    "train_batch_size": 256,
    "val_batch_size": 512,
    "vocab_path": "./data/data_preprocessed/voc.txt",
    "train_total": 5591814,
    "val_total": 10000,
    "seed": 42,
    "early_stopping": {
        "enabled": False,
        "patience": 30
    },
    "lr_scheduler": {
        "type": "ReduceLROnPlateau",
        "params": {
            "factor": 0.5,
            "patience": 8
        }
    },
    # Absolute path to the chosen checkpoint, or None when CKPT_OF_CHOICE is unset.
    "path_to_continue_checkpoint": f"{ROOT_DIR}/neural-swipe-typing/checkpoints/epoch_end/{CKPT_OF_CHOICE}" if CKPT_OF_CHOICE else None,
    "model_name": "v3_nearest_and_traj_transformer_bigger",
    "label_smoothing": 0.045,
    "optimizer": {
        "type": "Adam",
        "params": {"lr":1e-4, "weight_decay":0}
    },
    "device": "cuda"
}


# Overwrites the repository's configs/train.json with the settings above.
with open(f'{ROOT_DIR}/neural-swipe-typing/configs/train.json', 'w') as f:
    json.dump(train_config, f)


###########################################################


# Feature-extractor configuration: a list of {"type", "params"} entries,
# written to configs/feature_extractor/<FEATURE_EXTRACTOR_NAME>.json.
feature_extractor_config = [
    {
        "type": "trajectory",
        "params": {
            "include_dt": False,
            "include_velocities": True,
            "include_accelerations": True
        }
    },
    {
        "type": "nearest_key",
        "params": {
            "use_lookup": True
        }
    }
]


with open(f'{ROOT_DIR}/neural-swipe-typing/configs/feature_extractor/{FEATURE_EXTRACTOR_NAME}.json', 'w') as f:
    json.dump(feature_extractor_config, f)

In [None]:
# Launch training with the config file written to configs/train.json earlier in
# this notebook (the relative path relies on the working directory being the
# repository root).
!python -m src.train --train_config configs/train.json