# Install necessary packages

In [None]:
!wget -q 'https://downloads.rclone.org/v1.65.1/rclone-v1.65.1-linux-amd64.deb' -O 'rclone-v1.65.1-linux-amd64.deb'
!dpkg -i ./'rclone-v1.65.1-linux-amd64.deb' >/dev/null
!apt install fuse3 >/dev/null

# Mounting OneDrive

In [None]:
#@markdown # <center>rclone config file</center>

from google.colab import files
from sys import exit as end
from pathlib import Path


#@markdown ***
UPLOAD_CONFIG_FILE = True #@param {type:"boolean"}
DOWNLOAD_CONFIG_FILE = False #@param {type:"boolean"}

rclone_config_dir = '/root/.config/rclone'
rclone_config_filename = 'rclone.conf'
rclone_config_filepath = Path(rclone_config_dir) / rclone_config_filename

if UPLOAD_CONFIG_FILE:
    print(f'Select your {rclone_config_filename} file')
    file = files.upload()
    if list(file.keys())[0] != rclone_config_filename:
        end(f'File Error: File is not {rclone_config_filename}')
    elif not Path(rclone_config_dir).exists():
        !rclone mkdir {rclone_config_dir}

    !rclone move '/content/{rclone_config_filename}' {rclone_config_dir}
    !rclone config redacted
elif DOWNLOAD_CONFIG_FILE:
    if not Path(rclone_config_filepath).exists():
        end(f'File Error: {rclone_config_filename} not found in {rclone_config_dir}')
    files.download(rclone_config_filepath)


In [None]:
# Switch to /content, then lazily detach (-l) whatever is mounted at
# /content/onedrive so the mount cell below can be re-run.
# On a fresh runtime nothing is mounted yet and umount just reports an error.
%cd /content
!umount -l /content/onedrive

In [None]:
!rm nohup.out
!mkdir -p /content/onedrive
!nohup rclone --vfs-cache-mode full mount google-colab-onedrive:AI/transformer-model/machine-translation-en-vi /content/onedrive &

# Fetching the code

In [None]:
# Clone the project repository into the current directory and make it the
# working directory: later cells import its modules (utils, preprocess,
# train_model, test_model) and read ./config/config.yaml relative to it.
!git clone https://github.com/minhnguyent546/machine-translation-en-vi.git
%cd machine-translation-en-vi

In [None]:
!pip install -q datasets pyvi

# Configuration

In [None]:
from utils.config_util import get_config
from pathlib import Path
import os

# Directory where the OneDrive remote is mounted by the rclone cell above;
# checkpoints and experiment logs are placed underneath it.
storage_dir = '/content/onedrive'
Path(storage_dir).mkdir(parents=True, exist_ok=True)
# mkdir succeeds even when the rclone mount is not active, in which case
# everything below would silently land on the runtime's local disk. Warn
# (rather than fail) so that running without OneDrive remains possible.
if not os.path.ismount(storage_dir):
    print(f'Warning: {storage_dir} is not a mount point; '
          'checkpoints and logs will only be stored on the local disk')

# Load the project's defaults, then override them for this run.
config = get_config('./config/config.yaml')
config['checkpoints_dir'] = f'{storage_dir}/checkpoints'
# Also used as the TensorBoard --logdir in the Tensorboard section.
config['experiment_name'] = f'{storage_dir}/runs/model'
config['batch_size'] = 32
config['num_epochs'] = 10
config['seq_length'] = 120
config['num_validation_samples'] = 200 # -1 for evaluating the whole dataset
config['num_test_samples'] = -1
config['beam_size'] = 5
# config['preload'] = None

# Preprocessing

In [None]:
# Run the project's preprocessing entry point with the settings in `config`.
# NOTE(review): what preprocess() reads and writes is defined in the cloned
# repository and is not visible from this notebook.
from preprocess import preprocess
preprocess(config)

# TensorBoard

In [None]:
# Load the TensorBoard notebook extension and open it on the experiment
# directory set in the configuration cell (config['experiment_name']).
%load_ext tensorboard
%tensorboard --logdir {config['experiment_name']}

# Training

In [None]:
# Start training through the project's train_model entry point, using the
# settings in `config` (epochs, batch size, output directories, ...).
from train_model import train_model
train_model(config)

# Testing

In [None]:
# Evaluate through the project's test_model entry point with the same `config`.
# NOTE(review): presumably this loads a checkpoint from config['checkpoints_dir'];
# confirm against test_model in the repository.
from test_model import test_model
test_model(config)