# Plaidbot Training

This notebook trains a Plaidbot model: it picks users, selects the message data, trains the model, and pushes the result to the Hugging Face Hub.

Before running, change the Colab runtime type to GPU; the model options below set the device to `cuda:0`.

## Setup

In [None]:
# Install the Hugging Face `transformers` package into the notebook runtime.
!pip install transformers

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from the notebook (the `app` package and the message folder live on Drive).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy the `app` package from Drive into the current working directory;
# the import cell below imports from `app`.
!cp -r '/content/drive/MyDrive/Colab Notebooks/Plaidbot/v2/app' .

In [None]:
from typing import List
from datetime import datetime

In [None]:
# Imports
from app.options.model_options import ModelOptions
from app.options.prepro_options import PreproOptions
from app.src.pick_users.run_pick_users import run_pick_users
from app.src.select_data.run_select_data import run_select_data
from app.src.train.run_training import run_training
from app.src.train.run_prediction import run_prediction

## Options

In [None]:
# Preprocessing options; passed to run_pick_users and run_select_data below.
prepro_opts = PreproOptions()

# Input locations
prepro_opts.user_filename = 'users.json'
prepro_opts.message_folder = '/content/drive/MyDrive/Colab Notebooks/Plaidbot/messages'
prepro_opts.selected_folders = [
    'general',
    # add more folders...
]

# Filtering
prepro_opts.min_date = datetime(2018, 1, 1)
prepro_opts.min_num_words = 3
prepro_opts.max_messages = 100_000

In [None]:
# Model/training options; passed to run_training and run_prediction below.
model_opts = ModelOptions()

# Base model
model_opts.bert_model_name = 'distilbert-base-uncased'
model_opts.max_len = 150

# Training hyperparameters
model_opts.num_epochs = 2
model_opts.batch_size = 8
model_opts.learning_rate = 2e-5
model_opts.val_size = 0.2

# Device to run on (this notebook expects a GPU runtime)
model_opts.device = 'cuda:0'

# Name handed to push_to_hub in the "Save the model" section.
# It is empty here, so set it before running the save cells.
model_opts.saved_model_name = ''

## Pick users and data

In [None]:
# TODO: ideally run_pick_users would save its results straight into the
# appropriate options, so no further work would be needed for the users step.
# Until then: once this has finished, update the options by hand and restart
# the environment.
run_pick_users(prepro_opts)

In [None]:
# Placeholder values.
# NOTE(review): presumably these are meant to be replaced with the results of
# run_pick_users before the cells below are run — confirm.
prepro_opts.user_id_int_dict = None
model_opts.user_int_name_dict = None
model_opts.num_labels = 0  # TODO: get rid of this and make it dynamic.

In [None]:
# Select the data using prepro_opts; `messages` is the input to run_training.
messages = run_select_data(prepro_opts)
# TODO: may want to save the messages (this could be made optional).
# TODO: add a function that prints a summary of the messages.

## Train

In [None]:
# Train on the selected messages; `model` is reused by the prediction and
# save cells below.
model = run_training(messages, model_opts)

In [None]:
# Quick check: run the in-memory trained model on a sample message.
test_messages = ['This is a test']
preds = run_prediction(model, test_messages, model_opts)
print(preds)

## Save the model

In [None]:
# Log in to the Hugging Face Hub from the notebook; the next cell pushes the
# model there.
from huggingface_hub import notebook_login
notebook_login()

In [None]:

# Fail early with a clear message: the options cell leaves saved_model_name
# empty, and an empty string is not a usable repository name for push_to_hub.
if not model_opts.saved_model_name:
    raise ValueError(
        "model_opts.saved_model_name is empty; set it to the Hub repository "
        "name before pushing the model."
    )

# Fetch the base model through the wrapper's accessors and upload it to the
# Hub under the configured name.
base_model = model.get_inner_model().get_base_model()
base_model.push_to_hub(model_opts.saved_model_name)

In [None]:
# Run the same sample prediction, this time passing None instead of the
# in-memory model.
# NOTE(review): presumably run_prediction then loads the model saved under
# model_opts.saved_model_name — verify against run_prediction.
test_messages = ['This is a test']
preds = run_prediction(None, test_messages, model_opts)
print(preds)