### Trainer

In [1]:
import ray

from ray.data.preprocessors import MinMaxScaler
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig

In [6]:
# Toy regression data following y = 2x; the train and validation splits
# take x values from interleaved ranges (offsets 0 and 1, step 3).
train_dataset = ray.data.from_items(
    [{"x": value, "y": 2 * value} for value in range(0, 32, 3)]
)
valid_dataset = ray.data.from_items(
    [{"x": value, "y": 2 * value} for value in range(1, 32, 3)]
)

# MinMaxScaler preprocessor configured for the single feature column 'x';
# it is handed to the trainer below.
preprocessor = MinMaxScaler(['x'])

# XGBoost regression trainer configured with 4 workers, using 'y' as the label.
trainer = XGBoostTrainer(
    datasets={'train': train_dataset, 'valid': valid_dataset},
    label_column='y',
    params={'objective': 'reg:squarederror'},
    preprocessor=preprocessor,
    scaling_config=ScalingConfig(num_workers=4),
)

# Run training; `result` is read by the checkpoint cell further down.
result = trainer.fit()

Trial name,status,loc,iter,total time (s),train-rmse,valid-rmse
XGBoostTrainer_23b41_00000,TERMINATED,127.0.0.1:14321,11,9.62486,3.34358,4.80537


[2m[1m[36m(scheduler +3m25s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.


[2m[36m(_RemoteRayXGBoostActor pid=14348)[0m [21:49:49] task [xgboost.ray]:140469835001184 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=14346)[0m [21:49:49] task [xgboost.ray]:140649986649488 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=14345)[0m [21:49:49] task [xgboost.ray]:140339096452304 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=14347)[0m [21:49:49] task [xgboost.ray]:140488339225904 got new rank 2


Result for XGBoostTrainer_23b41_00000:
  date: 2022-10-17_21-49-51
  done: false
  experiment_id: 42df304aac4f490fbe707ec138dadb92
  hostname: YONGJINs-MacBook-Pro.local
  iterations_since_restore: 1
  node_ip: 127.0.0.1
  pid: 14321
  time_since_restore: 9.355885982513428
  time_this_iter_s: 9.355885982513428
  time_total_s: 9.355885982513428
  timestamp: 1666010991
  timesteps_since_restore: 0
  train-rmse: 26.757627185845564
  training_iteration: 1
  trial_id: 23b41_00000
  valid-rmse: 28.41558771704671
  warmup_time: 0.007151126861572266
  
Result for XGBoostTrainer_23b41_00000:
  date: 2022-10-17_21-49-51
  done: true
  experiment_id: 42df304aac4f490fbe707ec138dadb92
  experiment_tag: '0'
  hostname: YONGJINs-MacBook-Pro.local
  iterations_since_restore: 11
  node_ip: 127.0.0.1
  pid: 14321
  time_since_restore: 9.624855041503906
  time_this_iter_s: 0.1621711254119873
  time_total_s: 9.624855041503906
  timestamp: 1666010991
  timesteps_since_restore: 0
  train-rmse: 3.34357850385

2022-10-17 21:49:51,546	INFO tune.py:758 -- Total run time: 22.20 seconds (14.33 seconds for the tuning loop).


### Checkpoint

In [7]:
import os
import ray.cloudpickle as cpickle
from ray.air.constants import PREPROCESSOR_KEY

# Take the checkpoint attached to the training result and read back the
# preprocessor file stored inside it.
checkpoint = result.checkpoint
with checkpoint.as_directory() as checkpoint_path:
    # The preprocessor file lives under PREPROCESSOR_KEY inside the checkpoint directory.
    path = os.path.join(checkpoint_path, PREPROCESSOR_KEY)
    with open(path, 'rb') as f:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # checkpoints that come from a trusted source.
        preprocessor = cpickle.load(f)
    print(preprocessor)

MinMaxScaler(columns=['x'])


In [8]:
# Display the preprocessor file path that the checkpoint cell computed.
path

'/Users/yjkim/ray_results/XGBoostTrainer_2022-10-17_21-49-29/XGBoostTrainer_23b41_00000_0_2022-10-17_21-49-37/checkpoint_000010/_preprocessor'

### Predictor

In [9]:
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostPredictor

# Build an unlabeled dataset (only the "x" column) to run inference on.
test_dataset = ray.data.from_items([{"x": x} for x in range(2, 32, 3)])

# Construct a batch predictor from the training checkpoint and score the test dataset.
batch_predictor = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)
# NOTE(review): the trainer in this notebook uses the 'reg:squarederror'
# objective, so these values are regression outputs rather than probabilities;
# the variable name is left unchanged because other cells may reference it.
predicted_probabilities = batch_predictor.predict(test_dataset)
predicted_probabilities.show()

Map Progress (1 actors 1 pending): 100%|████████| 10/10 [00:03<00:00,  2.83it/s]

{'predictions': 0.09843720495700836}
{'predictions': 5.604666709899902}
{'predictions': 11.405311584472656}
{'predictions': 15.684700012207031}
{'predictions': 23.990947723388672}
{'predictions': 29.900211334228516}
{'predictions': 34.59944152832031}
{'predictions': 40.6968994140625}
{'predictions': 45.68107604980469}
{'predictions': 50.29003143310547}





In [14]:
# Multi-tap keypad table: each digit key maps to the list of characters it
# cycles through, starting with the digit itself and followed by its symbols.
keyboards = {
    digit: [digit, *symbols]
    for digit, symbols in (
        ('1', '.,?!'),
        ('2', 'ABC'),
        ('3', 'DEF'),
        ('4', 'GHI'),
        ('5', 'JKL'),
        ('6', 'MNO'),
        ('7', 'PQRS'),
        ('8', 'TUV'),
        ('9', 'WXYZ'),
    )
}

def cnt_seq(n, seq):
    """Run-length encode ``seq`` into ``(value, run_length)`` pairs.

    Args:
        n: Declared length of ``seq``. Kept for interface compatibility;
            the encoding does not depend on it.
        seq: Iterable of items to encode (the notebook passes a string of
            digit characters).

    Returns:
        A list of ``(value, count)`` tuples, one per maximal run of equal
        consecutive items, in order of appearance. An empty ``seq`` yields
        an empty list instead of raising ``IndexError``.
    """
    from itertools import groupby

    # groupby yields one group per run of equal adjacent items; counting the
    # members of each group gives the run length.
    return [(value, sum(1 for _ in run)) for value, run in groupby(seq)]

def get_key_seq(seq_cnt, keyboards):
    """Translate ``(key, press_count)`` pairs into the characters they select.

    Args:
        seq_cnt: Iterable of ``(key, press_count)`` pairs.
        keyboards: Mapping from each key to the list of characters it
            cycles through.

    Returns:
        The selected characters joined into a single string.
    """
    def pick(options, presses):
        # The first press selects options[0]; further presses advance through
        # the list and wrap around once the end is passed.
        return options[(presses - 1) % len(options)]

    return ''.join(pick(keyboards[key], presses) for key, presses in seq_cnt)

# Read the puzzle input: the declared length first, then the key-press string.
n = int(input())
# Strip surrounding whitespace (e.g. a stray '\r' or trailing space) so it is
# not treated as a key that is missing from `keyboards`.
seq = input().strip()

seq_cnt = cnt_seq(n, seq)
# Bound to `decoded_text` rather than `result`: the training cell earlier in
# this notebook stores its output in `result`, and the checkpoint cell reads
# `result.checkpoint`, so reusing that name here would break re-running it.
decoded_text = get_key_seq(seq_cnt, keyboards)
print(decoded_text)

14
44433355556666
HELO
