In [1]:
import ray
ray.shutdown()
import os
os.environ["RAY_ML_DEV"] = "1"

In [2]:
import ray
import torch
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import numpy as np

  from pandas import MultiIndex, Int64Index


In [3]:
dataset = load_dataset('imdb')
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
num_classes = 2 # positive or negative sentiment

Reusing dataset imdb (/home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
ray_datasets = {
    "train": ray.data.from_torch(dataset["train"]),
    "val": ray.data.from_torch(dataset["test"]),
    "test": ray.data.from_torch(dataset["test"])
}

find: ‘.git’: No such file or directory
2023-03-24 13:28:10,634	INFO worker.py:1413 -- Connecting to existing Ray cluster at address: 10.0.31.25:6379...
2023-03-24 13:28:10,643	INFO worker.py:1601 -- Connected to Ray cluster. View the dashboard at https://console.anyscale-staging.com/api/v2/sessions/ses_pdz4qqxjifbazrxk8qywtn91jz/services?redirect_to=dashboard 
2023-03-24 13:28:11,216	INFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_90dfdeedee80f488b154715ca4549d42.zip' (0.01MiB) to Ray cluster...
2023-03-24 13:28:11,218	INFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_90dfdeedee80f488b154715ca4549d42.zip'.


TypeError: __cinit__() takes exactly 19 positional arguments (21 given)

In [None]:
from ray.data.preprocessors import BatchMapper


def map_fn(batch):
    batch["text"] = [
        np.array(tokenizer.encode(text, padding="max_length", truncation=True, max_length=512))
        for text in batch["text"]
    ]
    return batch


preprocessor = BatchMapper(fn=map_fn, batch_format="pandas")


In [None]:
transfored_ds = preprocessor.fit_transform(ray_datasets["val"])

In [None]:
def convert_numpy_to_tensor(batch):
    # text = torch.Tensor(batch["text"]).cuda()
    # label = torch.Tensor(batch["label"]).cuda()
    text = torch.Tensor(batch["text"])
    label = torch.Tensor(batch["label"])
    return (text, label)



# for batch in transfored_ds.iter_torch_batches(batch_size=3, collate_fn=collate):
#     text, label = batch
#     print(text, type(text), text.shape)
#     print(label, type(label), label.shape)
#     break

In [None]:
class SentimentModel(pl.LightningModule):
    def __init__(self, lr=2e-5, eps=1e-8):
        super().__init__()
        self.lr = lr
        self.eps = eps
        self.model = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)

    def forward(self, x):
        outputs = self.model(x)
        logits = outputs.logits
        return logits

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits.view(-1, self.num_classes), y)
        self.log('train_loss', loss, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits.view(-1, self.num_classes), y)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == y).float() / len(preds)
        self.log('val_loss', loss, on_epoch=True)
        self.log('val_accuracy', acc, on_epoch=True)

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.lr, eps=self.eps)


In [None]:
from ray.train.lightning import LightningTrainer, LightningConfigBuilder
from ray.air.config import RunConfig, ScalingConfig, CheckpointConfig
import ray.tune as tune


lightning_config = (
    LightningConfigBuilder()
    .module(cls=SentimentModel, lr=tune.loguniform(1e-3, 1e-5), eps=1e-8)
    .trainer(max_epochs=3, accelerator="cpu")
    .build()
)

dataset_iter_config = {"batch_size": tune.choice([16, 32, 64]), "collate_fn": convert_numpy_to_tensor}

run_config = RunConfig(
    name="ptl-advanced-example",
    checkpoint_config=CheckpointConfig(
        num_to_keep=2,
        checkpoint_score_attribute="val_accuracy",
        checkpoint_score_order="max",
    ),
)

scaling_config = ScalingConfig(
    num_workers=4, use_gpu=False, resources_per_worker={"CPU": 1}
)

trainer = LightningTrainer(
    datasets=ray_datasets,
    scaling_config=scaling_config
)


In [None]:
mutation_lightning_config = (
    LightningConfigBuilder()
    .module(
        lr=tune.loguniform(1e-3, 1e-5),
    )
    .build()
)

tuner = tune.Tuner(
    lightning_trainer,
    param_space={"lightning_config": lightning_config, "dataset_iter_config": dataset_iter_config},
    run_config=ray.air.RunConfig(
        name="release-tuner-test",
        verbose=2,
        checkpoint_config=CheckpointConfig(
            num_to_keep=2,
            checkpoint_score_attribute="val_accuracy",
            checkpoint_score_order="max",
        ),
    ),
    tune_config=tune.TuneConfig(
        metric="val_accuracy",
        mode="max",
        num_samples=4,
        scheduler=PopulationBasedTraining(
            time_attr="training_iteration",
            hyperparam_mutations={"lightning_config": mutation_lightning_config},
            perturbation_interval=1,
        ),
    ),
)