In [None]:
!pip install ray
!pip install pyarrow
!pip install tabulate
!pip install opendatasets
!pip install opencv-python

In [15]:
import opendatasets as od
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.nn.torchmetrics.Accuracy

import ray
from ray import train
from ray.air import session
from ray.air.config import ScalingConfig
from ray.train.torch import TorchCheckpoint, TorchTrainer
from ray.data.preprocessors import Concatenator, Chain, StandardScaler
from ray import tune
from ray.tune.tuner import Tuner, TuneConfig
from ray.air.config import RunConfig
from ray.train.batch_predictor import BatchPredictor
from ray.train.torch import TorchPredictor
from ray.data.preprocessor import Preprocessor
from ray.data.aggregate import Max
from ray.data.datasource.partitioning import Partitioning
import cv2

from PIL import Image
import os

ModuleNotFoundError: No module named 'torch.nn.torchmetrics'

In [None]:
od.download(
    "https://www.kaggle.com/datasets/wwymak/architecture-dataset")

In [2]:
root = "architecture-dataset/architecture-dataset/arcDataset/"
partitioning = Partitioning("dir", field_names=["class"], base_dir=root)
ds = ray.data.read_images(root, partitioning=partitioning)
train_dataset, valid_dataset = ds.train_test_split(test_size=0.05)

2022-12-18 15:50:35,870	INFO worker.py:1538 -- Started a local Ray instance.
Read progress: 100%|█████████████████████████████████████████████████████████████████| 200/200 [00:19<00:00, 10.25it/s]
Read progress: 100%|███████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 2023.81it/s]


In [3]:
HEIGHT = 32
WIDTH = 32
CLASS_ID_MAPPING = {'Achaemenid architecture': 0, 'American Foursquare architecture': 1, 'American craftsman style': 2, 'Ancient Egyptian architecture': 3, 'Art Deco architecture': 4, 'Art Nouveau architecture': 5, 'Baroque architecture': 6, 'Bauhaus architecture': 7, 'Beaux-Arts architecture': 8, 'Byzantine architecture': 9, 'Chicago school architecture': 10, 'Colonial architecture': 11, 'Deconstructivism': 12, 'Edwardian architecture': 13, 'Georgian architecture': 14, 'Gothic architecture': 15, 'Greek Revival architecture': 16, 'International style': 17, 'Novelty architecture': 18, 'Palladian architecture': 19, 'Postmodern architecture': 20, 'Queen Anne architecture': 21, 'Romanesque architecture': 22, 'Russian Revival architecture': 23, 'Tudor Revival architecture': 24}

class OpenCVPreprocessor(Preprocessor):
    def _fit(self, dataset):
        self.stats_ = None
    
    def _transform_numpy(self, df):
        images = df["image"]
        result = []
        for image in images:
            image = cv2.Canny(image=image, threshold1=100, threshold2=200) 
            image = cv2.resize(image, (HEIGHT,WIDTH), interpolation=cv2.INTER_CUBIC)
            result.append([image])
        result = np.array(result)
        df["image"] = result
        
        classes = df["class"]
        result = []
        for class_name in classes:
            result.append(self._hot_encode(class_name))
        result = np.array(result)
        df["class"] = result
        return df

    def _hot_encode(self, name):
        result = CLASS_ID_MAPPING[name]
        #result[] = 1
        return result
    
preprocessor = OpenCVPreprocessor()
transformed = preprocessor.fit_transform(valid_dataset)

Map_Batches: 100%|█████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 16.72it/s]


In [62]:
transformed.take(1)[0]["class"]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0])

In [12]:
NUMBER_OF_CLASSES = len(CLASS_ID_MAPPING)

class CNN(nn.Module):
    def __init__(self, number_of_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, number_of_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



def train_loop_per_worker(config):
    batch_size = config["batch_size"]
    lr = config["lr"]
    epochs = config["num_epochs"]
    number_of_classes = config["number_of_classes"]

    train_data = session.get_dataset_shard("train")
    model = CNN(number_of_classes)
    model = train.torch.prepare_model(model)
    
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    for cur_epoch in range(epochs):
        for batch in train_data.iter_torch_batches(
            batch_size=batch_size, dtypes=torch.float32
        ):
            inputs, labels = batch["image"], batch["class"]
            optimizer.zero_grad()
            predictions = model(inputs)
            train_loss = loss_fn(predictions, labels.type(torch.LongTensor))
            train_loss.backward()
            optimizer.step()
        loss = train_loss.item()
        session.report({"loss": loss}, checkpoint=TorchCheckpoint.from_model(model))
        
    
        accuracy = Accuracy(task="multiclass", num_classes=NUMBER_OF_CLASSES)

        session.report({"accuracy": accuracy(predictions, labels)}, checkpoint=TorchCheckpoint.from_model(model))
        

trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config={
        "batch_size": 128,
        "num_epochs": 20,
        "lr": 0.001,
        "number_of_classes": NUMBER_OF_CLASSES
    },
    scaling_config=ScalingConfig(
        num_workers=3, 
        use_gpu=False,
        trainer_resources={"CPU": 0},
    ),
    datasets={"train": valid_dataset},
    preprocessor=preprocessor,
)

result = trainer.fit()
print(f"Last result: {result.metrics}")

2022-12-18 16:04:08,035	INFO data_parallel_trainer.py:286 -- GPUs are detected in your Ray cluster, but GPU training is not enabled for this trainer. To enable GPU training, make sure to set `use_gpu` to True in your scaling config.


0,1
Current time:,2022-12-18 16:04:19
Running for:,00:00:11.78
Memory:,20.0/31.8 GiB

Trial name,# failures,error file
TorchTrainer_3899a_00000,1,C:\Users\lukas\ray_results\TorchTrainer_2022-12-18_16-04-08\TorchTrainer_3899a_00000_0_2022-12-18_16-04-09\error.txt

Trial name,status,loc,iter,total time (s),loss,_timestamp,_time_this_iter_s
TorchTrainer_3899a_00000,ERROR,127.0.0.1:10220,1,7.24772,3.24762,1671375859,0.0680242


[2m[36m(TorchTrainer pid=10220)[0m 2022-12-18 16:04:12,333	INFO data_parallel_trainer.py:286 -- GPUs are detected in your Ray cluster, but GPU training is not enabled for this trainer. To enable GPU training, make sure to set `use_gpu` to True in your scaling config.
[2m[36m(RayTrainWorker pid=14780)[0m 2022-12-18 16:04:19,327	INFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=3]
[2m[36m(RayTrainWorker pid=14780)[0m 2022-12-18 16:04:19,491	INFO train_loop_utils.py:270 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=14780)[0m 2022-12-18 16:04:19,491	INFO train_loop_utils.py:330 -- Wrapping provided model in DistributedDataParallel.


Trial name,_time_this_iter_s,_timestamp,_training_iteration,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
TorchTrainer_3899a_00000,0.0680242,1671375859,1,2022-12-18_16-04-19,False,,1481f91fb17545c0a0ed2a299b21515c,0,LAPTOP-S8FJSVCO,1,3.24762,127.0.0.1,10220,True,7.24772,7.24772,7.24772,1671375859,0,,1,3899a_00000,0.0406878


2022-12-18 16:04:19,825	ERROR trial_runner.py:1088 -- Trial TorchTrainer_3899a_00000: Error processing event.
ray.exceptions.RayTaskError(NameError): [36mray::_Inner.train()[39m (pid=10220, ip=127.0.0.1, repr=TorchTrainer)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 780, in ray._raylet.execute_task.function_executor
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 367, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\train\_inte

RayTaskError(NameError): [36mray::_Inner.train()[39m (pid=10220, ip=127.0.0.1, repr=TorchTrainer)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 780, in ray._raylet.execute_task.function_executor
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\tune\trainable\trainable.py", line 367, in train
    raise skipped from exception_cause(skipped)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\train\_internal\utils.py", line 54, in check_for_failure
    ray.get(object_ref)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\_private\client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\_private\worker.py", line 2309, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(NameError): [36mray::RayTrainWorker._RayTrainWorker__execute()[39m (pid=14780, ip=127.0.0.1, repr=<ray.train._internal.worker_group.RayTrainWorker object at 0x000002C37ADE2790>)
  File "python\ray\_raylet.pyx", line 830, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 834, in ray._raylet.execute_task
  File "python\ray\_raylet.pyx", line 780, in ray._raylet.execute_task.function_executor
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\util\tracing\tracing_helper.py", line 466, in _resume_span
    return method(self, *_args, **_kwargs)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\train\_internal\worker_group.py", line 31, in __execute
    raise skipped from exception_cause(skipped)
  File "C:\Users\lukas\anaconda3\lib\site-packages\ray\train\_internal\utils.py", line 129, in discard_return_wrapper
    train_func(*args, **kwargs)
  File "<ipython-input-12-67426eb76e76>", line 51, in train_loop_per_worker
NameError: name 'Accuracy' is not defined