In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from ray import tune
from ray.tune.schedulers import ASHAScheduler

class SimpleNN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Expects inputs whose last dimension is 28 * 28 = 784 (a flattened
    28x28 image) and produces 10 raw, un-normalized scores per input.

    Args:
        hidden_dim: Number of units in the hidden layer.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Input projection: 784 features -> hidden_dim units.
        self.fc1 = nn.Linear(28 * 28, hidden_dim)
        # Output projection: hidden_dim units -> 10 scores.
        self.fc2 = nn.Linear(hidden_dim, 10)

    def forward(self, x):
        """Apply fc1, a ReLU, then fc2, and return the resulting scores."""
        hidden_activation = torch.relu(self.fc1(x))
        return self.fc2(hidden_activation)

def train_fn(config):
    """Train ``SimpleNN`` on MNIST for one Ray Tune trial.

    The mean training loss is reported to Tune after *every* epoch, so an
    iteration-based scheduler (such as the ASHA scheduler configured in
    this notebook) receives intermediate results it can act on, and so a
    trial that is interrupted still leaves the metrics of its finished
    epochs behind.

    Args:
        config: Trial hyperparameters supplied by Ray Tune.
            Required keys:
                ``"hidden_dim"``: width of the hidden layer of ``SimpleNN``.
                ``"lr"``: learning rate passed to Adam.
                ``"batch_size"``: batch size of the training DataLoader.
            Optional keys:
                ``"epochs"``: number of passes over the training set
                (default 10).

    Reports:
        ``loss``: sample-weighted mean cross-entropy over the epoch that
        just finished; one report per epoch.
    """
    hidden_dim = config["hidden_dim"]
    lr = config["lr"]
    batch_size = config["batch_size"]
    num_epochs = config.get("epochs", 10)

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_set = datasets.MNIST('.', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

    model = SimpleNN(hidden_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _epoch in range(num_epochs):
        loss_sum = 0.0
        sample_count = 0
        for data, target in train_loader:
            # Flatten each image to a vector: (batch, 1, 28, 28) -> (batch, 784).
            data = data.view(data.size(0), -1)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # CrossEntropyLoss averages over the batch by default, so weight
            # by the batch size to get an exact per-sample mean for the epoch
            # even when the last batch is smaller.
            batch_len = data.size(0)
            loss_sum += loss.item() * batch_len
            sample_count += batch_len

        # One report per epoch. Reporting only once after the final epoch
        # (and only the last mini-batch's loss) gave the scheduler nothing to
        # compare before a trial had already used its whole budget.
        # NOTE(review): the keyword-style call is kept from the original;
        # newer Ray releases expect a metrics dict instead -- confirm against
        # the installed Ray version.
        tune.report(loss=loss_sum / sample_count)

# Exhaustive grid over all three hyperparameters: 3 x 3 x 2 = 18 trials.
config = dict(
    hidden_dim=tune.grid_search([32, 64, 128]),
    lr=tune.grid_search([1e-3, 1e-4, 1e-2]),
    batch_size=tune.grid_search([16, 32]),
)

# Early-stopping scheduler shared by all trials of the grid search.
scheduler = ASHAScheduler(
    grace_period=1,  # every trial gets at least this many epochs
    max_t=10,        # upper bound on epochs per trial
    mode="min",      # lower values of the metric are better
    metric="loss",   # name of the reported metric being optimized
)

# Run one trial per grid point, each on a single CPU and no GPU, under the
# early-stopping scheduler defined above.
analysis = tune.run(
    train_fn,
    scheduler=scheduler,
    config=config,
    resources_per_trial=dict(cpu=1, gpu=0),
    num_samples=1,
)

# Configuration of the trial with the lowest reported "loss".
print("Best config: ", analysis.get_best_config(mode="min", metric="loss"))


2024-09-15 18:00:40,295	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-09-15 18:03:10
Running for:,00:02:30.43
Memory:,22.0/63.9 GiB

Trial name,status,loc,batch_size,hidden_dim,lr
train_fn_bed63_00000,RUNNING,127.0.0.1:22672,16,32,0.001
train_fn_bed63_00001,RUNNING,127.0.0.1:24324,32,32,0.001
train_fn_bed63_00002,RUNNING,127.0.0.1:5544,16,64,0.001
train_fn_bed63_00003,RUNNING,127.0.0.1:27988,32,64,0.001
train_fn_bed63_00004,RUNNING,127.0.0.1:26384,16,128,0.001
train_fn_bed63_00005,RUNNING,127.0.0.1:23024,32,128,0.001
train_fn_bed63_00006,RUNNING,127.0.0.1:14168,16,32,0.0001
train_fn_bed63_00007,RUNNING,127.0.0.1:27016,32,32,0.0001
train_fn_bed63_00008,RUNNING,127.0.0.1:13572,16,64,0.0001
train_fn_bed63_00009,RUNNING,127.0.0.1:7944,32,64,0.0001


2024-09-15 18:03:10,739	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/sitth/ray_results/train_fn_2024-09-15_18-00-40' in 0.0330s.
2024-09-15 18:03:21,079	INFO tune.py:1041 -- Total run time: 160.78 seconds (150.40 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- train_fn_bed63_00016: FileNotFoundError('Could not fetch metrics for train_fn_bed63_00016: both result.json and progress.csv were not found at C:/Users/sitth/ray_results/train_fn_2024-09-15_18-00-40/train_fn_bed63_00016_16_batch_size=16,hidden_dim=128,lr=0.0100_2024-09-15_18-00-40')
- train_fn_bed63_00017: FileNotFoundError('Could not fetch metrics for train_fn_bed63_00017: both result.json and progress.csv were not found at C:/Users/sitth/ray_results/train_fn_2024-09-15_18-00-40/train_fn_bed63_00017_17_batch_size=32,hidden_dim=128,lr=0.0100_2024-09-15_18-00-40')


Best config:  None


In [1]:
from ray import tune

def train_model(config):
    """Placeholder trainable: print the hyperparameters chosen for a trial.

    Args:
        config: Mapping that must contain the keys ``"architecture"``,
            ``"lr"`` and ``"batch_size"``; a missing key raises ``KeyError``.

    Returns:
        None. No training is performed and no metric is reported.
    """
    # Look the three hyperparameters up in a fixed order and unpack them.
    wanted_keys = ("architecture", "lr", "batch_size")
    architecture, lr, batch_size = (config[key] for key in wanted_keys)

    # Stand-in for the real training loop.
    print(f"Traininggggggg with arch={architecture} lr={lr} and batch_size={batch_size}")

# Search space: each trial picks one option per key via tune.choice.
config = dict(
    # Candidate layer-width lists
    architecture=tune.choice([[32, 64, 128], [64, 128, 256], [64, 128, 256, 512]]),
    # Candidate learning rates
    lr=tune.choice([1e-3, 1e-4, 1e-2]),
    # Candidate batch sizes
    batch_size=tune.choice([16, 32]),
)

# Launch the search with 10 trials drawn from the search space. The call is
# the cell's last expression, so the returned analysis object is displayed.
tune.run(train_model, num_samples=10, config=config)

2024-09-16 11:37:56,461	INFO worker.py:1783 -- Started a local Ray instance.
2024-09-16 11:37:58,932	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2024-09-16 11:37:58,934	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-09-16 11:38:05
Running for:,00:00:04.52
Memory:,8.4/63.9 GiB

Trial name,status,loc,architecture,batch_size,lr
train_model_7335c_00000,TERMINATED,127.0.0.1:21848,"[64, 128, 256]",32,0.001
train_model_7335c_00001,TERMINATED,127.0.0.1:10404,"[64, 128, 256]",16,0.01
train_model_7335c_00002,TERMINATED,127.0.0.1:1460,"[32, 64, 128]",32,0.01
train_model_7335c_00003,TERMINATED,127.0.0.1:19492,"[64, 128, 256, 512]",32,0.001
train_model_7335c_00004,TERMINATED,127.0.0.1:19536,"[64, 128, 256]",32,0.001
train_model_7335c_00005,TERMINATED,127.0.0.1:6368,"[64, 128, 256]",32,0.001
train_model_7335c_00006,TERMINATED,127.0.0.1:5908,"[64, 128, 256]",32,0.0001
train_model_7335c_00007,TERMINATED,127.0.0.1:25828,"[64, 128, 256, 512]",16,0.01
train_model_7335c_00008,TERMINATED,127.0.0.1:23880,"[64, 128, 256, 512]",32,0.001
train_model_7335c_00009,TERMINATED,127.0.0.1:6044,"[64, 128, 256, 512]",32,0.0001


2024-09-16 11:38:05,196	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/sitth/ray_results/train_model_2024-09-16_11-37-58' in 0.0120s.


Trial train_model_7335c_00002 completed. Last result: 
Trial train_model_7335c_00001 completed. Last result: 
Trial train_model_7335c_00000 completed. Last result: 
Trial train_model_7335c_00003 completed. Last result: 
Trial train_model_7335c_00006 completed. Last result: 
Trial train_model_7335c_00005 completed. Last result: 
[36m(train_model pid=10404)[0m Traininggggggg with arch=[64, 128, 256] lr=0.01 and batch_size=16
Trial train_model_7335c_00009 completed. Last result: 
Trial train_model_7335c_00008 completed. Last result: 
Trial train_model_7335c_00007 completed. Last result: 
Trial train_model_7335c_00004 completed. Last result: 


2024-09-16 11:38:05,214	INFO tune.py:1041 -- Total run time: 6.28 seconds (4.51 seconds for the tuning loop).


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x16d22be0980>