# Basics


## Simple Example Tutorial 

In [None]:
import optuna
import sklearn


# Define an objective function to be minimized.
def objective(trial):
    """Train one sampled regressor and return its validation error.

    The trial chooses between an SVR (sampling ``svr_c`` with ``log=True``)
    and a random forest (sampling the integer ``rf_max_depth``). The chosen
    model is fitted on a train split of the California housing data and
    scored on the held-out split.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    # A bare ``import sklearn`` is not guaranteed to load these subpackages on
    # every scikit-learn release, so import each one explicitly before use.
    import sklearn.datasets
    import sklearn.ensemble
    import sklearn.metrics
    import sklearn.model_selection
    import sklearn.svm

    # Invoke suggest methods of a Trial object to generate hyperparameters.
    regressor_name = trial.suggest_categorical("regressor", ["SVR", "RandomForest"])
    if regressor_name == "SVR":
        svr_c = trial.suggest_float("svr_c", 1e-10, 1e10, log=True)
        regressor_obj = sklearn.svm.SVR(C=svr_c)
    else:
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32)
        regressor_obj = sklearn.ensemble.RandomForestRegressor(max_depth=rf_max_depth)

    X, y = sklearn.datasets.fetch_california_housing(return_X_y=True)
    # A fixed random_state keeps the train/validation split identical across
    # trials, so their errors are comparable.
    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X, y, random_state=0)

    regressor_obj.fit(X_train, y_train)
    y_pred = regressor_obj.predict(X_val)

    error = sklearn.metrics.mean_squared_error(y_val, y_pred)

    return error  # An objective value linked with the Trial object.


# Build a new in-memory study; "minimize" is Optuna's default direction and is
# spelled out here because the objective returns an error value.
study = optuna.create_study(direction="minimize")
# Run the objective for ten trials.
study.optimize(objective, n_trials=10)

[I 2026-02-07 16:33:30,216] A new study created in memory with name: no-name-fe367878-8245-4878-9fb7-c9ed71052af1
[I 2026-02-07 16:33:40,797] Trial 0 finished with value: 0.27331871245527417 and parameters: {'regressor': 'RandomForest', 'rf_max_depth': 25}. Best is trial 0 with value: 0.27331871245527417.
[I 2026-02-07 16:33:50,677] Trial 1 finished with value: 0.2737074871050314 and parameters: {'regressor': 'RandomForest', 'rf_max_depth': 18}. Best is trial 0 with value: 0.27331871245527417.
[I 2026-02-07 16:33:54,656] Trial 2 finished with value: 0.4317168415755644 and parameters: {'regressor': 'RandomForest', 'rf_max_depth': 6}. Best is trial 0 with value: 0.27331871245527417.


In [None]:
from optuna.visualization import plot_optimization_history

# Visualize the optimization history of the trials recorded in `study`.
# Kept as the cell's final expression so the notebook displays the result.
plot_optimization_history(study)


## Model

In [None]:
import torch
from torch import nn, einsum
import torch.nn.functional as F
from einops import rearrange, repeat

from einops.layers.torch import Rearrange


def pair(t):
    """Return ``t`` unchanged if it is a tuple, otherwise the 2-tuple ``(t, t)``."""
    if isinstance(t, tuple):
        return t
    return (t, t)


class PatchEmbed(nn.Module):
    """Cut an image batch into flattened patches and project them to ``embed_dim``.

    Args:
        img_size: Image size as an int or a 2-tuple; stored on the module as a
            tuple but not otherwise used by this class.
        patch_size: Patch size as an int or a ``(height, width)`` tuple.
        in_chans: Number of channels in the input images.
        embed_dim: Size of each output token.
    """

    def __init__(self, img_size, patch_size, in_chans=3, embed_dim=768):
        super().__init__()
        patch_h, patch_w = pair(patch_size)

        self.img_size = pair(img_size)
        self.patch_height = patch_h
        self.patch_width = patch_w
        self.in_channels = in_chans
        self.embed_dim = embed_dim
        # Length of one flattened patch: every channel of every pixel in it.
        self.patch_dim = in_chans * patch_h * patch_w

        # Normalize the raw patch, project it, then normalize the embedding.
        self.layernorm1 = nn.LayerNorm(self.patch_dim)
        self.linear = nn.Linear(self.patch_dim, embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Map ``(b, c, h*p1, w*p2)`` images to ``(b, h*w, embed_dim)`` tokens."""
        # (b, c, h*p1, w*p2) -> (b, h*w, p1*p2*c): one row per patch.
        patches = rearrange(
            x,
            'b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
            p1=self.patch_height,
            p2=self.patch_width,
        )
        return self.layernorm2(self.linear(self.layernorm1(patches)))




class ViT(nn.Module):
    """Transformer-encoder image classifier with patch or row tokenization.

    The input is turned into a token sequence in one of two ways:

    * ``patch_size > 0``: ``PatchEmbed`` splits the image into square patches.
    * ``patch_size <= 0``: a single-channel ``(b, 1, n, f)`` input is reshaped
      to ``(b, n, f)`` and each length-``f`` slice is embedded as one token.

    A learnable class token is prepended and learnable positional embeddings
    are added. The sequence then runs through an ``nn.TransformerEncoder``, and
    the pooled output (class token or mean over all tokens) is fed to a linear
    classification head.

    Args:
        config: Mapping with required keys ``img_size``, ``n_classes``,
            ``dim``, ``depth``, ``heads``, ``mlp_dim`` and ``channels``.
            Optional keys: ``pool`` (``'cls'`` or ``'mean'``, default
            ``'cls'``), ``dropout`` and ``emb_dropout`` (default ``0.``) and
            ``patch_size`` (default ``-1``, i.e. row mode). A ``dim_head``
            key, if present, is not used.
    """

    def __init__(self, config):
        super().__init__()
        img_size = config['img_size']
        n_classes = config['n_classes']
        dim = config['dim']
        depth = config['depth']
        heads = config['heads']
        mlp_dim = config['mlp_dim']
        pool = config.get('pool', 'cls')
        channels = config['channels']
        dropout = config.get('dropout', 0.)
        emb_dropout = config.get('emb_dropout', 0.)
        patch_size = config.get('patch_size', -1)

        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        image_height, image_width = pair(img_size)

        if patch_size > 0:
            # Only divide by the patch size once it is known to be positive;
            # computing this unconditionally crashed for patch_size == 0.
            num_tokens = (image_height // patch_size) * (image_width // patch_size)
            self.to_patch_embedding = PatchEmbed(img_size=img_size,
                                                 patch_size=patch_size,
                                                 in_chans=channels,
                                                 embed_dim=dim)
            print(f'Using patch size of {patch_size}, resulting in {num_tokens} patches')
        else:
            # NOTE(review): both the number of tokens and the per-token feature
            # width are taken from image_height, so row mode appears to assume
            # square, single-channel images -- confirm before using others.
            num_tokens = image_height
            self.to_patch_embedding = nn.Sequential(
                Rearrange('b 1 w h -> b w h'),
                nn.LayerNorm(num_tokens),
                nn.Linear(num_tokens, dim),
                nn.LayerNorm(dim),
            )
            print(f'Using rows as input, resulting in {num_tokens} patches')

        # One extra position for the class token prepended in forward().
        self.pos_embedding = nn.Parameter(torch.randn(1, num_tokens + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim,
                nhead=heads,
                dim_feedforward=mlp_dim,
                dropout=dropout,
                batch_first=True,
                activation='gelu',
            ),
            num_layers=depth,
        )

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Linear(dim, n_classes)

    def forward(self, img):
        """Return class logits of shape ``(b, n_classes)`` for an image batch."""
        x = self.to_patch_embedding(img)        # (b, n, dim)
        b, n, _ = x.shape

        # Prepend one copy of the class token per sample: (b, n + 1, dim).
        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b=b)
        x = torch.cat((cls_tokens, x), dim=1)
        # Out-of-place add: avoids mutating an activation tensor in place.
        x = x + self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x)
        # 'mean' averages over every token (class token included); otherwise
        # the class token's output is used.
        x = x.mean(dim=1) if self.pool == 'mean' else x[:, 0]

        x = self.to_latent(x)
        return self.mlp_head(x)


In [17]:
def training_val_loop(model, train_loader, test_loader, epochs=1):
    """Train ``model`` with Adam and evaluate it on ``test_loader`` every epoch.

    The model and every batch are moved to the module-level ``Device``.
    Training uses cross-entropy loss and Adam with a learning rate of 0.001.

    Args:
        model: ``nn.Module`` mapping an input batch to per-class scores.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        test_loader: Iterable yielding ``(inputs, labels)`` evaluation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(losses, best_acc, best_model_wts)``: the training loss of
        every batch, the best evaluation accuracy as a fraction in ``[0, 1]``,
        and a copy of the model's ``state_dict`` taken when that accuracy was
        reached (the initial weights if no epoch scored above zero).
    """
    import copy  # local import: only needed to snapshot the best weights

    losses = []
    model = model.to(Device)
    print(model)
    best_acc = 0.0
    # state_dict() hands back references to the live parameters, so the
    # snapshot must be deep-copied or later optimizer steps would overwrite it.
    best_model_wts = copy.deepcopy(model.state_dict())

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss().to(Device)
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            # zero the parameter gradients
            optimizer.zero_grad()
            inputs, labels = inputs.to(Device), labels.to(Device)

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:    # print every 100 mini-batches
                print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}')
                running_loss = 0.0

        # Evaluate in eval mode (dropout off) and without building autograd
        # graphs, so the accuracy is deterministic and memory use stays low.
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(Device), labels.to(Device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        # Back to training mode for the next epoch (and for the caller).
        model.train()

        # An empty test_loader counts as zero accuracy instead of dividing by 0.
        accuracy = correct / total if total else 0.0
        accuracy_pct = 100 * correct / total if total else 0.0
        if accuracy > best_acc:
            best_acc = accuracy
            best_model_wts = copy.deepcopy(model.state_dict())
        print(f' epoch {epoch + 1}: accuracy {accuracy_pct} %')

    print('Finished Training')
    return losses, best_acc, best_model_wts



In [None]:
import optuna
import sklearn

# Prefer the GPU, but fall back to the CPU when CUDA is unavailable so the
# notebook still runs on machines without one.
Device = "cuda" if torch.cuda.is_available() else "cpu"

# Define an objective function to be minimized.
def objective(trial):
    """Run one training/evaluation pass and return the classification error.

    Args:
        trial: Optuna trial object. NOTE(review): it is not used to sample any
            hyperparameter yet, so every trial runs the same configuration.

    Returns:
        ``1 - best_acc``, where ``best_acc`` is the best evaluation accuracy
        reported by ``training_val_loop``; lower is better.
    """
    # NOTE(review): `model`, `train_loader` and `test_loader` are expected to
    # be defined elsewhere in the notebook. Reusing one global model presumably
    # means each trial continues from the previous trial's weights -- confirm.
    _, best_acc, _ = training_val_loop(model, train_loader, test_loader, epochs=1)
    error = 1.0 - best_acc

    return error  # An objective value linked with the Trial object.


# Build a new in-memory study; "minimize" is Optuna's default direction and is
# spelled out here for clarity.
study = optuna.create_study(direction="minimize")
# Run the objective for ten trials.
study.optimize(objective, n_trials=10)