# Image classification example

In [1]:
import torch.nn
import torch.optim

import openml
import openml_pytorch
import openml_pytorch.layers
import openml_pytorch.config
import logging
import warnings

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore')

############################################################################
# Enable logging in order to observe the progress while running the example.
openml.config.logger.setLevel(logging.DEBUG)
openml_pytorch.config.logger.setLevel(logging.DEBUG)
############################################################################

############################################################################
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# openml.config.apikey = 'key'
from openml_pytorch.trainer import OpenMLTrainerModule
from openml_pytorch.trainer import OpenMLDataModule
from openml_pytorch.trainer import Callback

In [3]:
data_module = OpenMLDataModule(
    type_of_data="image",
    file_dir=openml.config.get_cache_directory() + "/datasets/45923/Images/",
    # file_dir=openml.config.get_cache_directory()+'/datasets/44312/PNU_Micro/images/',
    # filename_col="FILE_NAME",
    filename_col="Filename",
    target_mode="categorical",
)

In [4]:
# Download the OpenML task for the Indoor scene classification.
# task = openml.tasks.get_task(361987)
# indoor scenes
task = openml.tasks.get_task(362070)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(13456, 120)
        self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, 19) # To user - Remember to set correct size of last layer. 
        self.fc3 = nn.Linear(84, 67) # To user - Remember to set correct size of last layer. 

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Net()

In [6]:

trainer = OpenMLTrainerModule(
    data_module=data_module,
    verbose = True,
    epoch_count = 1,
    # optimizer_gen = torch.optim.AdamW
)
openml_pytorch.config.trainer = trainer

In [7]:
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)

                                                 

train: [4.033145392033934, tensor(0.0423, device='mps:0')]
valid: [3.991187976097649, tensor(0.0470, device='mps:0')]
Loss tensor(3.9840, device='mps:0')


                                                 

train: [4.039602313653826, tensor(0.0372, device='mps:0')]
valid: [3.9921434515954894, tensor(0.0451, device='mps:0')]
Loss tensor(3.9850, device='mps:0')


In [8]:
run.predictions

Unnamed: 0,repeat,fold,sample,row_id,prediction,correct,confidence.airport_inside,confidence.artstudio,confidence.auditorium,confidence.bakery,...,confidence.stairscase,confidence.studiomusic,confidence.subway,confidence.toystore,confidence.trainstation,confidence.tv_studio,confidence.videostore,confidence.waitingroom,confidence.warehouse,confidence.winecellar
0,0,0,0,7200,kitchen,hairsalon,0.040039,0.008503,0.012137,0.023789,...,0.010141,0.006988,0.033145,0.021225,0.009748,0.009803,0.006998,0.009111,0.031846,0.020170
1,0,0,0,7119,kitchen,hairsalon,0.040039,0.008503,0.012137,0.023789,...,0.010141,0.006988,0.033145,0.021225,0.009748,0.009803,0.006998,0.009111,0.031846,0.020170
2,0,0,0,7117,kitchen,hairsalon,0.040039,0.008503,0.012137,0.023789,...,0.010141,0.006988,0.033145,0.021225,0.009748,0.009803,0.006998,0.009111,0.031846,0.020170
3,0,0,0,7211,kitchen,hairsalon,0.040039,0.008503,0.012137,0.023789,...,0.010141,0.006988,0.033145,0.021225,0.009748,0.009803,0.006998,0.009111,0.031846,0.020170
4,0,0,0,7196,kitchen,hairsalon,0.040039,0.008503,0.012137,0.023789,...,0.010141,0.006988,0.033145,0.021225,0.009748,0.009803,0.006998,0.009111,0.031846,0.020170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15615,0,2,0,4279,bedroom,church_inside,0.035417,0.009082,0.013963,0.023588,...,0.011744,0.007056,0.035572,0.020490,0.010174,0.010689,0.007410,0.009738,0.031315,0.017684
15616,0,2,0,4291,bedroom,church_inside,0.035417,0.009082,0.013963,0.023588,...,0.011744,0.007056,0.035572,0.020490,0.010174,0.010689,0.007410,0.009738,0.031315,0.017684
15617,0,2,0,4267,bedroom,church_inside,0.035417,0.009082,0.013963,0.023588,...,0.011744,0.007056,0.035572,0.020490,0.010174,0.010689,0.007410,0.009738,0.031315,0.017684
15618,0,2,0,4288,bedroom,church_inside,0.035417,0.009082,0.013963,0.023588,...,0.011744,0.007056,0.035572,0.020490,0.010174,0.010689,0.007410,0.009738,0.031315,0.017684


In [16]:
trainer

<Logger openml_pytorch.config (DEBUG)>

# Pretrained Image classification example

In [None]:
import torch.nn
import torch.optim

import openml
import openml_pytorch
import openml_pytorch.layers
import openml_pytorch.config
import logging
import warnings

# Suppress FutureWarning messages
warnings.simplefilter(action='ignore')

############################################################################
# Enable logging in order to observe the progress while running the example.
openml.config.logger.setLevel(logging.DEBUG)
openml_pytorch.config.logger.setLevel(logging.DEBUG)
############################################################################

############################################################################
import torch.nn as nn
import torch.nn.functional as F
from openml_pytorch.trainer import OpenMLTrainerModule

In [None]:
# Example model. You can do better :)
import torchvision.models as models

# Load the pre-trained ResNet model
model = models.resnet18(pretrained=True)

# Modify the last fully connected layer to the required number of classes
num_classes = 20
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

# Optional: If you're fine-tuning, you may want to freeze the pre-trained layers
for param in model.parameters():
    param.requires_grad = False

# If you want to train the last layer only (the newly added layer)
for param in model.fc.parameters():
    param.requires_grad = True

In [None]:
from openml import OpenMLTask
def custom_optimizer_gen(model: torch.nn.Module, task: OpenMLTask) -> torch.optim.Optimizer:
    return torch.optim.Adam(model.fc.parameters())

data_module = OpenMLDataModule(
    type_of_data="image",
    file_dir=openml.config.get_cache_directory() + "/datasets/45923/Images/",
    # file_dir=openml.config.get_cache_directory()+'/datasets/44312/PNU_Micro/images/',
    # filename_col="FILE_NAME",
    filename_col="Filename",
    target_mode="categorical",
)


trainer = OpenMLTrainerModule(
    data_module=data_module,
    verbose = True,
    epoch_count = 1,
)
openml_pytorch.config.trainer = trainer

In [None]:
############################################################################
# Download the OpenML task for Indoor scene classification.
task = openml.tasks.get_task(362070)

############################################################################
# Run the model on the task (requires an API key).m
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)

Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.27s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.20s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.22s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.21s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.22s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.20s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.20s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.19s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.18s/it]
Epochs: 100%|██████████| 3/3 [00:03<00:00,  1.21s/it]


In [None]:
run

OpenML Run
Uploader Name...................: None
Metric..........................: None
Local Result - Accuracy (+- STD): 0.0000 +- 0.0000
Run ID..........................: None
Task ID.........................: 361152
Task Type.......................: None
Task URL........................: https://www.openml.org/t/361152
Flow ID.........................: None
Flow Name.......................: torch.nn.ResNet.2f74ee0621acaf36
Flow URL........................: None
Setup ID........................: None
Setup String....................: Python_3.11.9. Torch_2.4.1. NumPy_1.26.4. SciPy_1.14.1. ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stri

# Sequential classification

In [None]:

import torch.nn
import torch.optim

import openml
import openml_pytorch
import openml_pytorch.layers
import openml_pytorch.config
import logging


############################################################################
# Enable logging in order to observe the progress while running the example.
openml.config.logger.setLevel(logging.DEBUG)
openml_pytorch.config.logger.setLevel(logging.DEBUG)
############################################################################

In [None]:

############################################################################
# Define a sequential network that does the initial image reshaping
# and normalization model.
processing_net = torch.nn.Sequential(
    openml_pytorch.layers.Functional(function=torch.Tensor.reshape,
                                                shape=(-1, 1, 28, 28)),
    torch.nn.BatchNorm2d(num_features=1)
)
############################################################################

############################################################################
# Define a sequential network that does the extracts the features from the
# image.
features_net = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
    torch.nn.LeakyReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
)
############################################################################

############################################################################
# Define a sequential network that flattens the features and compiles the
# results into probabilities for each digit.
results_net = torch.nn.Sequential(
    openml_pytorch.layers.Functional(function=torch.Tensor.reshape,
                                                shape=(-1, 4 * 4 * 64)),
    torch.nn.Linear(in_features=4 * 4 * 64, out_features=256),
    torch.nn.LeakyReLU(),
    torch.nn.Dropout(),
    torch.nn.Linear(in_features=256, out_features=10),
)
############################################################################
# openml.config.apikey = 'key'

############################################################################
# The main network, composed of the above specified networks.
model = torch.nn.Sequential(
    processing_net,
    features_net,
    results_net
)
############################################################################


In [None]:
from openml_pytorch.trainer import OpenMLTrainerModule

trainer = OpenMLTrainerModule(
    # file_dir=openml.config.get_cache_directory()+'/datasets/44312/PNU_Micro/images/',
    file_dir=openml.config.get_cache_directory()+'/datasets/45923/Images/',
    # filename_col="FILE_NAME",
    filename_col="Filename",
    target_mode="categorical",
    verbose = True,
    epoch_count = 1,
)
openml_pytorch.config.trainer = trainer

In [None]:
# Download the OpenML task for the mnist 784 dataset.
task = openml.tasks.get_task(3573)

  exec(code_obj, self.user_global_ns, self.user_ns)
  dataset = get_dataset(task.dataset_id, *dataset_args, **get_dataset_kwargs)


In [None]:
task.get_dataset().get_data(task.target_name)[0]

  return datasets.get_dataset(self.dataset_id)


Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
69996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
69997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
69998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
run = openml.runs.run_model_on_task(model, task, avoid_duplicate_runs=False)

  return datasets.get_dataset(self.dataset_id)
  openml.datasets.get_dataset(task.dataset_id).name,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, "encoded_labels"] = label_encoder.transform(y)


KeyError: "['Filename'] not in index"