# Settings

In [1]:
# Install packages
!pip install optuna
!pip install torcheval
!pip install ipdb
!pip install pydicom
!pip install segment_anything
!pip install scikit-image
!pip install -U kaleido

# Import classes
from TrainUtils.OptunaParamFinder import OptunaParamFinder
from DataUtils.XrayDataset import XrayDataset
from Enumerators.NetType import NetType

# Initialize the file system
import s3fs
s3 = s3fs.S3FileSystem()

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet!=0.4.17 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
Downloading alembic-1.14.0-py3-none-any.whl (233 kB)
Downloading SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m35.5 MB/s

Matplotlib is building the font cache; this may take a moment.
Downloading: "https://download.pytorch.org/models/vit_b_32-d86f8d99.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/vit_b_32-d86f8d99.pth
Downloading: "https://download.pytorch.org/models/resnext50_32x4d-1a0047aa.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/resnext50_32x4d-1a0047aa.pth
Downloading: "https://download.pytorch.org/models/resnext101_64x4d-173b62eb.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/resnext101_64x4d-173b62eb.pth


# Initialize data

In [2]:
# Root location of the stored datasets
working_dir1 = "s3://dd-s-matteo-dev-resources/"

# Load the training, validation and test datasets (in that order) from the
# working directory; the generator is consumed left to right by the unpacking.
train_data1, val_data1, test_data1 = (
    XrayDataset.load_dataset(working_dir=working_dir1, dataset_name=stored_name, s3=s3)
    for stored_name in (
        "xray_dataset_training",
        "xray_dataset_validation",
        "xray_dataset_test",
    )
)

The dataset xray_dataset_training have been loaded!
The dataset xray_dataset_validation have been loaded!
The dataset xray_dataset_test have been loaded!


# Optimize parameters

In [4]:
# Settings for the hyper-parameter search
n_trials1 = 5
use_cuda1 = True
val_epochs1 = 10
epochs1 = 50
net_type1 = NetType.BASE_RES_NEXT101
model_name1 = "resnext101_optuna"

# Collect every constructor argument in one place, then build the finder
finder_settings1 = dict(
    model_name=model_name1,
    working_dir=working_dir1,
    train_data=train_data1,
    val_data=val_data1,
    test_data=test_data1,
    net_type=net_type1,
    epochs=epochs1,
    val_epochs=val_epochs1,
    use_cuda=use_cuda1,
    n_trials=n_trials1,
    s3=s3,
)
optuna1 = OptunaParamFinder(**finder_settings1)

# Run the search
optuna1.initialize_study()

# Blank line to separate the search log from the study report
print()

# Report on the finished study
optuna1.analyze_study()

[I 2025-01-03 14:56:36,516] A new study created in memory with name: no-name-bf672849-a02d-457c-86e4-050042ead531


-------------------------------------------------------------------------------------------------------
Parameters: {'n_conv_segment_neurons': 2048, 'n_conv_view_neurons': 512, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 3, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 1e-05, 'lr_second_last_factor': 31, 'batch_size': 16, 'p_dropout': 0.6, 'use_batch_norm': True}
Validation convergence has been reached sooner...
'resnext101_optuna' has been successfully saved!... train loss: 0.215 -> 0.0555

Training loss = 45.67308 - Training accuracy = 53.2786906% - Training F1-score = 58.874%
Performance for TRAINING set:
 - Accuracy: 53.28%
 - LOSS: 45.67
 - F1-score: 58.87%
 - AUC: 64.29%
 - MCC: 0.34



[I 2025-01-03 15:59:49,191] Trial 50 finished with value: 0.5618 and parameters: {'n_conv_segment_neurons': 11, 'n_conv_view_neurons': 9, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 3, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 5, 'lr_second_last_factor': 31, 'batch_size': 4, 'p_drop': 6, 'use_batch_norm': True}. Best is trial 14 with value: 0.60241.


Validation loss = 40.625 - Validation accuracy = 51.8518507% - Validation F1-score = 56.18
Performance for VALIDATION set:
 - Accuracy: 51.85%
 - LOSS: 40.62
 - F1-score: 56.18%
 - AUC: 63.53%
 - MCC: 0.3
-------------------------------------------------------------------------------------------------------
Parameters: {'n_conv_segment_neurons': 1024, 'n_conv_view_neurons': 512, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 5, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 1e-05, 'lr_second_last_factor': 11, 'batch_size': 32, 'p_dropout': 0.4, 'use_batch_norm': True}


[I 2025-01-03 16:16:49,056] Trial 51 pruned. 


-------------------------------------------------------------------------------------------------------
Parameters: {'n_conv_segment_neurons': 512, 'n_conv_view_neurons': 512, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 5, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 1e-05, 'lr_second_last_factor': 11, 'batch_size': 32, 'p_dropout': 0.4, 'use_batch_norm': True}
Validation convergence has been reached sooner...
'resnext101_optuna' has been successfully saved!... train loss: 0.2839 -> 0.0432

Training loss = 49.21875 - Training accuracy = 53.2786906% - Training F1-score = 58.874%
Performance for TRAINING set:
 - Accuracy: 53.28%
 - LOSS: 49.22
 - F1-score: 58.87%
 - AUC: 64.29%
 - MCC: 0.34

Validation loss = 47.97794 - Validation accuracy = 51.8518507% - Validation F1-score = 56.18
Performance for VALIDATION set:
 - Accuracy: 51.85%
 - LOSS: 47.98
 - F1-score: 56.18%
 - AUC: 63.53%
 - MCC: 0.3


[I 2025-01-03 17:05:10,448] Trial 52 finished with value: 0.5618 and parameters: {'n_conv_segment_neurons': 9, 'n_conv_view_neurons': 9, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 5, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 5, 'lr_second_last_factor': 11, 'batch_size': 5, 'p_drop': 4, 'use_batch_norm': True}. Best is trial 14 with value: 0.60241.


-------------------------------------------------------------------------------------------------------
Parameters: {'n_conv_segment_neurons': 1024, 'n_conv_view_neurons': 512, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 5, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 1e-05, 'lr_second_last_factor': 11, 'batch_size': 32, 'p_dropout': 0.4, 'use_batch_norm': True}


[I 2025-01-03 17:06:54,445] Trial 53 pruned. 


-------------------------------------------------------------------------------------------------------
Parameters: {'n_conv_segment_neurons': 1024, 'n_conv_view_neurons': 512, 'n_conv_segment_layers': 1, 'n_conv_view_layers': 1, 'kernel_size': 5, 'n_fc_layers': 2, 'optimizer': 'RMSprop', 'lr_last': 1e-05, 'lr_second_last_factor': 1, 'batch_size': 32, 'p_dropout': 0.4, 'use_batch_norm': True}


[I 2025-01-03 17:08:40,013] Trial 54 pruned. 



Storing study...
Study stored!
Best study:
batch_size: 4
kernel_size: 3
lr_last: 4
lr_second_last_factor: 1
n_conv_segment_layers: 1
n_conv_segment_neurons: 11
n_conv_view_layers: 1
n_conv_view_neurons: 9
n_fc_layers: 2
optimizer: RMSprop
p_drop: 6
use_batch_norm: True
