Rebuild docs #103

Merged
merged 12 commits on Jul 22, 2023
1 change: 1 addition & 0 deletions README.md
@@ -6,6 +6,7 @@
![publish](https://github.com/mwalmsley/zoobot/actions/workflows/python-publish.yml/badge.svg)
[![PyPI](https://badge.fury.io/py/zoobot.svg)](https://badge.fury.io/py/zoobot)
[![DOI](https://zenodo.org/badge/343787617.svg)](https://zenodo.org/badge/latestdoi/343787617)
[![status](https://joss.theoj.org/papers/447561ee2de4709eddb704e18bee846f/status.svg)](https://joss.theoj.org/papers/447561ee2de4709eddb704e18bee846f)
<a href="https://ascl.net/2203.027"><img src="https://img.shields.io/badge/ascl-2203.027-blue.svg?colorB=262255" alt="ascl:2203.027" /></a>

Zoobot classifies galaxy morphology with deep learning.
10 changes: 6 additions & 4 deletions benchmarks/pytorch/run_benchmarks.sh
@@ -14,20 +14,22 @@ SEED=$RANDOM

# GZ Evo i.e. all galaxies
# effnet, greyscale and color
sbatch --job-name=evo_py_gr_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_co_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_co_eff_224_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
sbatch --job-name=evo_py_co_eff_300_$SEED --export=ARCHITECTURE=efficientnet_b0,BATCH_SIZE=128,RESIZE_AFTER_CROP=300,DATASET=gz_evo,COLOR_STRING=--color,GPUS=2,SEED=$SEED $TRAIN_JOB
# and resnet18
# sbatch --job-name=evo_py_gr_res18_224_$SEED --export=ARCHITECTURE=resnet18,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_res18_300_$SEED --export=ARCHITECTURE=resnet18,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# and resnet50
# sbatch --job-name=evo_py_gr_res50_224_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_gr_res50_300_$SEED --export=ARCHITECTURE=resnet50,BATCH_SIZE=256,RESIZE_AFTER_CROP=300,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# and with max-vit tiny because hey transformers are cool

# smaller batch size due to memory
# sbatch --job-name=evo_py_gr_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# sbatch --job-name=evo_py_co_vittiny_224_$SEED --export=ARCHITECTURE=maxvit_tiny_224,BATCH_SIZE=128,RESIZE_AFTER_CROP=224,DATASET=gz_evo,COLOR_STRING=--color,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB

# and max-vit small (works badly)
# sbatch --job-name=evo_py_gr_vitsmall_224_$SEED --export=ARCHITECTURE=maxvit_small_224,BATCH_SIZE=64,RESIZE_AFTER_CROP=224,DATASET=gz_evo,MIXED_PRECISION_STRING=--mixed-precision,GPUS=2,SEED=$SEED $TRAIN_JOB
# and convnext (works badly)
7 changes: 6 additions & 1 deletion docs/data_notes.rst
@@ -33,7 +33,7 @@ Zoobot includes weights for the following pretrained models.
- 224px
- 3
- Yes
- WIP
- `Link <https://www.dropbox.com/s/19mooltlnofnj10crd88v/effnetb0_color_224px.ckpt?dl=0>`__
* - ResNet50
- 300px
- 1
@@ -59,6 +59,11 @@ Zoobot includes weights for the following pretrained models.
- 1
- Not yet
- `Link <https://www.dropbox.com/s/pndcgi6wxh9wuqb/maxvittiny_greyscale_224px.ckpt?dl=0>`__
* - Max-ViT Tiny
- 224px
- 3
- Not yet
- `Link <https://www.dropbox.com/s/ibuo5n1tcaphvn3/maxvittiny_color_224px.ckpt?dl=0>`__



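The checkpoints in the table above are PyTorch Lightning `.ckpt` files. As a minimal sketch of how one might be loaded: this assumes `define_model.py` exposes the LightningModule as `ZoobotTree`; the class name and the `forward()` signature are illustrative assumptions, not confirmed by this diff.

```python
# Minimal sketch: load a pretrained checkpoint from the table above.
# ZoobotTree and its forward() signature are assumptions for illustration.
import torch
from zoobot.pytorch.estimators import define_model

model = define_model.ZoobotTree.load_from_checkpoint(
    'effnetb0_color_224px.ckpt',  # the color, 224px EfficientNetB0 row
    map_location='cpu',
)
model.eval()

with torch.no_grad():
    batch = torch.rand(2, 3, 224, 224)  # 3 channels to match the color checkpoint
    predictions = model(batch)
```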
2 changes: 2 additions & 0 deletions zoobot/pytorch/estimators/define_model.py
@@ -185,6 +185,8 @@ def __init__(
# bit lazy assuming 224 input size
self.encoder_dim = get_encoder_dim(self.encoder, input_size=224, channels=channels)
# typically encoder_dim=1280 for effnetb0
logging.info('encoder dim: {}'.format(self.encoder_dim))


self.head = get_pytorch_dirichlet_head(
self.encoder_dim,
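`get_encoder_dim` itself is not shown in this diff, but the new logging line records what it returns: the width of the encoder's output features (typically 1280 for efficientnet_b0, per the comment above). A hedged sketch of the usual dummy-forward-pass way to infer this; the body below is illustrative, not Zoobot's actual implementation.

```python
# Illustrative only: infer an encoder's output width by passing a fake
# image through it, as get_encoder_dim(encoder, input_size, channels)
# presumably does internally.
import torch

def get_encoder_dim(encoder, input_size=224, channels=3):
    dummy = torch.rand(1, channels, input_size, input_size)  # one fake image
    with torch.no_grad():
        return encoder(dummy).shape[-1]  # e.g. 1280 for efficientnet_b0
```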
12 changes: 8 additions & 4 deletions zoobot/pytorch/training/finetune.py
@@ -121,12 +121,13 @@ def configure_optimizers(self):

if self.freeze:
params = self.head.parameters()
return torch.optim.AdamW(params, lr=self.learning_rate)
return torch.optim.AdamW(params, betas=(0.9, 0.999), lr=self.learning_rate)
else:
lr = self.learning_rate
params = [{"params": self.head.parameters(), "lr": lr}]

# this bit is specific to Zoobot EffNet
# TODO check these are blocks not individual layers
encoder_blocks = list(self.encoder.children())

# for n, l in enumerate(encoder_blocks):
@@ -135,6 +136,7 @@ def configure_optimizers(self):
# print(l)

# layers with no parameters don't count
# TODO double-check is_tuneable
tuneable_blocks = [b for b in encoder_blocks if is_tuneable(b)]

assert self.n_layers <= len(
@@ -252,6 +254,7 @@ def __init__(
self,
num_classes: int,
label_smoothing=0.,
class_weights=None,
**super_kwargs) -> None:

super().__init__(**super_kwargs)
@@ -264,6 +267,7 @@ def __init__(
)
self.label_smoothing = label_smoothing
self.loss = partial(cross_entropy_loss,
weight=class_weights,
label_smoothing=self.label_smoothing)
self.train_acc = tm.Accuracy(task='binary', average="micro")
self.val_acc = tm.Accuracy(task='binary', average="micro")
@@ -385,12 +389,12 @@ def forward(self, x):
return x


def cross_entropy_loss(y_pred, y, label_smoothing=0.):
def cross_entropy_loss(y_pred, y, label_smoothing=0., weight=None):
# y should be shape (batch) and ints
# y_pred should be shape (batch, classes)
# returns loss of shape (batch)
# reduction is handled later, by the caller
return F.cross_entropy(y_pred, y.long(), label_smoothing=label_smoothing, reduction='none')
return F.cross_entropy(y_pred, y.long(), label_smoothing=label_smoothing, weight=weight, reduction='none')
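Taken together, the changes above let callers pass `class_weights` through to the `weight` argument of `F.cross_entropy`. A short, self-contained sketch of one common choice, inverse-frequency weights; the weighting scheme is illustrative, not necessarily Zoobot's.

```python
import torch
import torch.nn.functional as F

def cross_entropy_loss(y_pred, y, label_smoothing=0., weight=None):
    # per-example loss, as defined above; reduction is left to the caller
    return F.cross_entropy(y_pred, y.long(), label_smoothing=label_smoothing,
                           weight=weight, reduction='none')

labels = torch.tensor([0, 0, 0, 1])          # toy imbalanced binary labels
counts = torch.bincount(labels).float()      # class counts: [3., 1.]
class_weights = counts.sum() / (2 * counts)  # inverse frequency: [0.67, 2.0]

logits = torch.randn(4, 2)                   # (batch, classes)
loss = cross_entropy_loss(logits, labels, weight=class_weights)
print(loss.shape)                            # torch.Size([4])
```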


def dirichlet_loss(y_pred, y, question_index_groups):
@@ -481,7 +485,7 @@ def get_trainer(

return trainer


# TODO check exactly which layers get fine-tuned
def is_tuneable(block_of_layers):
if len(list(block_of_layers.parameters())) == 0:
logging.info('Skipping block with no params')
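For context on the `configure_optimizers` changes above: listing `encoder.children()`, dropping parameter-free blocks (the `is_tuneable` check), and giving each remaining block its own learning rate is the discriminative fine-tuning pattern. Below is a hedged sketch of that pattern; `lr_decay`, the decay schedule, and the toy modules are assumptions for illustration, not Zoobot's exact scheme.

```python
# Illustrative sketch: the head trains at the base lr, while the last
# n_layers tuneable encoder blocks train at successively smaller lrs.
import torch
from torch import nn

def blockwise_param_groups(head, encoder, lr=1e-4, lr_decay=0.75, n_layers=2):
    params = [{'params': head.parameters(), 'lr': lr}]
    # layers with no parameters don't count, echoing is_tuneable above
    blocks = [b for b in encoder.children() if len(list(b.parameters())) > 0]
    assert n_layers <= len(blocks)
    for i, block in enumerate(reversed(blocks[-n_layers:])):
        params.append({'params': block.parameters(),
                       'lr': lr * lr_decay ** (i + 1)})  # earlier block, smaller lr
    return params

# toy modules, just to show the optimizer construction
encoder = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
head = nn.Linear(8, 2)
optimizer = torch.optim.AdamW(blockwise_param_groups(head, encoder),
                              betas=(0.9, 0.999))
```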
39 changes: 22 additions & 17 deletions zoobot/shared/load_predictions.py
@@ -40,23 +40,28 @@ def load_hdf5s(hdf5_locs: List):
prediction_metadata = []
template_label_cols = None # will use this var to check consistency of label_cols across each hdf5_loc
for loc in hdf5_locs:
with h5py.File(loc, 'r') as f:
logging.debug(f.keys())
these_predictions = f['predictions'][:]
these_prediction_metadata = {
'id_str': f['id_str'].asstr()[:],
'hdf5_loc': [os.path.basename(loc) for _ in these_predictions]
}
predictions.append(these_predictions) # will create a list where each element is 3D predictions stored in each hdf5
prediction_metadata.append(these_prediction_metadata) # also track id_str, similarly

if template_label_cols is None: # first file to load, use this as the expected template
template_label_cols = f['label_cols'].asstr()[:]
logging.info('Using label columns {} from first hdf5 {}'.format(template_label_cols, loc))
else:
these_label_cols = f['label_cols'].asstr()[:]
if any(these_label_cols != template_label_cols):
raise ValueError('Label columns {} of hdf5 {} do not match first label columns {}'.format(loc, f['label_cols'], template_label_cols))
try:
with h5py.File(loc, 'r') as f:

logging.debug(f.keys())
these_predictions = f['predictions'][:]
these_prediction_metadata = {
'id_str': f['id_str'].asstr()[:],
'hdf5_loc': [os.path.basename(loc) for _ in these_predictions]
}
predictions.append(these_predictions) # will create a list where each element is 3D predictions stored in each hdf5
prediction_metadata.append(these_prediction_metadata) # also track id_str, similarly

if template_label_cols is None: # first file to load, use this as the expected template
template_label_cols = f['label_cols'].asstr()[:]
logging.info('Using label columns {} from first hdf5 {}'.format(template_label_cols, loc))
else:
these_label_cols = f['label_cols'].asstr()[:]
if any(these_label_cols != template_label_cols):
raise ValueError('Label columns {} of hdf5 {} do not match first label columns {}'.format(these_label_cols, loc, template_label_cols))
except Exception as e:
logging.critical('Failed to load {}'.format(loc))
raise e


# there is no assumption that id_str is unique, or attempt to group predictions by id_str
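For reference, the file layout `load_hdf5s` expects, judging only from the keys read above: a 3D `predictions` dataset (first axis one row per galaxy, matching `id_str`), plus `id_str` and `label_cols` string datasets. A sketch that writes a compatible toy file; the shapes, axis meanings, and column names are illustrative.

```python
# Illustrative: write an hdf5 holding the three datasets load_hdf5s reads.
import h5py
import numpy as np

with h5py.File('toy_predictions.hdf5', 'w') as f:
    # assumed axes: (galaxy, answer, forward pass); 3D per the comment above
    f.create_dataset('predictions', data=np.random.rand(10, 4, 5))
    f.create_dataset(
        'id_str', data=np.array(['galaxy_{}'.format(i) for i in range(10)], dtype='S'))
    f.create_dataset(
        'label_cols', data=np.array(['smooth', 'featured', 'artifact', 'edge_on'], dtype='S'))

results = load_hdf5s(['toy_predictions.hdf5'])  # return value not shown in this diff
```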