Add HF BERT configs for Microsoft PubMed CLIP model (#491)

* Add BERT HF config
* Add ClsLastHiddenStatePooler to avoid needing to override ClsPooler bool arg
* Update pooler comment for clarity
mlfoundations · Apr 16, 2023 · ff2df73 · ff2df73
1 parent 37b729b
commit ff2df73
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 0 deletions.
diff --git a/src/open_clip/hf_configs.py b/src/open_clip/hf_configs.py
@@ -42,4 +42,15 @@
         },
         "pooler": "mean_pooler",
     },
+    # https://huggingface.co/docs/transformers/model_doc/bert
+    "bert": {
+        "config_names": {
+            "context_length": "max_position_embeddings",
+            "vocab_size": "vocab_size",
+            "width": "hidden_size",
+            "heads": "num_attention_heads",
+            "layers": "num_hidden_layers",
+        },
+        "pooler": "cls_pooler",
+    },
 }
diff --git a/src/open_clip/hf_model.py b/src/open_clip/hf_model.py
@@ -80,6 +80,20 @@ def forward(self, x: BaseModelOutput, attention_mask: TensorType):
         return x.last_hidden_state[:, self.cls_token_position, :]
 
 
+@register_pooler
+class ClsLastHiddenStatePooler(nn.Module):
+    """CLS token pooling: forward() returns x.last_hidden_state[:, 0, :] and never reads a pooler output.
+    NOTE: this is equivalent to ClsPooler above with use_pooler_output=False
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.cls_token_position = 0  # index picked along dim 1 of last_hidden_state in forward()
+
+    def forward(self, x: BaseModelOutput, attention_mask: TensorType):  # attention_mask is accepted but not used here
+        return x.last_hidden_state[:, self.cls_token_position, :]  # keeps dims 0 and 2, selects one position on dim 1
+
+
 class HFTextEncoder(nn.Module):
     """HuggingFace model adapter"""
     output_tokens: torch.jit.Final[bool]