Skip to content

Commit

Permalink
Add a configuration using xlm roberta and vit b/32 for multilingual clip (#216)
Browse files Browse the repository at this point in the history

* Add a configuration using xlm roberta and vit b/32 for multilingual clip

https://huggingface.co/docs/transformers/model_doc/xlm-roberta

* Update xlm-roberta-base-ViT-B-32.json

* Update hf_configs.py

* Update test_hf_model.py

* Update test_hf_model.py

* Update hf_configs.py

* Update xlm-roberta-base-ViT-B-32.json
  • Loading branch information
rom1504 committed Nov 10, 2022
1 parent 9e76329 commit bb924a8
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
11 changes: 11 additions & 0 deletions src/open_clip/hf_configs.py
Expand Up @@ -9,5 +9,16 @@
"layers": "num_hidden_layers",
},
"pooler": "mean_pooler",
},
# https://huggingface.co/docs/transformers/model_doc/xlm-roberta#transformers.XLMRobertaConfig
"xlm-roberta": {
"config_names": {
"context_length": "max_position_embeddings",
"vocab_size": "vocab_size",
"width": "hidden_size",
"heads": "num_attention_heads",
"layers": "num_hidden_layers",
},
"pooler": "mean_pooler",
}
}
15 changes: 15 additions & 0 deletions src/open_clip/model_configs/xlm-roberta-base-ViT-B-32.json
@@ -0,0 +1,15 @@
{
"embed_dim": 512,
"vision_cfg": {
"image_size": 224,
"layers": 12,
"width": 768,
"patch_size": 32
},
"text_cfg": {
"hf_model_name": "xlm-roberta-base",
"hf_tokenizer_name": "xlm-roberta-base",
"proj": "mlp",
"pooler_type": "mean_pooler"
}
}
2 changes: 1 addition & 1 deletion tests/test_hf_model.py
Expand Up @@ -17,7 +17,7 @@ def test_poolers():
assert res.shape == (bs, d), f"{name} returned wrong shape"

# test PreTrainedTextEncoder
@pytest.mark.parametrize("model_id", ["arampacha/roberta-tiny", "roberta-base"])
@pytest.mark.parametrize("model_id", ["arampacha/roberta-tiny", "roberta-base", "xlm-roberta-base"])
def test_pretrained_text_encoder(model_id):
bs, sl, d = 2, 10, 64
cfg = AutoConfig.from_pretrained(model_id)
Expand Down

0 comments on commit bb924a8

Please sign in to comment.