Make text encoder trainable property default to False.
dantreiman committed May 25, 2022
1 parent 1218de3 · commit e39d6d1
Showing 1 changed file with 16 additions and 17 deletions.
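With this change every pretrained text encoder in this file is frozen by default, so users who want to fine-tune the transformer backbone must now pass trainable=True explicitly. As a minimal sketch of what the flag implies, assuming the standard PyTorch requires_grad mechanism rather than Ludwig's exact internals:

    from transformers import AutoModel

    # Load a pretrained backbone the way these encoders do internally.
    transformer = AutoModel.from_pretrained("bert-base-uncased")

    trainable = False  # the new default introduced by this commit
    for param in transformer.parameters():
        # Frozen parameters receive no gradient updates, so only the
        # layers added downstream of the encoder are trained.
        param.requires_grad = trainable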
ludwig/encoders/text_encoders.py: 33 changes (16 additions, 17 deletions)
@@ -45,7 +45,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "albert-base-v2",
         saved_weights_in_checkpoint: bool = False,
-        trainable: bool = True,
+        trainable: bool = False,
         reduce_output: str = "cls_pooled",
         vocab_size: int = 30000,
         embedding_size: int = 128,
@@ -173,7 +173,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "google/mt5-base",
         saved_weights_in_checkpoint: bool = False,
-        trainable: bool = True,
+        trainable: bool = False,
         reduce_output: str = "cls_pooled",
         vocab_size: int = 250112,
         d_model: int = 512,
@@ -299,7 +299,7 @@ def __init__(
         pretrained_model_name_or_path: str = "xlm-roberta-base",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "cls_pooled",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = None,
         pad_token_id: int = 1,
         bos_token_id: int = 0,
@@ -396,7 +396,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "bert-base-uncased",
         saved_weights_in_checkpoint: bool = False,
-        trainable: bool = True,
+        trainable: bool = False,
         reduce_output: str = "cls_pooled",
         vocab_size: int = 30522,
         hidden_size: int = 768,
@@ -519,7 +519,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "xlm-mlm-en-2048",
         saved_weights_in_checkpoint: bool = False,
-        trainable: bool = True,
+        trainable: bool = False,
         reduce_output: str = "cls_pooled",
         vocab_size: int = 30145,
         emb_dim: int = 2048,
@@ -659,7 +659,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "openai-gpt",
         saved_weights_in_checkpoint: bool = False,
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 30522,
         n_positions: int = 40478,
         n_ctx: int = 512,
@@ -759,7 +759,7 @@ def __init__(
         use_pretrained: bool = True,
         pretrained_model_name_or_path: str = "gpt2",
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 50257,
         n_positions: int = 1024,
         n_ctx: int = 1024,
@@ -864,7 +864,7 @@ def __init__(
         pretrained_model_name_or_path: str = "roberta-base",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "cls_pooled",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = None,
         pad_token_id: int = 1,
         bos_token_id: int = 0,
@@ -900,7 +900,6 @@ def __init__(
         self.reduce_output = reduce_output
         if not self.reduce_output == "cls_pooled":
             self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
-        self.transformer.trainable = trainable
         self.transformer.resize_token_embeddings(vocab_size)

     def forward(self, inputs: torch.Tensor, mask: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
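Note that the hunk above also removes self.transformer.trainable = trainable without a replacement, which accounts for the commit's extra deletion (17 deletions against 16 additions). In PyTorch, assigning a bare .trainable attribute to a module is a Keras/TensorFlow idiom with no effect on gradient computation, so the line was effectively dead code. A hypothetical per-parameter equivalent, shown for illustration only and not taken from the Ludwig sources:

    # Freeze the pretrained weights when the encoder is not trainable.
    if not trainable:
        for param in self.transformer.parameters():
            param.requires_grad = False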
@@ -951,7 +950,7 @@ def __init__(
         pretrained_model_name_or_path: str = "transfo-xl-wt103",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 267735,
         cutoffs: List[int] = [20000, 40000, 200000],
         d_model: int = 1024,
@@ -1074,7 +1073,7 @@ def __init__(
         pretrained_model_name_or_path: str = "xlnet-base-cased",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 32000,
         d_model: int = 1024,
         n_layer: int = 24,
@@ -1411,7 +1410,7 @@ def __init__(
         pretrained_model_name_or_path: str = "ctrl",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "cls-pooled",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 30522,
         hidden_size: int = 768,
         num_hidden_layers: int = 12,
@@ -1531,7 +1530,7 @@ def __init__(
         pretrained_model_name_or_path: str = "t5-small",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 32128,
         d_model: int = 512,
         d_kv: int = 64,
@@ -1638,7 +1637,7 @@ def __init__(
         pretrained_model_name_or_path: str = "flaubert/flaubert_small_cased",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 30145,
         pre_norm: bool = False,
         layerdrop: float = 0.0,
@@ -1773,7 +1772,7 @@ def __init__(
         pretrained_model_name_or_path: str = "google/electra-small-discriminator",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = 30522,
         embedding_size: int = 128,
         hidden_size: int = 256,
@@ -1888,7 +1887,7 @@ def __init__(
         pretrained_model_name_or_path: str = "allenai/longformer-base-4096",
         saved_weights_in_checkpoint: bool = False,
         reduce_output: Optional[str] = "cls_pooled",
-        trainable: bool = True,
+        trainable: bool = False,
         num_tokens: Optional[int] = None,
         pretrained_kwargs: Dict = None,
         **kwargs
@@ -1968,7 +1967,7 @@ def __init__(
         pretrained_model_name_or_path: str,
         max_sequence_length: int,
         reduce_output: str = "sum",
-        trainable: bool = True,
+        trainable: bool = False,
         vocab_size: int = None,
         pretrained_kwargs: Dict = None,
         **kwargs
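After this commit, restoring the old fine-tuning behavior requires an explicit override. A hedged usage sketch; the class name and constructor arguments below are read from the diff, so check them against the Ludwig version you actually use:

    from ludwig.encoders.text_encoders import BERTEncoder

    # Opt back into fine-tuning by overriding the new False default.
    encoder = BERTEncoder(
        pretrained_model_name_or_path="bert-base-uncased",
        trainable=True,  # was the default before this change
    )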
