
Commit a36fbb2
Revert "Adds rule of thumb for determining embeddings size (#2050)" (#…
Browse files Browse the repository at this point in the history
…2069)

This reverts commit 37db773.
justinxzhao committed May 29, 2022
1 parent c92d891 commit a36fbb2
Showing 2 changed files with 7 additions and 13 deletions.
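For context, the heuristic removed by this revert (visible in the embedding_modules.py hunk below) derives an embedding size from the vocabulary size instead of relying on a fixed default. The snippet below is only an illustrative restatement of that formula; the helper name is hypothetical and the function is not part of the codebase after this revert.

# Illustration only: the rule of thumb that commit 37db773 added and this
# revert removes. After the revert, embedding_size must be given explicitly.
def rule_of_thumb_embedding_size(vocab_size: int) -> int:
    # Grows roughly with vocab_size ** 0.56 and is capped at 512.
    return min(512, int(round(1.6 * vocab_size**0.56)))

for vocab_size in (10, 100, 10_000, 1_000_000):
    print(vocab_size, rule_of_thumb_embedding_size(vocab_size))
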
ludwig/encoders/category_encoders.py (2 changes: 1 addition & 1 deletion)
@@ -31,7 +31,7 @@ class CategoricalEmbedEncoder(Encoder):
     def __init__(
         self,
         vocab: List[str],
-        embedding_size: Optional[int] = None,
+        embedding_size: int = 50,
         embeddings_trainable: bool = True,
         pretrained_embeddings: Optional[str] = None,
         embeddings_on_cpu: bool = False,
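
With this change, CategoricalEmbedEncoder goes back to a fixed default embedding size of 50 rather than leaving it unset for a heuristic to fill in. A hedged sketch of constructing the encoder, using only the parameters visible in the hunk above (the real constructor accepts further arguments not shown here):

# Sketch based solely on the parameters shown in the diff; the argument list
# is not exhaustive and may differ from the actual constructor.
from ludwig.encoders.category_encoders import CategoricalEmbedEncoder

encoder = CategoricalEmbedEncoder(
    vocab=["red", "green", "blue"],
    embedding_size=8,            # defaults to 50 again if omitted
    embeddings_trainable=True,
    pretrained_embeddings=None,
    embeddings_on_cpu=False,
)
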
ludwig/modules/embedding_modules.py (18 changes: 6 additions & 12 deletions)
@@ -30,7 +30,7 @@
 
 def embedding_matrix(
     vocab: List[str],
-    embedding_size: Optional[int] = None,
+    embedding_size: int,
     representation: str = "dense",
     embeddings_trainable: bool = True,
     pretrained_embeddings: Optional[str] = None,
@@ -43,10 +43,7 @@ def embedding_matrix(
     if representation == "dense":
         if pretrained_embeddings:
             embeddings_matrix = load_pretrained_embeddings(pretrained_embeddings, vocab)
-            if embedding_size is None:
-                embedding_size = embeddings_matrix.shape[-1]
-                logger.info(f"Setting embedding size to be equal to {embeddings_matrix.shape[-1]}.")
-            elif embeddings_matrix.shape[-1] != embedding_size:
+            if embeddings_matrix.shape[-1] != embedding_size:
                 if not force_embedding_size:
                     embedding_size = embeddings_matrix.shape[-1]
                     logger.info(f"Setting embedding size to be equal to {embeddings_matrix.shape[-1]}.")
@@ -60,10 +57,7 @@ def embedding_matrix(
             embedding_initializer_obj = torch.tensor(embeddings_matrix, dtype=torch.float32)
 
         else:
-            if embedding_size is None:
-                # use embedding size rule of thumb
-                embedding_size = min(512, int(round(1.6 * vocab_size**0.56)))
-            elif vocab_size < embedding_size and not force_embedding_size:
+            if vocab_size < embedding_size and not force_embedding_size:
                 logger.info(
                     f" embedding_size ({embedding_size}) is greater than "
                     f"vocab_size ({vocab_size}). Setting embedding size to be "
@@ -92,7 +86,7 @@ def embedding_matrix(
 
 def embedding_matrix_on_device(
     vocab: List[str],
-    embedding_size: Optional[int] = None,
+    embedding_size: int,
     representation: str = "dense",
     embeddings_trainable: bool = True,
     pretrained_embeddings: Optional[str] = None,
@@ -102,7 +96,7 @@ def embedding_matrix_on_device(
 ) -> Tuple[nn.Module, int]:
     embeddings, embedding_size = embedding_matrix(
         vocab,
-        embedding_size=embedding_size,
+        embedding_size,
         representation=representation,
         embeddings_trainable=embeddings_trainable,
         pretrained_embeddings=pretrained_embeddings,
@@ -123,7 +117,7 @@ class Embed(LudwigModule):
     def __init__(
         self,
         vocab: List[str],
-        embedding_size: Optional[int] = None,
+        embedding_size: int,
         representation: str = "dense",
         embeddings_trainable: bool = True,
         pretrained_embeddings: Optional[str] = None,
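
Net effect of this file's changes: embedding_matrix, embedding_matrix_on_device, and Embed take embedding_size as a required parameter again, with no heuristic fallback when it is omitted. The remaining adjustment is the vocab_size check retained in the hunk above; its tail is truncated here, but the log message suggests the size is clamped to the vocabulary size. A minimal sketch of that check, under that assumption:

# Sketch of the retained check from the hunk above. The clamp to vocab_size is
# inferred from the truncated log message, not from code shown in this diff.
def effective_embedding_size(vocab_size: int, embedding_size: int, force_embedding_size: bool = False) -> int:
    if vocab_size < embedding_size and not force_embedding_size:
        return vocab_size  # assumed: "Setting embedding size to be ..." equal to vocab_size
    return embedding_size

print(effective_embedding_size(vocab_size=3, embedding_size=50))   # 3 (clamped)
print(effective_embedding_size(vocab_size=3, embedding_size=50, force_embedding_size=True))  # 50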
