Skip to content

Commit

Permalink
receiving reports of instability when scaling up with the SiLU based value gates. use the sigmoid gating from alphafold2 instead
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains committed May 10, 2024
1 parent 33b8a32 commit fa2773a
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
6 changes: 3 additions & 3 deletions iTransformer/iTransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def __init__(
)

self.to_v_gates = nn.Sequential(
- nn.Linear(dim, dim_inner, bias = False),
- nn.SiLU(),
- Rearrange('b n (h d) -> b h n d', h = heads)
+ nn.Linear(dim, heads, bias = False),
+ nn.Sigmoid(),
+ Rearrange('b n h -> b h n 1', h = heads)
)

self.attend = Attend(flash = flash, dropout = dropout)
Expand Down
6 changes: 3 additions & 3 deletions iTransformer/iTransformer2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def __init__(
)

self.to_v_gates = nn.Sequential(
- nn.Linear(dim, dim_inner, bias = False),
- nn.SiLU(),
- Rearrange('b n (h d) -> b h n d', h = heads)
+ nn.Linear(dim, heads, bias = False),
+ nn.Sigmoid(),
+ Rearrange('b n h -> b h n 1', h = heads)
)

self.attend = Attend(flash = flash, dropout = dropout, causal = causal)
Expand Down
6 changes: 3 additions & 3 deletions iTransformer/iTransformerFFT.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ def __init__(
)

self.to_v_gates = nn.Sequential(
- nn.Linear(dim, dim_inner, bias = False),
- nn.SiLU(),
- Rearrange('b n (h d) -> b h n d', h = heads)
+ nn.Linear(dim, heads, bias = False),
+ nn.Sigmoid(),
+ Rearrange('b n h -> b h n 1', h = heads)
)

self.attend = Attend(flash = flash, dropout = dropout)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'iTransformer',
packages = find_packages(exclude=[]),
- version = '0.5.5',
+ version = '0.6.0',
license='MIT',
description = 'iTransformer - Inverted Transformer Are Effective for Time Series Forecasting',
author = 'Phil Wang',
Expand Down

0 comments on commit fa2773a

Please sign in to comment.