Skip to content

Commit

Permalink
Fix concurrent attempts at link deployment (#1541)
Browse files Browse the repository at this point in the history
* fix concurrent attempts at link deployment

* added test

* added test
  • Loading branch information
hellt committed Aug 20, 2023
1 parent b5dda3d commit 3268306
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
4 changes: 4 additions & 0 deletions docs/rn/0.44.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ Read more about this feature in the [Certificates Management](../manual/cert.md#

* fix: TCP sessions on srlinux were not establishing because tx offload was not disabled #1537
* fix: links to ovs bridges were blocked #1534

### 0.44.2

* fix: concurrent attempts to deploy the same link #1541
8 changes: 3 additions & 5 deletions links/link_veth.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ type LinkVEth struct {
Endpoints []Endpoint

deploymentState LinkDeploymentState
stateMutex sync.RWMutex
deployMutex sync.Mutex
}

func (*LinkVEth) GetType() LinkType {
Expand All @@ -100,11 +100,11 @@ func (*LinkVEth) Verify() {
func (l *LinkVEth) Deploy(ctx context.Context) error {
// since each node calls deploy on its links, we need to make sure that we only deploy
// the link once, even if multiple nodes call deploy on the same link.
l.stateMutex.RLock()
l.deployMutex.Lock()
defer l.deployMutex.Unlock()
if l.deploymentState == LinkDeploymentStateDeployed {
return nil
}
l.stateMutex.RUnlock()

for _, ep := range l.GetEndpoints() {
if ep.GetNode().GetState() != state.Deployed {
Expand Down Expand Up @@ -157,9 +157,7 @@ func (l *LinkVEth) Deploy(ctx context.Context) error {
}
}

l.stateMutex.Lock()
l.deploymentState = LinkDeploymentStateDeployed
l.stateMutex.Unlock()

return nil
}
Expand Down
23 changes: 19 additions & 4 deletions tests/01-smoke/04-generate.robot
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
*** Settings ***
Library OperatingSystem
Resource ../common.robot

Suite Teardown Cleanup
Library OperatingSystem
Resource ../common.robot


*** Variables ***
Expand All @@ -29,9 +27,26 @@ Verify nodes
Should Contain ${output} clab-${lab-name}-node2-1
Should Contain ${output} clab-${lab-name}-node3-1

Cleanup ${lab-name}

Deploy ${lab-name}-scale lab with generate command
[Documentation] Deploy 3-tier lab with 5 nodes in each tier. Tiers are interconnected with links.
... This test verifies that a scaled topology can be deployed without concurrency errors.
Skip If '${runtime}' != 'docker'
${rc} ${output} = Run And Return Rc And Output
... sudo -E ${CLAB_BIN} --runtime ${runtime} generate --name ${lab-name}-scale --kind linux --image alpine:3 --nodes 5,5,5 --deploy
Log ${output}
Should Be Equal As Integers ${rc} 0
Should Not Contain ${output} failed
Should Not Contain ${output} ERRO

Cleanup ${lab-name}-scale


*** Keywords ***
Cleanup
[Arguments] ${lab-name}

Skip If '${runtime}' != 'docker'
${rc} ${output} = Run And Return Rc And Output
... sudo -E ${CLAB_BIN} --runtime ${runtime} destroy -t ${lab-name}.clab.yml --cleanup
Expand Down

0 comments on commit 3268306

Please sign in to comment.