@@ -31,6 +31,7 @@ public sealed class BgpSession : IDisposable
3131 private ushort _negotiatedHoldTime ;
3232 private List < IpPrefix > _advertisedPrefixes = [ ] ;
3333 private TimeSpan _keepAliveInterval ;
34+ private long _lastReceivedTicks ; // UTC ticks of last received message; drives the HoldTimer (Interlocked)
3435
3536 public BgpFsmState State => _state ;
3637 public PeerConfig Peer => _peerConfig ;
@@ -59,24 +60,25 @@ public async Task RefreshRoutesAsync()
5960
6061 private async Task WithdrawAllAsync ( )
6162 {
62- if ( _advertisedPrefixes . Count == 0 ) return ;
63+ var count = _advertisedPrefixes . Count ;
64+ if ( count == 0 ) return ;
6365
6466 const int maxPerUpdate = 100 ;
65- for ( var i = 0 ; i < _advertisedPrefixes . Count ; i += maxPerUpdate )
67+ for ( var i = 0 ; i < count ; i += maxPerUpdate )
6668 {
67- var batch = _advertisedPrefixes . Skip ( i ) . Take ( maxPerUpdate ) . ToList ( ) ;
69+ var batch = _advertisedPrefixes . GetRange ( i , Math . Min ( maxPerUpdate , count - i ) ) ;
6870 var update = new BgpUpdateMessage
6971 {
7072 WithdrawnRoutes = batch ,
7173 PathAttributes = [ ] ,
7274 Nlri = [ ]
7375 } ;
74- await SendMessageAsync ( update ) ;
76+ await WriteMessageAsync ( update ) ; // caller (initial send / refresh) already holds _sendLock
7577 _metrics . UpdateSent ( ) ;
7678 }
7779
78- _logger . LogInformation ( "Withdrawn {Count} routes from {Peer}" , _advertisedPrefixes . Count , _peerConfig . Address ) ;
79- _advertisedPrefixes = [ ] ;
80+ _logger . LogInformation ( "Withdrawn {Count} routes from {Peer}" , count , _peerConfig . Address ) ;
81+ _advertisedPrefixes . Clear ( ) ;
8082 }
8183
8284 public BgpSession (
@@ -108,7 +110,7 @@ public BgpSession(
108110
109111 public async Task RunAsync ( CancellationToken cancellationToken = default )
110112 {
111- var linkedCts = CancellationTokenSource . CreateLinkedTokenSource ( cancellationToken , _cts . Token ) ;
113+ using var linkedCts = CancellationTokenSource . CreateLinkedTokenSource ( cancellationToken , _cts . Token ) ;
112114
113115 try
114116 {
@@ -172,8 +174,11 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
172174 _metrics . SessionEstablished ( ) ;
173175 _logger . LogInformation ( "SessionEstablished with {Peer} ASN={Asn}" , _peerConfig . Address , _remoteAsn ) ;
174176
175- // Send initial routes
176- await SendAllRoutesAsync ( ) ;
177+ // Send initial routes. Hold the send lock so _advertisedPrefixes stays consistent
178+ // w.r.t. a RefreshRoutesAsync fired from the API the instant IsEstablished became true.
179+ await _sendLock . WaitAsync ( linkedCts . Token ) ;
180+ try { await SendAllRoutesAsync ( ) ; }
181+ finally { _sendLock . Release ( ) ; }
177182
178183 // Run main loop: read messages + send keepalives
179184 await RunEstablishedAsync ( linkedCts . Token ) ;
@@ -182,6 +187,11 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
182187 {
183188 _logger . LogInformation ( "SessionClosed (cancelled) with {Peer}" , _peerConfig . Address ) ;
184189 }
190+ catch ( BgpNotificationException ex )
191+ {
192+ _logger . LogWarning ( ex , "BGP error from {Peer}: {Error}/{SubError}" , _peerConfig . Address , ex . ErrorCode , ex . SubErrorCode ) ;
193+ await SendNotificationAsync ( ex . ErrorCode , ex . SubErrorCode ) ;
194+ }
185195 catch ( BgpParseException ex )
186196 {
187197 _logger . LogError ( ex , "Parse error from {Peer}" , _peerConfig . Address ) ;
@@ -190,6 +200,8 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
190200 catch ( Exception ex )
191201 {
192202 _logger . LogError ( ex , "Session error with {Peer}" , _peerConfig . Address ) ;
203+ // Best-effort Cease so the peer sees a clean close instead of a bare TCP RST.
204+ try { await SendNotificationAsync ( BgpConstants . Error . Cease , BgpConstants . SubError . Unspecific ) ; } catch { }
193205 }
194206 finally
195207 {
@@ -209,23 +221,40 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
209221
210222 private async Task RunEstablishedAsync ( CancellationToken cancellationToken )
211223 {
212- using var keepaliveTimer = new PeriodicTimer ( _keepAliveInterval ) ;
224+ // Hold time 0 -> KEEPALIVE timer and Hold Timer are disabled (RFC 4271 §4.2/§6.5).
225+ if ( _negotiatedHoldTime == 0 )
226+ {
227+ await ReadLoopAsync ( cancellationToken ) ;
228+ await _cts . CancelAsync ( ) ;
229+ return ;
230+ }
213231
232+ Interlocked . Exchange ( ref _lastReceivedTicks , DateTime . UtcNow . Ticks ) ;
233+
234+ using var keepaliveTimer = new PeriodicTimer ( _keepAliveInterval ) ;
214235 var readTask = ReadLoopAsync ( cancellationToken ) ;
215- var keepaliveTask = KeepAliveLoopAsync ( keepaliveTimer , cancellationToken ) ;
236+ var keepaliveTask = HoldTimerLoopAsync ( keepaliveTimer , cancellationToken ) ;
216237
217238 await Task . WhenAny ( readTask , keepaliveTask ) ;
218- _cts . Cancel ( ) ;
239+ await _cts . CancelAsync ( ) ;
219240
220- try { await readTask ; } catch { }
221- try { await keepaliveTask ; } catch { }
241+ await AwaitLoopTaskAsync ( readTask , "read" ) ;
242+ await AwaitLoopTaskAsync ( keepaliveTask , "keepalive" ) ;
243+ }
244+
245+ private async Task AwaitLoopTaskAsync ( Task task , string label )
246+ {
247+ try { await task ; }
248+ catch ( OperationCanceledException ) { }
249+ catch ( Exception ex ) { _logger . LogWarning ( ex , "{Label} loop faulted for {Peer}" , label , _peerConfig . Address ) ; }
222250 }
223251
224252 private async Task ReadLoopAsync ( CancellationToken cancellationToken )
225253 {
226254 while ( ! cancellationToken . IsCancellationRequested )
227255 {
228256 var message = await ReceiveMessageAsync ( cancellationToken ) ;
257+ Interlocked . Exchange ( ref _lastReceivedTicks , DateTime . UtcNow . Ticks ) ;
229258
230259 switch ( message )
231260 {
@@ -244,10 +273,20 @@ private async Task ReadLoopAsync(CancellationToken cancellationToken)
244273 }
245274 }
246275
247- private async Task KeepAliveLoopAsync ( PeriodicTimer timer , CancellationToken cancellationToken )
276+ private async Task HoldTimerLoopAsync ( PeriodicTimer timer , CancellationToken cancellationToken )
248277 {
278+ var holdTime = TimeSpan . FromSeconds ( _negotiatedHoldTime ) ;
249279 while ( await timer . WaitForNextTickAsync ( cancellationToken ) )
250280 {
281+ // Hold timer: tear down if no message was received within the negotiated hold time (RFC 4271 §6.6).
282+ if ( DateTime . UtcNow . Ticks - Interlocked . Read ( ref _lastReceivedTicks ) >= holdTime . Ticks )
283+ {
284+ _logger . LogWarning ( "Hold timer expired for {Peer} (no message for {Hold}s)" ,
285+ _peerConfig . Address , _negotiatedHoldTime ) ;
286+ await SendNotificationAsync ( BgpConstants . Error . HoldTimerExpired , BgpConstants . SubError . Unspecific ) ;
287+ return ;
288+ }
289+
251290 await SendKeepaliveAsync ( ) ;
252291 _logger . LogDebug ( "KeepAliveSent to {Peer}" , _peerConfig . Address ) ;
253292 }
@@ -278,12 +317,16 @@ private async Task HandleUpdateAsync(BgpUpdateMessage update)
278317 switch ( attr . TypeCode )
279318 {
280319 case BgpConstants . Attribute . Origin :
320+ if ( attr . Data . Length < 1 )
321+ throw new BgpNotificationException ( BgpConstants . Error . UpdateMessageError , BgpConstants . SubError . Unspecific , "Malformed ORIGIN attribute" ) ;
281322 origin = AttributeHelper . ReadOrigin ( attr ) ;
282323 break ;
283324 case BgpConstants . Attribute . AsPath :
284325 asPath = AttributeHelper . ReadAsPath ( attr , _remoteFourByteAsn ) ;
285326 break ;
286327 case BgpConstants . Attribute . NextHop :
328+ if ( attr . Data . Length < 4 )
329+ throw new BgpNotificationException ( BgpConstants . Error . UpdateMessageError , BgpConstants . SubError . Unspecific , "Malformed NEXT_HOP attribute" ) ;
287330 nextHop = AttributeHelper . ReadNextHop ( attr ) ;
288331 break ;
289332 case BgpConstants . Attribute . Community :
@@ -552,23 +595,29 @@ private async Task SendAllRoutesAsync()
552595 }
553596 }
554597
555- // Final fallback: send from shared route table
556- var tableRoutes = _routeTable . GetAll ( ) ;
557- if ( tableRoutes . Count == 0 ) return ;
598+ // Final fallback: send from shared route table (single pass — one allocation, not two)
599+ var filtered = new List < Route > ( ) ;
600+ foreach ( var r in _routeTable . Enumerate ( ) )
601+ {
602+ if ( _routeFilter . AcceptOutgoing ( r , _peerConfig ) )
603+ filtered . Add ( r ) ;
604+ }
605+ if ( filtered . Count == 0 ) return ;
558606
559- var filtered = tableRoutes . Where ( r => _routeFilter . AcceptOutgoing ( r , _peerConfig ) ) . ToList ( ) ;
560607 await SendRoutesAsync ( nextHop , filtered ) ;
561608 }
562609
563610 private async Task SendRoutesAsync ( uint nextHop , List < Route > routes )
564611 {
565612 const int maxNlriPerUpdate = 100 ;
613+ _advertisedPrefixes . EnsureCapacity ( _advertisedPrefixes . Count + routes . Count ) ;
566614 var sent = 0 ;
567615 var batch = new List < Route > ( maxNlriPerUpdate ) ;
568616
569617 foreach ( var route in routes )
570618 {
571619 batch . Add ( route ) ;
620+ _advertisedPrefixes . Add ( new IpPrefix ( route . Prefix , route . PrefixLength ) ) ;
572621 if ( batch . Count >= maxNlriPerUpdate )
573622 {
574623 await SendRouteBatchAsync ( nextHop , batch ) ;
@@ -583,7 +632,6 @@ private async Task SendRoutesAsync(uint nextHop, List<Route> routes)
583632 sent += batch . Count ;
584633 }
585634
586- _advertisedPrefixes . AddRange ( routes . Select ( r => new IpPrefix ( r . Prefix , r . PrefixLength ) ) ) ;
587635 _logger . LogInformation ( "UpdateSent {Count} routes to {Peer}" , sent , _peerConfig . Address ) ;
588636 }
589637
@@ -617,7 +665,7 @@ private async Task SendUpdateBatchAsync(List<PathAttribute> attrs, List<IpPrefix
617665 Nlri = nlri
618666 } ;
619667
620- await SendMessageAsync ( update ) ;
668+ await WriteMessageAsync ( update ) ; // caller (initial send / refresh) already holds _sendLock
621669 _metrics . UpdateSent ( ) ;
622670 }
623671
@@ -631,8 +679,8 @@ private async Task<BgpMessage> ReceiveMessageAsync(CancellationToken cancellatio
631679 await ReadExactAsync ( headerBuffer . AsMemory ( 0 , BgpConstants . MessageHeaderSize ) , cancellationToken ) ;
632680
633681 var length = BgpMessageReader . GetMessageLength ( headerBuffer ) ;
634- if ( length < 0 )
635- throw new BgpParseException ( "Incomplete message header ") ;
682+ if ( length is < BgpConstants . MinMessageSize or > BgpConstants . MaxMessageSize )
683+ throw new BgpParseException ( $ "Invalid message length: { length } ") ;
636684
637685 var payloadSize = length - BgpConstants . MessageHeaderSize ;
638686 var messageBuffer = ArrayPool < byte > . Shared . Rent ( length ) ;
@@ -657,6 +705,14 @@ private async Task<BgpMessage> ReceiveMessageAsync(CancellationToken cancellatio
657705 }
658706
659707 private async Task SendMessageAsync ( BgpMessage message )
708+ {
709+ await _sendLock . WaitAsync ( ) ;
710+ try { await WriteMessageAsync ( message ) ; }
711+ finally { _sendLock . Release ( ) ; }
712+ }
713+
714+ // Writes one message without acquiring the lock — caller MUST already hold _sendLock.
715+ private async Task WriteMessageAsync ( BgpMessage message )
660716 {
661717 var bufferSize = BgpMessageWriter . GetBufferSize ( message ) ;
662718 var buffer = ArrayPool < byte > . Shared . Rent ( bufferSize ) ;
@@ -751,23 +807,31 @@ private async Task SendNotificationAsync(byte errorCode, byte subErrorCode)
751807 private void ValidateOpen ( BgpOpenMessage open )
752808 {
753809 if ( open . Version != BgpConstants . BgpVersion )
754- throw new BgpParseException ( $ "Unsupported BGP version: { open . Version } ") ;
810+ throw new BgpNotificationException ( BgpConstants . Error . OpenMessageError , BgpConstants . SubError . UnsupportedVersion , $ "Unsupported BGP version: { open . Version } ") ;
755811
756812 _remoteFourByteAsn = CapabilityHelper . GetRemoteAsn ( open ) . HasValue ;
757813 _remoteAsn = CapabilityHelper . GetEffectiveAsn ( open ) ;
758814
759815 _onPeerIdentified ? . Invoke ( _peerConfig . Address , _remoteAsn ) ;
760816
761817 if ( _peerConfig . RemoteAsn . HasValue && _remoteAsn != _peerConfig . RemoteAsn . Value )
762- throw new BgpParseException ( $ "Unexpected ASN: expected { _peerConfig . RemoteAsn } , got { _remoteAsn } ") ;
818+ throw new BgpNotificationException ( BgpConstants . Error . OpenMessageError , BgpConstants . SubError . BadPeerAs , $ "Unexpected ASN: expected { _peerConfig . RemoteAsn } , got { _remoteAsn } ") ;
763819
764820 var holdTime = open . HoldTime ;
765821 if ( holdTime != 0 && holdTime < 3 )
766- throw new BgpParseException ( $ "Unacceptable hold time: { holdTime } ") ;
822+ throw new BgpNotificationException ( BgpConstants . Error . OpenMessageError , BgpConstants . SubError . UnacceptableHoldTime , $ "Unacceptable hold time: { holdTime } ") ;
823+
824+ // BGP Identifier must be non-zero and must not collide with our own (RFC 4271 §6.2).
825+ if ( open . RouterId == 0 )
826+ throw new BgpNotificationException ( BgpConstants . Error . OpenMessageError , BgpConstants . SubError . BadBgpIdentifier , "Invalid BGP identifier: 0.0.0.0" ) ;
827+
828+ var localRouterId = BgpConstants . IPAddressToUint ( _bgpConfig . GetRouterIdAddress ( ) ) ;
829+ if ( open . RouterId == localRouterId )
830+ throw new BgpNotificationException ( BgpConstants . Error . OpenMessageError , BgpConstants . SubError . BadBgpIdentifier , "BGP identifier collision with local RouterId" ) ;
767831
768832 _negotiatedHoldTime = holdTime ;
769833 _keepAliveInterval = holdTime == 0
770- ? TimeSpan . FromSeconds ( _bgpConfig . KeepAlive )
834+ ? TimeSpan . Zero
771835 : TimeSpan . FromSeconds ( Math . Max ( holdTime / 3 , 1 ) ) ;
772836 }
773837
0 commit comments