Skip to content

Commit dfa286c

Browse files
committed
feat: enhance BGP session stability and compliance with RFC 4271
- Add hold timer enforcement with `_lastReceivedTicks` to detect and handle session liveness. - Improve exception handling with `BgpNotificationException` for protocol-compliant error reporting. - Refactor route withdrawal and update messaging to ensure consistency and reliability. - Optimize `HoldTimerLoopAsync` and `AwaitLoopTaskAsync` for robust timer and task management. - Add validation for BGP identifiers, hold times, and message length to prevent protocol violations.
1 parent 0367f83 commit dfa286c

1 file changed

Lines changed: 91 additions & 27 deletions

File tree

BGPLite.Server/BgpSession.cs

Lines changed: 91 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public sealed class BgpSession : IDisposable
3131
private ushort _negotiatedHoldTime;
3232
private List<IpPrefix> _advertisedPrefixes = [];
3333
private TimeSpan _keepAliveInterval;
34+
private long _lastReceivedTicks; // UTC ticks of last received message; drives the HoldTimer (Interlocked)
3435

3536
public BgpFsmState State => _state;
3637
public PeerConfig Peer => _peerConfig;
@@ -59,24 +60,25 @@ public async Task RefreshRoutesAsync()
5960

6061
private async Task WithdrawAllAsync()
6162
{
62-
if (_advertisedPrefixes.Count == 0) return;
63+
var count = _advertisedPrefixes.Count;
64+
if (count == 0) return;
6365

6466
const int maxPerUpdate = 100;
65-
for (var i = 0; i < _advertisedPrefixes.Count; i += maxPerUpdate)
67+
for (var i = 0; i < count; i += maxPerUpdate)
6668
{
67-
var batch = _advertisedPrefixes.Skip(i).Take(maxPerUpdate).ToList();
69+
var batch = _advertisedPrefixes.GetRange(i, Math.Min(maxPerUpdate, count - i));
6870
var update = new BgpUpdateMessage
6971
{
7072
WithdrawnRoutes = batch,
7173
PathAttributes = [],
7274
Nlri = []
7375
};
74-
await SendMessageAsync(update);
76+
await WriteMessageAsync(update); // caller (initial send / refresh) already holds _sendLock
7577
_metrics.UpdateSent();
7678
}
7779

78-
_logger.LogInformation("Withdrawn {Count} routes from {Peer}", _advertisedPrefixes.Count, _peerConfig.Address);
79-
_advertisedPrefixes = [];
80+
_logger.LogInformation("Withdrawn {Count} routes from {Peer}", count, _peerConfig.Address);
81+
_advertisedPrefixes.Clear();
8082
}
8183

8284
public BgpSession(
@@ -108,7 +110,7 @@ public BgpSession(
108110

109111
public async Task RunAsync(CancellationToken cancellationToken = default)
110112
{
111-
var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, _cts.Token);
113+
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, _cts.Token);
112114

113115
try
114116
{
@@ -172,8 +174,11 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
172174
_metrics.SessionEstablished();
173175
_logger.LogInformation("SessionEstablished with {Peer} ASN={Asn}", _peerConfig.Address, _remoteAsn);
174176

175-
// Send initial routes
176-
await SendAllRoutesAsync();
177+
// Send initial routes. Hold the send lock so _advertisedPrefixes stays consistent
178+
// w.r.t. a RefreshRoutesAsync fired from the API the instant IsEstablished became true.
179+
await _sendLock.WaitAsync(linkedCts.Token);
180+
try { await SendAllRoutesAsync(); }
181+
finally { _sendLock.Release(); }
177182

178183
// Run main loop: read messages + send keepalives
179184
await RunEstablishedAsync(linkedCts.Token);
@@ -182,6 +187,11 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
182187
{
183188
_logger.LogInformation("SessionClosed (cancelled) with {Peer}", _peerConfig.Address);
184189
}
190+
catch (BgpNotificationException ex)
191+
{
192+
_logger.LogWarning(ex, "BGP error from {Peer}: {Error}/{SubError}", _peerConfig.Address, ex.ErrorCode, ex.SubErrorCode);
193+
await SendNotificationAsync(ex.ErrorCode, ex.SubErrorCode);
194+
}
185195
catch (BgpParseException ex)
186196
{
187197
_logger.LogError(ex, "Parse error from {Peer}", _peerConfig.Address);
@@ -190,6 +200,8 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
190200
catch (Exception ex)
191201
{
192202
_logger.LogError(ex, "Session error with {Peer}", _peerConfig.Address);
203+
// Best-effort Cease so the peer sees a clean close instead of a bare TCP RST.
204+
try { await SendNotificationAsync(BgpConstants.Error.Cease, BgpConstants.SubError.Unspecific); } catch { }
193205
}
194206
finally
195207
{
@@ -209,23 +221,40 @@ public async Task RunAsync(CancellationToken cancellationToken = default)
209221

210222
private async Task RunEstablishedAsync(CancellationToken cancellationToken)
211223
{
212-
using var keepaliveTimer = new PeriodicTimer(_keepAliveInterval);
224+
// Hold time 0 -> KEEPALIVE timer and Hold Timer are disabled (RFC 4271 §4.2/§6.5).
225+
if (_negotiatedHoldTime == 0)
226+
{
227+
await ReadLoopAsync(cancellationToken);
228+
await _cts.CancelAsync();
229+
return;
230+
}
213231

232+
Interlocked.Exchange(ref _lastReceivedTicks, DateTime.UtcNow.Ticks);
233+
234+
using var keepaliveTimer = new PeriodicTimer(_keepAliveInterval);
214235
var readTask = ReadLoopAsync(cancellationToken);
215-
var keepaliveTask = KeepAliveLoopAsync(keepaliveTimer, cancellationToken);
236+
var keepaliveTask = HoldTimerLoopAsync(keepaliveTimer, cancellationToken);
216237

217238
await Task.WhenAny(readTask, keepaliveTask);
218-
_cts.Cancel();
239+
await _cts.CancelAsync();
219240

220-
try { await readTask; } catch { }
221-
try { await keepaliveTask; } catch { }
241+
await AwaitLoopTaskAsync(readTask, "read");
242+
await AwaitLoopTaskAsync(keepaliveTask, "keepalive");
243+
}
244+
245+
private async Task AwaitLoopTaskAsync(Task task, string label)
246+
{
247+
try { await task; }
248+
catch (OperationCanceledException) { }
249+
catch (Exception ex) { _logger.LogWarning(ex, "{Label} loop faulted for {Peer}", label, _peerConfig.Address); }
222250
}
223251

224252
private async Task ReadLoopAsync(CancellationToken cancellationToken)
225253
{
226254
while (!cancellationToken.IsCancellationRequested)
227255
{
228256
var message = await ReceiveMessageAsync(cancellationToken);
257+
Interlocked.Exchange(ref _lastReceivedTicks, DateTime.UtcNow.Ticks);
229258

230259
switch (message)
231260
{
@@ -244,10 +273,20 @@ private async Task ReadLoopAsync(CancellationToken cancellationToken)
244273
}
245274
}
246275

247-
private async Task KeepAliveLoopAsync(PeriodicTimer timer, CancellationToken cancellationToken)
276+
private async Task HoldTimerLoopAsync(PeriodicTimer timer, CancellationToken cancellationToken)
248277
{
278+
var holdTime = TimeSpan.FromSeconds(_negotiatedHoldTime);
249279
while (await timer.WaitForNextTickAsync(cancellationToken))
250280
{
281+
// Hold timer: tear down if no message was received within the negotiated hold time (RFC 4271 §6.6).
282+
if (DateTime.UtcNow.Ticks - Interlocked.Read(ref _lastReceivedTicks) >= holdTime.Ticks)
283+
{
284+
_logger.LogWarning("Hold timer expired for {Peer} (no message for {Hold}s)",
285+
_peerConfig.Address, _negotiatedHoldTime);
286+
await SendNotificationAsync(BgpConstants.Error.HoldTimerExpired, BgpConstants.SubError.Unspecific);
287+
return;
288+
}
289+
251290
await SendKeepaliveAsync();
252291
_logger.LogDebug("KeepAliveSent to {Peer}", _peerConfig.Address);
253292
}
@@ -278,12 +317,16 @@ private async Task HandleUpdateAsync(BgpUpdateMessage update)
278317
switch (attr.TypeCode)
279318
{
280319
case BgpConstants.Attribute.Origin:
320+
if (attr.Data.Length < 1)
321+
throw new BgpNotificationException(BgpConstants.Error.UpdateMessageError, BgpConstants.SubError.Unspecific, "Malformed ORIGIN attribute");
281322
origin = AttributeHelper.ReadOrigin(attr);
282323
break;
283324
case BgpConstants.Attribute.AsPath:
284325
asPath = AttributeHelper.ReadAsPath(attr, _remoteFourByteAsn);
285326
break;
286327
case BgpConstants.Attribute.NextHop:
328+
if (attr.Data.Length < 4)
329+
throw new BgpNotificationException(BgpConstants.Error.UpdateMessageError, BgpConstants.SubError.Unspecific, "Malformed NEXT_HOP attribute");
287330
nextHop = AttributeHelper.ReadNextHop(attr);
288331
break;
289332
case BgpConstants.Attribute.Community:
@@ -552,23 +595,29 @@ private async Task SendAllRoutesAsync()
552595
}
553596
}
554597

555-
// Final fallback: send from shared route table
556-
var tableRoutes = _routeTable.GetAll();
557-
if (tableRoutes.Count == 0) return;
598+
// Final fallback: send from shared route table (single pass — one allocation, not two)
599+
var filtered = new List<Route>();
600+
foreach (var r in _routeTable.Enumerate())
601+
{
602+
if (_routeFilter.AcceptOutgoing(r, _peerConfig))
603+
filtered.Add(r);
604+
}
605+
if (filtered.Count == 0) return;
558606

559-
var filtered = tableRoutes.Where(r => _routeFilter.AcceptOutgoing(r, _peerConfig)).ToList();
560607
await SendRoutesAsync(nextHop, filtered);
561608
}
562609

563610
private async Task SendRoutesAsync(uint nextHop, List<Route> routes)
564611
{
565612
const int maxNlriPerUpdate = 100;
613+
_advertisedPrefixes.EnsureCapacity(_advertisedPrefixes.Count + routes.Count);
566614
var sent = 0;
567615
var batch = new List<Route>(maxNlriPerUpdate);
568616

569617
foreach (var route in routes)
570618
{
571619
batch.Add(route);
620+
_advertisedPrefixes.Add(new IpPrefix(route.Prefix, route.PrefixLength));
572621
if (batch.Count >= maxNlriPerUpdate)
573622
{
574623
await SendRouteBatchAsync(nextHop, batch);
@@ -583,7 +632,6 @@ private async Task SendRoutesAsync(uint nextHop, List<Route> routes)
583632
sent += batch.Count;
584633
}
585634

586-
_advertisedPrefixes.AddRange(routes.Select(r => new IpPrefix(r.Prefix, r.PrefixLength)));
587635
_logger.LogInformation("UpdateSent {Count} routes to {Peer}", sent, _peerConfig.Address);
588636
}
589637

@@ -617,7 +665,7 @@ private async Task SendUpdateBatchAsync(List<PathAttribute> attrs, List<IpPrefix
617665
Nlri = nlri
618666
};
619667

620-
await SendMessageAsync(update);
668+
await WriteMessageAsync(update); // caller (initial send / refresh) already holds _sendLock
621669
_metrics.UpdateSent();
622670
}
623671

@@ -631,8 +679,8 @@ private async Task<BgpMessage> ReceiveMessageAsync(CancellationToken cancellatio
631679
await ReadExactAsync(headerBuffer.AsMemory(0, BgpConstants.MessageHeaderSize), cancellationToken);
632680

633681
var length = BgpMessageReader.GetMessageLength(headerBuffer);
634-
if (length < 0)
635-
throw new BgpParseException("Incomplete message header");
682+
if (length is < BgpConstants.MinMessageSize or > BgpConstants.MaxMessageSize)
683+
throw new BgpParseException($"Invalid message length: {length}");
636684

637685
var payloadSize = length - BgpConstants.MessageHeaderSize;
638686
var messageBuffer = ArrayPool<byte>.Shared.Rent(length);
@@ -657,6 +705,14 @@ private async Task<BgpMessage> ReceiveMessageAsync(CancellationToken cancellatio
657705
}
658706

659707
private async Task SendMessageAsync(BgpMessage message)
708+
{
709+
await _sendLock.WaitAsync();
710+
try { await WriteMessageAsync(message); }
711+
finally { _sendLock.Release(); }
712+
}
713+
714+
// Writes one message without acquiring the lock — caller MUST already hold _sendLock.
715+
private async Task WriteMessageAsync(BgpMessage message)
660716
{
661717
var bufferSize = BgpMessageWriter.GetBufferSize(message);
662718
var buffer = ArrayPool<byte>.Shared.Rent(bufferSize);
@@ -751,23 +807,31 @@ private async Task SendNotificationAsync(byte errorCode, byte subErrorCode)
751807
private void ValidateOpen(BgpOpenMessage open)
752808
{
753809
if (open.Version != BgpConstants.BgpVersion)
754-
throw new BgpParseException($"Unsupported BGP version: {open.Version}");
810+
throw new BgpNotificationException(BgpConstants.Error.OpenMessageError, BgpConstants.SubError.UnsupportedVersion, $"Unsupported BGP version: {open.Version}");
755811

756812
_remoteFourByteAsn = CapabilityHelper.GetRemoteAsn(open).HasValue;
757813
_remoteAsn = CapabilityHelper.GetEffectiveAsn(open);
758814

759815
_onPeerIdentified?.Invoke(_peerConfig.Address, _remoteAsn);
760816

761817
if (_peerConfig.RemoteAsn.HasValue && _remoteAsn != _peerConfig.RemoteAsn.Value)
762-
throw new BgpParseException($"Unexpected ASN: expected {_peerConfig.RemoteAsn}, got {_remoteAsn}");
818+
throw new BgpNotificationException(BgpConstants.Error.OpenMessageError, BgpConstants.SubError.BadPeerAs, $"Unexpected ASN: expected {_peerConfig.RemoteAsn}, got {_remoteAsn}");
763819

764820
var holdTime = open.HoldTime;
765821
if (holdTime != 0 && holdTime < 3)
766-
throw new BgpParseException($"Unacceptable hold time: {holdTime}");
822+
throw new BgpNotificationException(BgpConstants.Error.OpenMessageError, BgpConstants.SubError.UnacceptableHoldTime, $"Unacceptable hold time: {holdTime}");
823+
824+
// BGP Identifier must be non-zero and must not collide with our own (RFC 4271 §6.2).
825+
if (open.RouterId == 0)
826+
throw new BgpNotificationException(BgpConstants.Error.OpenMessageError, BgpConstants.SubError.BadBgpIdentifier, "Invalid BGP identifier: 0.0.0.0");
827+
828+
var localRouterId = BgpConstants.IPAddressToUint(_bgpConfig.GetRouterIdAddress());
829+
if (open.RouterId == localRouterId)
830+
throw new BgpNotificationException(BgpConstants.Error.OpenMessageError, BgpConstants.SubError.BadBgpIdentifier, "BGP identifier collision with local RouterId");
767831

768832
_negotiatedHoldTime = holdTime;
769833
_keepAliveInterval = holdTime == 0
770-
? TimeSpan.FromSeconds(_bgpConfig.KeepAlive)
834+
? TimeSpan.Zero
771835
: TimeSpan.FromSeconds(Math.Max(holdTime / 3, 1));
772836
}
773837

0 commit comments

Comments
 (0)