Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions src/vs/platform/agentHost/browser/webSocketClientTransport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ export class WebSocketClientTransport extends Disposable implements IClientTrans
private _ws: WebSocket | undefined;
private _malformedFrames = 0;

/** Guards against firing onClose more than once. */
private _closeFired = false;

/** Whether a socket currently exists and reports the `OPEN` ready state. */
get isOpen(): boolean {
	const currentState = this._ws?.readyState;
	return currentState === WebSocket.OPEN;
}
Expand All @@ -42,6 +45,7 @@ export class WebSocketClientTransport extends Disposable implements IClientTrans
private readonly _address: string,
private readonly _connectionToken?: string,
) {
// TODO: @osortega remove console.logs
super();
Comment thread
osortega marked this conversation as resolved.
}

Expand Down Expand Up @@ -138,20 +142,44 @@ export class WebSocketClientTransport extends Disposable implements IClientTrans
});

ws.addEventListener('close', () => {
this._onClose.fire();
if (!this._closeFired) {
this._closeFired = true;
this._onClose.fire();
}
});

ws.addEventListener('error', () => {
// Error always precedes close - closing is handled in the close handler.
this._onClose.fire();
// Only fire if close hasn't already been fired (e.g. from send failure).
if (!this._closeFired) {
this._closeFired = true;
this._onClose.fire();
}
});
});
}

send(message: ProtocolMessage | AhpServerNotification | JsonRpcResponse): void {
/**
 * Serializes `message` as JSON and writes it to the socket.
 *
 * If the socket is missing or not in the `OPEN` state the message is
 * dropped: a warning is logged, the socket (if any) is closed with code
 * 4001, and `onClose` is fired unless it has already been fired, so that
 * reconnection starts immediately instead of messages being lost silently.
 *
 * @returns `true` if the message was handed to the socket, `false` if it
 * was dropped.
 */
send(message: ProtocolMessage | AhpServerNotification | JsonRpcResponse): boolean {
	const socket = this._ws;

	if (socket === undefined || socket.readyState !== WebSocket.OPEN) {
		console.warn(
			`[WebSocketClientTransport] Message dropped: readyState=${socket?.readyState ?? 'no-socket'}`
		);

		// Force-close the dead socket, then notify listeners at most once.
		socket?.close(4001, 'send-on-dead-socket');
		if (!this._closeFired) {
			this._closeFired = true;
			this._onClose.fire();
		}
		return false;
	}

	socket.send(JSON.stringify(message));
	return true;
}

override dispose(): void {
Expand Down
103 changes: 99 additions & 4 deletions src/vs/sessions/common/sessionsTelemetry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ type TunnelConnectAttemptEvent = {
type TunnelConnectAttemptClassification = {
owner: 'osortega';
comment: 'Tracks individual agent-host tunnel connect attempts for performance and reliability.';
isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Whether this attempt was part of a reconnect cycle (true) or an initial connect (false).' };
isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'Whether this attempt was part of a reconnect cycle (true) or an initial connect (false).' };
attempt: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Attempt number within the current connect session (1-based).' };
durationMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Duration of this individual attempt in milliseconds.' };
success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Whether this individual attempt succeeded.' };
success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Whether this individual attempt succeeded.' };
errorCategory: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Category of error when the attempt failed (relayConnectionFailed, auth, authExpired, network, other); empty on success.' };
};

Expand All @@ -166,10 +166,10 @@ type TunnelConnectResolvedEvent = {
type TunnelConnectResolvedClassification = {
owner: 'osortega';
comment: 'Tracks overall agent-host tunnel connect session outcomes for reliability.';
isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; isMeasurement: true; comment: 'Whether the resolved session was a reconnect cycle (true) or an initial connect (false).' };
isReconnect: { classification: 'SystemMetaData'; purpose: 'FeatureInsight'; comment: 'Whether the resolved session was a reconnect cycle (true) or an initial connect (false).' };
totalAttempts: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total number of attempts made before resolution.' };
totalDurationMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total elapsed time from session start to resolution in milliseconds.' };
success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Whether the connect session ultimately succeeded.' };
success: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Whether the connect session ultimately succeeded.' };
failureReason: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Reason the session terminated without connecting (hostOffline, maxAttemptsReached, auth, authExpired); empty on success.' };
};

Expand All @@ -182,3 +182,98 @@ export function logTunnelConnectResolved(telemetryService: ITelemetryService, da
failureReason: data.failureReason ?? '',
});
}

// --- Socket lifecycle telemetry ---

/**
 * The set of reasons that can be reported as the `trigger` of a socket
 * close when calling `logSocketClose`.
 */
export type SocketCloseTrigger =
| 'server'
| 'sendOnDeadSocket'
| 'visibility'
| 'offline'
| 'malformedFrames'
| 'disposed'
| 'error';

/** Payload shape of the `vscodeAgents.socket/close` telemetry event. */
type SocketCloseEvent = {
// WebSocket close code.
closeCode: number;
// Whether the close was clean.
wasClean: boolean;
// How long the socket was alive, in milliseconds.
lifetimeMs: number;
// Total messages sent.
messagesSent: number;
// Total messages received.
messagesReceived: number;
// Total messages dropped due to a non-OPEN socket.
messagesDropped: number;
// What triggered the close; callers of logSocketClose supply a SocketCloseTrigger value.
trigger: string;
};

/**
 * Telemetry classification for the `vscodeAgents.socket/close` event. It
 * carries one entry per field of `SocketCloseEvent`, plus the event owner
 * and a description of the event.
 */
type SocketCloseClassification = {
owner: 'osortega';
comment: 'Tracks WebSocket close events for agent host connections to measure connection reliability.';
closeCode: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'WebSocket close code.' };
wasClean: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Whether the close was clean.' };
lifetimeMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'How long the socket was alive in milliseconds.' };
messagesSent: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total messages sent.' };
messagesReceived: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total messages received.' };
messagesDropped: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total messages dropped due to non-OPEN socket.' };
trigger: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'What triggered the close (server, sendOnDeadSocket, visibility, offline, malformedFrames, disposed, error).' };
};

/**
 * Publishes a `vscodeAgents.socket/close` telemetry event.
 *
 * @param telemetryService Service the event is published through.
 * @param data Close details; forwarded to the service unchanged.
 */
export function logSocketClose(
	telemetryService: ITelemetryService,
	data: {
		closeCode: number;
		wasClean: boolean;
		lifetimeMs: number;
		messagesSent: number;
		messagesReceived: number;
		messagesDropped: number;
		trigger: SocketCloseTrigger;
	},
): void {
	telemetryService.publicLog2<SocketCloseEvent, SocketCloseClassification>(
		'vscodeAgents.socket/close',
		data,
	);
}

// --- Send dropped telemetry ---

/** Payload shape of the `vscodeAgents.socket/sendDropped` telemetry event. */
type SendDroppedEvent = {
// WebSocket readyState at drop time (0=CONNECTING, 1=OPEN, 2=CLOSING, 3=CLOSED).
readyState: number;
// Milliseconds since the last received message.
timeSinceLastReceiveMs: number;
// Milliseconds since the last sent message.
timeSinceLastSendMs: number;
};

/**
 * Telemetry classification for the `vscodeAgents.socket/sendDropped` event.
 * It carries one entry per field of `SendDroppedEvent`, plus the event owner
 * and a description of the event.
 */
type SendDroppedClassification = {
owner: 'osortega';
comment: 'Tracks when a message is silently dropped due to a non-OPEN WebSocket, indicating a zombie socket.';
readyState: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'WebSocket readyState at drop time (0=CONNECTING, 1=OPEN, 2=CLOSING, 3=CLOSED).' };
timeSinceLastReceiveMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Milliseconds since last received message.' };
timeSinceLastSendMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Milliseconds since last sent message.' };
};

/**
 * Publishes a `vscodeAgents.socket/sendDropped` telemetry event.
 *
 * @param telemetryService Service the event is published through.
 * @param data Drop details; forwarded to the service unchanged.
 */
export function logSendDropped(
	telemetryService: ITelemetryService,
	data: {
		readyState: number;
		timeSinceLastReceiveMs: number;
		timeSinceLastSendMs: number;
	},
): void {
	telemetryService.publicLog2<SendDroppedEvent, SendDroppedClassification>(
		'vscodeAgents.socket/sendDropped',
		data,
	);
}

// --- Visibility resumed telemetry ---

/** Payload shape of the `vscodeAgents.socket/visibilityResumed` telemetry event. */
type VisibilityResumedEvent = {
// How long the tab was hidden, in milliseconds.
hiddenDurationMs: number;
// Whether the socket was alive after the zombie detection check.
socketAlive: boolean;
// Whether the socket was force-closed on resume.
forceClosed: boolean;
};

/**
 * Telemetry classification for the `vscodeAgents.socket/visibilityResumed`
 * event. It carries one entry per field of `VisibilityResumedEvent`, plus the
 * event owner and a description of the event.
 */
type VisibilityResumedClassification = {
owner: 'osortega';
comment: 'Tracks tab visibility resume events to measure zombie socket detection effectiveness.';
hiddenDurationMs: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'How long the tab was hidden in milliseconds.' };
socketAlive: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Whether the socket was alive after zombie detection check.' };
forceClosed: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; comment: 'Whether the socket was force-closed on resume.' };
};

/**
 * Publishes a `vscodeAgents.socket/visibilityResumed` telemetry event.
 *
 * @param telemetryService Service the event is published through.
 * @param data Resume details; forwarded to the service unchanged.
 */
export function logVisibilityResumed(
	telemetryService: ITelemetryService,
	data: {
		hiddenDurationMs: number;
		socketAlive: boolean;
		forceClosed: boolean;
	},
): void {
	telemetryService.publicLog2<VisibilityResumedEvent, VisibilityResumedClassification>(
		'vscodeAgents.socket/visibilityResumed',
		data,
	);
}

// --- Terminal recovery telemetry ---

/** Payload shape of the `vscodeAgents.terminal/recovery` telemetry event. */
type TerminalRecoveryEvent = {
// Number of terminals successfully reconnected.
recoveredCount: number;
// Total number of active terminals at reconnect time.
totalCount: number;
};

/**
 * Telemetry classification for the `vscodeAgents.terminal/recovery` event.
 * It carries one entry per field of `TerminalRecoveryEvent`, plus the event
 * owner and a description of the event.
 */
type TerminalRecoveryClassification = {
owner: 'osortega';
comment: 'Tracks terminal reconnection outcomes after agent host disconnect.';
recoveredCount: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Number of terminals successfully reconnected.' };
totalCount: { classification: 'SystemMetaData'; purpose: 'PerformanceAndHealth'; isMeasurement: true; comment: 'Total number of active terminals at reconnect time.' };
};

/**
 * Publishes a `vscodeAgents.terminal/recovery` telemetry event.
 *
 * @param telemetryService Service the event is published through.
 * @param data Recovery counts; forwarded to the service unchanged.
 */
export function logTerminalRecovery(
	telemetryService: ITelemetryService,
	data: {
		recoveredCount: number;
		totalCount: number;
	},
): void {
	telemetryService.publicLog2<TerminalRecoveryEvent, TerminalRecoveryClassification>(
		'vscodeAgents.terminal/recovery',
		data,
	);
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ import { createRemoteAgentHarnessDescriptor, RemoteAgentCustomizationItemProvide
import { RemoteAgentHostSessionsProvider } from './remoteAgentHostSessionsProvider.js';
import { SyncedCustomizationBundler } from './syncedCustomizationBundler.js';
import { ISSHRemoteAgentHostService } from '../../../../platform/agentHost/common/sshRemoteAgentHost.js';
import { IAgentHostTerminalService } from '../../../../workbench/contrib/terminal/browser/agentHostTerminalService.js';
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry.js';
import { logTerminalRecovery } from '../../../common/sessionsTelemetry.js';

/** Per-connection state bundle, disposed when a connection is removed. */
class ConnectionState extends Disposable {
Expand Down Expand Up @@ -99,6 +102,8 @@ export class RemoteAgentHostContribution extends Disposable implements IWorkbenc
@ICustomizationHarnessService private readonly _customizationHarnessService: ICustomizationHarnessService,
@IStorageService private readonly _storageService: IStorageService,
@IAgentPluginService private readonly _agentPluginService: IAgentPluginService,
@IAgentHostTerminalService private readonly _agentHostTerminalService: IAgentHostTerminalService,
@ITelemetryService private readonly _telemetryService: ITelemetryService,
) {
super();

Expand Down Expand Up @@ -260,11 +265,34 @@ export class RemoteAgentHostContribution extends Disposable implements IWorkbenc
}
const existing = this._connections.get(connectionInfo.address);
if (existing) {
const nameChanged = existing.name !== connectionInfo.name;
const clientIdChanged = existing.loggedConnection.clientId !== connectionInfo.clientId;

// If the name or clientId changed, tear down and re-register
if (existing.name !== connectionInfo.name || existing.loggedConnection.clientId !== connectionInfo.clientId) {
this._logService.info(`[RemoteAgentHost] Reconnecting contribution for ${connectionInfo.address}: oldClientId=${existing.loggedConnection.clientId}, newClientId=${connectionInfo.clientId}, nameChanged=${existing.name !== connectionInfo.name}`);
if (nameChanged || clientIdChanged) {
this._logService.info(`[RemoteAgentHost] Reconnecting contribution for ${connectionInfo.address}: oldClientId=${existing.loggedConnection.clientId}, newClientId=${connectionInfo.clientId}, nameChanged=${nameChanged}`);
const oldClientId = existing.loggedConnection.clientId;
this._connections.deleteAndDispose(connectionInfo.address);
this._setupConnection(connectionInfo);

// Reconnect active terminals only when the backing
// client changed. Name-only updates don't invalidate
// subscriptions and would cause unnecessary buffer
// clear/replay flicker.
if (clientIdChanged) {
const newConnection = this._remoteAgentHostService.getConnection(connectionInfo.address);
if (newConnection) {
this._agentHostTerminalService.reconnectTerminals(newConnection, oldClientId).then(
({ recovered, total }) => {
if (total > 0) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: what if total == 0

this._logService.info(`[RemoteAgentHost] Terminal reconnection: ${recovered}/${total} recovered`);
logTerminalRecovery(this._telemetryService, { recoveredCount: recovered, totalCount: total });
}
},
err => this._logService.warn('[RemoteAgentHost] Terminal reconnection failed', err)
);
}
}
}
} else {
this._setupConnection(connectionInfo);
Expand Down
80 changes: 79 additions & 1 deletion src/vs/workbench/contrib/terminal/browser/agentHostPty.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ export class AgentHostPty extends BasePty implements ITerminalChildProcess {

constructor(
id: number,
private readonly _connection: IAgentConnection,
private _connection: IAgentConnection,
private readonly _terminalUri: URI,
private readonly _options?: IAgentHostPtyOptions,
) {
Expand Down Expand Up @@ -378,6 +378,84 @@ export class AgentHostPty extends BasePty implements ITerminalChildProcess {
// Not applicable
}

/**
 * Rebinds this pty to a new agent host connection.
 *
 * The previous terminal subscription is released, a new one is obtained
 * from `newConnection`, the local buffer is cleared, and the content held
 * in the subscribed terminal state is replayed. Terminal output produced
 * during the disconnect gap is a stream (not state), so some loss is
 * expected.
 *
 * @param newConnection Connection used for the new subscription.
 * @returns `true` if reconnection succeeded, `false` otherwise (the
 * failure is logged as a warning rather than thrown).
 */
async reconnect(newConnection: IAgentConnection): Promise<boolean> {
	// Upper bound on how long to wait for the first state of the new subscription.
	const RECONNECT_HYDRATE_TIMEOUT_MS = 10_000;

	// Release everything tied to the previous connection, then switch over.
	this._subscriptionDisposables.clear();
	this._subscriptionRef?.dispose();
	this._subscriptionRef = undefined;
	this._connection = newConnection;

	try {
		const ref = this._connection.getSubscription(StateComponents.Terminal, this._terminalUri);
		this._subscriptionRef = ref;
		const sub = ref.object;

		// The terminal may no longer exist on the server (e.g. the agent
		// process restarted), so the wait for the first value is bounded.
		if (sub.value === undefined) {
			await new Promise<void>((resolve, reject) => {
				const timeoutHandle = setTimeout(() => {
					hydrationListener.dispose();
					reject(new Error('Reconnect hydration timed out'));
				}, RECONNECT_HYDRATE_TIMEOUT_MS);
				const hydrationListener = sub.onDidChange(() => {
					clearTimeout(timeoutHandle);
					hydrationListener.dispose();
					resolve();
				});
				this._subscriptionDisposables.add(hydrationListener);
			});
		}

		const snapshot = sub.value as TerminalState;

		// Command detection is only ever switched on here, never off.
		const gainedCommandDetection = snapshot.supportsCommandDetection && !this._supportsCommandDetection;
		if (gainedCommandDetection) {
			this._supportsCommandDetection = true;
			this._onSupportsCommandDetection.fire();
		}

		// Wipe the buffer before replaying so content is not duplicated:
		// ESC[2J clears the screen, ESC[3J clears scrollback, ESC[H homes
		// the cursor.
		this.handleData('\x1b[2J\x1b[3J\x1b[H');
		this._replayContent(snapshot.content);

		// Refresh cwd/title when the state provides them.
		if (snapshot.cwd) {
			this._properties.cwd = snapshot.cwd.toString();
		}
		if (snapshot.title) {
			this._properties.title = snapshot.title;
		}

		// From here on, streamed actions are applied as they arrive.
		const actionListener = sub.onDidApplyAction(envelope => {
			this._handleAction(envelope);
		});
		this._subscriptionDisposables.add(actionListener);

		return true;
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		console.warn('[AgentHostPty] Reconnection failed:', reason);
		return false;
	}
}

/**
 * The terminal URI this pty is subscribed to. This is the URI supplied at
 * construction and the one `reconnect` re-subscribes with.
 */
get terminalUri(): URI {
return this._terminalUri;
}

override dispose(): void {
this._subscriptionRef?.dispose();
this._subscriptionRef = undefined;
Expand Down
Loading
Loading