Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 144 additions & 1 deletion daemon/cc_info_timer.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ type CCInfoTimerService struct {
// Anthropic rate limit cache
rateLimitCache *anthropicRateLimitCache

// Codex rate limit cache
codexRateLimitCache *codexRateLimitCache

// User profile cache (permanent for daemon lifetime)
userLogin string
userLoginFetched bool
Expand All @@ -67,7 +70,8 @@ func NewCCInfoTimerService(config *model.ShellTimeConfig) *CCInfoTimerService {
cache: make(map[CCInfoTimeRange]CCInfoCache),
activeRanges: make(map[CCInfoTimeRange]bool),
gitCache: make(map[string]*GitCacheEntry),
rateLimitCache: &anthropicRateLimitCache{},
rateLimitCache: &anthropicRateLimitCache{},
codexRateLimitCache: &codexRateLimitCache{},
stopChan: make(chan struct{}),
}
}
Expand Down Expand Up @@ -152,6 +156,11 @@ func (s *CCInfoTimerService) stopTimer() {
s.rateLimitCache.fetchedAt = time.Time{}
s.rateLimitCache.lastAttemptAt = time.Time{}
s.rateLimitCache.mu.Unlock()
s.codexRateLimitCache.mu.Lock()
s.codexRateLimitCache.usage = nil
s.codexRateLimitCache.fetchedAt = time.Time{}
s.codexRateLimitCache.lastAttemptAt = time.Time{}
s.codexRateLimitCache.mu.Unlock()
Comment on lines +159 to +163
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

When stopping the timer due to inactivity, the lastError field in the Codex rate limit cache should be cleared along with other fields. This ensures that stale error messages aren't displayed when the service resumes activity and a new fetch is pending.

Suggested change
s.codexRateLimitCache.mu.Lock()
s.codexRateLimitCache.usage = nil
s.codexRateLimitCache.fetchedAt = time.Time{}
s.codexRateLimitCache.lastAttemptAt = time.Time{}
s.codexRateLimitCache.mu.Unlock()
s.codexRateLimitCache.mu.Lock()
s.codexRateLimitCache.usage = nil
s.codexRateLimitCache.fetchedAt = time.Time{}
s.codexRateLimitCache.lastAttemptAt = time.Time{}
s.codexRateLimitCache.lastError = ""
s.codexRateLimitCache.mu.Unlock()


slog.Info("CC info timer stopped due to inactivity")
}
Expand All @@ -171,6 +180,7 @@ func (s *CCInfoTimerService) timerLoop() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
s.fetchRateLimit(ctx)
s.fetchCodexRateLimit(ctx)
Comment on lines 182 to +183
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The Anthropic and Codex rate limit fetches are executed sequentially within a shared 10-second timeout. If the first fetch takes a significant amount of time or times out, the second fetch may be delayed or skipped entirely due to context expiration. Since these fetches are independent, running them in parallel would be more efficient and robust. This also applies to the fetch logic inside the ticker loop (lines 206-207).

		var wg sync.WaitGroup
		wg.Add(2)
		go func() { defer wg.Done(); s.fetchRateLimit(ctx) }()
		go func() { defer wg.Done(); s.fetchCodexRateLimit(ctx) }()
		wg.Wait()

Comment on lines 182 to +183
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Shared 10-second context timeout insufficient for two sequential network calls

Both fetchRateLimit(ctx) and fetchCodexRateLimit(ctx) are called sequentially within the same goroutine sharing a single 10-second context timeout (daemon/cc_info_timer.go:180-183 and daemon/cc_info_timer.go:204-207). Each function makes an HTTP call with a 5-second client timeout, so in the worst case both calls together need ~10 seconds just for HTTP, leaving zero margin for other operations (macOS Keychain exec.Command in anthropic_ratelimit.go:76, file I/O in codex_ratelimit.go:68). If fetchRateLimit consumes most of the context budget, fetchCodexRateLimit will fail with a context deadline exceeded error. Critically, fetchCodexRateLimit records lastAttemptAt before the API call (daemon/cc_info_timer.go:582-584), so a context-induced failure triggers the 10-minute TTL backoff (codexUsageCacheTTL), preventing retry and leaving codex rate limit data stale for up to 10 minutes.

Prompt for agents
The fetchRateLimit and fetchCodexRateLimit calls are sequential inside a single goroutine sharing one 10-second context. Each has an internal 5-second HTTP client timeout, so in the worst case, the budget is exactly consumed by the first call, leaving nothing for the second. This is made worse because fetchCodexRateLimit records lastAttemptAt before attempting the fetch, so a context-deadline failure triggers a 10-minute TTL backoff.

Approach 1: Give each fetch its own independent context with a 10-second timeout. Replace the single shared context with two separate context.WithTimeout calls, one for fetchRateLimit and one for fetchCodexRateLimit.

Approach 2: Increase the shared context timeout to 20 seconds to accommodate both calls.

Approach 3: Run both fetches concurrently (each with its own context) inside the goroutine, since they are independent operations. This would also improve latency.

The same pattern appears twice: once in the immediate-fetch block at lines 175-184 and once in the ticker loop at lines 199-208. Both should be updated.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}()
go s.fetchUserProfile(context.Background())

Expand All @@ -194,6 +204,7 @@ func (s *CCInfoTimerService) timerLoop() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
s.fetchRateLimit(ctx)
s.fetchCodexRateLimit(ctx)
Comment on lines 204 to +207
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Decouple Codex fetch timeout from Anthropic fetch

The periodic loop runs Anthropic and Codex fetches sequentially under a single 10-second context. If the Anthropic request stalls or consumes most of that budget, fetchCodexRateLimit starts with an already-expired (or near-expired) context and fails; because lastAttemptAt is still updated, Codex retries are suppressed for the full cache TTL. This makes Codex monitoring unreliable during Anthropic latency/outage windows.

Useful? React with 👍 / 👎.

}()

case <-s.stopChan:
Expand Down Expand Up @@ -551,6 +562,138 @@ func (s *CCInfoTimerService) GetCachedRateLimitError() string {
return s.rateLimitCache.lastError
}

// fetchCodexRateLimit fetches Codex rate limit data if cache is stale.
//
// The staleness check and the recording of lastAttemptAt happen under a
// single write lock so that two concurrent callers cannot both pass the TTL
// gate and issue duplicate fetches.
func (s *CCInfoTimerService) fetchCodexRateLimit(ctx context.Context) {
	// Codex auth is only read on macOS and Linux.
	if runtime.GOOS != "darwin" && runtime.GOOS != "linux" {
		return
	}

	// Atomically check staleness and record this attempt. lastAttemptAt also
	// acts as a backoff: a failed attempt suppresses retries for one TTL.
	s.codexRateLimitCache.mu.Lock()
	if time.Since(s.codexRateLimitCache.fetchedAt) < codexUsageCacheTTL ||
		time.Since(s.codexRateLimitCache.lastAttemptAt) < codexUsageCacheTTL {
		s.codexRateLimitCache.mu.Unlock()
		return
	}
	s.codexRateLimitCache.lastAttemptAt = time.Now()
	s.codexRateLimitCache.mu.Unlock()

	auth, err := loadCodexAuth()
	if err != nil || auth == nil {
		slog.Debug("Failed to load Codex auth", slog.Any("err", err))
		s.codexRateLimitCache.mu.Lock()
		s.codexRateLimitCache.lastError = "auth"
		s.codexRateLimitCache.mu.Unlock()
		return
	}

	usage, err := fetchCodexUsage(ctx, auth)
	if err != nil {
		slog.Warn("Failed to fetch Codex usage", slog.Any("err", err))
		s.codexRateLimitCache.mu.Lock()
		s.codexRateLimitCache.lastError = shortenCodexAPIError(err)
		s.codexRateLimitCache.mu.Unlock()
		return
	}

	s.codexRateLimitCache.mu.Lock()
	s.codexRateLimitCache.usage = usage
	s.codexRateLimitCache.fetchedAt = time.Now()
	s.codexRateLimitCache.lastError = ""
	s.codexRateLimitCache.mu.Unlock()

	// Send usage data to server (fire-and-forget). A fresh background context
	// keeps the upload independent of the caller's (possibly expiring) deadline.
	go func() {
		bgCtx, bgCancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer bgCancel()
		s.sendCodexUsageToServer(bgCtx, usage)
	}()

	slog.Debug("Codex rate limit updated",
		slog.String("plan", usage.Plan),
		slog.Int("windows", len(usage.Windows)))
}

// sendCodexUsageToServer sends Codex usage data to the ShellTime server
// for scheduling push notifications when rate limits reset.
func (s *CCInfoTimerService) sendCodexUsageToServer(ctx context.Context, usage *CodexRateLimitData) {
	// Without an auth token there is nowhere to send the data.
	if s.config.Token == "" {
		return
	}

	// Wire-format payload; field tags define the server contract.
	type usageWindow struct {
		LimitID               string  `json:"limit_id"`
		UsagePercentage       float64 `json:"usage_percentage"`
		ResetsAt              string  `json:"resets_at"`
		WindowDurationMinutes int     `json:"window_duration_minutes"`
	}
	type usagePayload struct {
		Plan    string        `json:"plan"`
		Windows []usageWindow `json:"windows"`
	}

	body := usagePayload{
		Plan:    usage.Plan,
		Windows: make([]usageWindow, 0, len(usage.Windows)),
	}
	for _, win := range usage.Windows {
		body.Windows = append(body.Windows, usageWindow{
			LimitID:         win.LimitID,
			UsagePercentage: win.UsagePercentage,
			// Reset timestamps are serialized as RFC3339 UTC strings.
			ResetsAt:              time.Unix(win.ResetAt, 0).UTC().Format(time.RFC3339),
			WindowDurationMinutes: win.WindowDurationMinutes,
		})
	}

	if err := model.SendHTTPRequestJSON(model.HTTPRequestOptions[usagePayload, any]{
		Context: ctx,
		Endpoint: model.Endpoint{
			Token:       s.config.Token,
			APIEndpoint: s.config.APIEndpoint,
		},
		Method:  "POST",
		Path:    "/api/v1/codex-usage",
		Payload: body,
		Timeout: 5 * time.Second,
	}); err != nil {
		slog.Warn("Failed to send codex usage to server", slog.Any("err", err))
	}
}

// GetCachedCodexRateLimit returns a copy of the cached Codex rate limit data, or nil if not available.
//
// A deep copy (including the Windows slice) is returned so callers cannot
// mutate the cache outside the lock.
func (s *CCInfoTimerService) GetCachedCodexRateLimit() *CodexRateLimitData {
	s.codexRateLimitCache.mu.RLock()
	defer s.codexRateLimitCache.mu.RUnlock()

	cached := s.codexRateLimitCache.usage
	if cached == nil {
		return nil
	}

	// Clone the struct and detach the Windows backing array. The previous
	// version named this variable "copy", shadowing the builtin.
	out := *cached
	out.Windows = make([]CodexRateLimitWindow, len(cached.Windows))
	copy(out.Windows, cached.Windows)
	return &out
}

// GetCachedCodexRateLimitError returns the last error from Codex rate limit fetching, or empty string if none.
func (s *CCInfoTimerService) GetCachedCodexRateLimitError() string {
	cache := s.codexRateLimitCache
	cache.mu.RLock()
	lastErr := cache.lastError
	cache.mu.RUnlock()
	return lastErr
}

// shortenAPIError converts an Anthropic usage API error into a short string for statusline display.
func shortenAPIError(err error) string {
msg := err.Error()
Expand Down
169 changes: 169 additions & 0 deletions daemon/codex_ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package daemon

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
)

// codexUsageCacheTTL bounds how often the Codex usage API is queried; the
// fetch path also uses it as the retry backoff after a failed attempt.
const codexUsageCacheTTL = 10 * time.Minute

// CodexRateLimitData holds the parsed rate limit data from the Codex API
type CodexRateLimitData struct {
	Plan    string                 // plan name as reported by the API
	Windows []CodexRateLimitWindow // one entry per rate limit window
}

// CodexRateLimitWindow holds a single rate limit window from the Codex API
type CodexRateLimitWindow struct {
	LimitID               string  // API identifier of the limit
	UsagePercentage       float64 // usage as a percentage (API field usagePercentage)
	ResetAt               int64   // Unix timestamp
	WindowDurationMinutes int     // window duration in minutes
}

// codexRateLimitCache guards cached Codex usage data shared between the
// fetcher and statusline readers.
type codexRateLimitCache struct {
	mu            sync.RWMutex
	usage         *CodexRateLimitData // last successfully fetched usage; nil until first success
	fetchedAt     time.Time           // time of the last successful fetch
	lastAttemptAt time.Time           // time of the last fetch attempt, successful or not
	lastError     string              // short error description for statusline display
}

// codexAuthData maps the relevant fields from ~/.codex/auth.json
type codexAuthData struct {
	AccessToken string // bearer credential sent to the usage API
	AccountID   string // ChatGPT account id; may be empty
}

// codexAuthJSON maps the full ~/.codex/auth.json structure
type codexAuthJSON struct {
	AuthMode string              `json:"authMode"`
	APIKey   *string             `json:"apiKey"`
	TokenData *codexAuthTokenData `json:"tokenData"`
}

// codexAuthTokenData holds the OAuth token material nested under "tokenData".
type codexAuthTokenData struct {
	AccessToken   string              `json:"accessToken"`
	RefreshToken  string              `json:"refreshToken"`
	IDTokenClaims *codexIDTokenClaims `json:"idTokenClaims"`
}

// codexIDTokenClaims carries the account id claim from the ID token.
type codexIDTokenClaims struct {
	AccountID string `json:"accountId"`
}

// loadCodexAuth reads the Codex authentication data from ~/.codex/auth.json.
func loadCodexAuth() (*codexAuthData, error) {
homeDir, err := os.UserHomeDir()
if err != nil {
return nil, fmt.Errorf("failed to get home directory: %w", err)
}

data, err := os.ReadFile(filepath.Join(homeDir, ".codex", "auth.json"))
if err != nil {
return nil, fmt.Errorf("codex auth file read failed: %w", err)
}

var auth codexAuthJSON
if err := json.Unmarshal(data, &auth); err != nil {
return nil, fmt.Errorf("failed to parse codex auth JSON: %w", err)
}

if auth.TokenData == nil || auth.TokenData.AccessToken == "" {
return nil, fmt.Errorf("no access token found in codex auth")
}
Comment on lines +78 to +80
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Support API-key auth mode in loadCodexAuth

This loader always errors unless tokenData.accessToken is present, even though the parsed schema includes authMode and apiKey. When ~/.codex/auth.json is in API-key mode (no tokenData), Codex usage fetching will always return auth and the daemon never sends Codex usage to /api/v1/codex-usage for those users.

Useful? React with 👍 / 👎.


accountID := ""
if auth.TokenData.IDTokenClaims != nil {
accountID = auth.TokenData.IDTokenClaims.AccountID
}

return &codexAuthData{
AccessToken: auth.TokenData.AccessToken,
AccountID: accountID,
}, nil
}

// codexUsageResponse maps the Codex usage API response
type codexUsageResponse struct {
	RateLimits codexRateLimitSnapshot `json:"rateLimits"`
}

// codexRateLimitSnapshot is the "rateLimits" object of the usage response.
type codexRateLimitSnapshot struct {
	Plan             string                    `json:"plan"`
	RateLimitWindows []codexRateLimitWindowRaw `json:"rateLimitWindows"`
}

// codexRateLimitWindowRaw is a single rate limit window in wire format.
type codexRateLimitWindowRaw struct {
	LimitID               string  `json:"limitId"`
	UsagePercentage       float64 `json:"usagePercentage"`
	ResetAt               int64   `json:"resetAt"`
	WindowDurationMinutes int     `json:"windowDurationMinutes"`
}

// fetchCodexUsage calls the Codex usage API and returns rate limit data.
//
// The request honors ctx for cancellation in addition to the client's own
// 5-second timeout.
func fetchCodexUsage(ctx context.Context, auth *codexAuthData) (*CodexRateLimitData, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.openai.com/api/codex/usage", nil)
	if err != nil {
		return nil, err
	}

	req.Header.Set("Authorization", "Bearer "+auth.AccessToken)
	if auth.AccountID != "" {
		// Attach the account id when known.
		req.Header.Set("ChatGPT-Account-Id", auth.AccountID)
	}
	req.Header.Set("User-Agent", "shelltime-daemon")

	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Drain the body so the keep-alive connection can be reused.
		io.Copy(io.Discard, resp.Body) //nolint:errcheck // best-effort drain
		return nil, fmt.Errorf("codex usage API returned status %d", resp.StatusCode)
	}

	var usage codexUsageResponse
	if err := json.NewDecoder(resp.Body).Decode(&usage); err != nil {
		return nil, fmt.Errorf("failed to decode codex usage response: %w", err)
	}

	// Convert wire-format windows into the internal representation.
	windows := make([]CodexRateLimitWindow, len(usage.RateLimits.RateLimitWindows))
	for i, w := range usage.RateLimits.RateLimitWindows {
		windows[i] = CodexRateLimitWindow{
			LimitID:               w.LimitID,
			UsagePercentage:       w.UsagePercentage,
			ResetAt:               w.ResetAt,
			WindowDurationMinutes: w.WindowDurationMinutes,
		}
	}

	return &CodexRateLimitData{
		Plan:    usage.RateLimits.Plan,
		Windows: windows,
	}, nil
}

// shortenCodexAPIError converts a Codex usage API error into a short string for statusline display.
func shortenCodexAPIError(err error) string {
msg := err.Error()

var status int
if _, scanErr := fmt.Sscanf(msg, "codex usage API returned status %d", &status); scanErr == nil {
return fmt.Sprintf("api:%d", status)
}

if len(msg) >= 6 && msg[:6] == "failed" {
return "api:decode"
}

return "network"
}
Loading