Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

give a long gregor reconnect backoff to devices that don't send chats #9720

Merged
merged 2 commits into from Dec 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
82 changes: 82 additions & 0 deletions go/chat/active.go
@@ -0,0 +1,82 @@
package chat

import (
"context"
"time"

"github.com/keybase/client/go/chat/globals"
"github.com/keybase/client/go/chat/utils"
"github.com/keybase/client/go/libkb"
)

// All devices are presumed active for the first 24 hours. See comment below.
var InitialAssumedActiveInterval = 24 * time.Hour
var ActiveIntervalAfterSend = 30 * 24 * time.Hour

func chatActiveDBKey(name string) libkb.DbKey {
return libkb.DbKey{
Typ: libkb.DBChatActive,
Key: name,
}
}

func firstQueryTimeDbKey() libkb.DbKey {
return chatActiveDBKey("first_query_time")
}

func lastSendTimeDbKey() libkb.DbKey {
return chatActiveDBKey("last_send_time")
}

// If no first query time is found in the local db, this function writes the
// current time.
func TouchFirstChatActiveQueryTime(ctx context.Context, g *globals.Context, log utils.DebugLabeler) time.Time {
now := time.Now()
var firstQueryUnixTime int64
found, err := g.LocalChatDb.GetInto(&firstQueryUnixTime, firstQueryTimeDbKey())
// Warn for errors and bail.
if err != nil {
log.Debug(ctx, "Failed to get chat active query time: %s", err)
return now
}
// If the first query time doesn't exist, store Now(). Don't return Now()
// directly, though, since that has extra metadata in it what won't be
// there when we deserialize from the db.
if !found {
log.Debug(ctx, "Chat active query time not found. Storing current time.")
firstQueryUnixTime = now.Unix()
err := g.LocalChatDb.PutObj(firstQueryTimeDbKey(), nil, firstQueryUnixTime)
if err != nil {
log.Debug(ctx, "Failed to store chat active query time: %s", err)
}
}
// Otherwise return what we found.
return time.Unix(firstQueryUnixTime, 0)
}

// Returns the zero time if there is no recorded last send time (either because
// the device has never sent a message, or because it hasn't sent one since we
// started recording).
func GetLastSendTime(ctx context.Context, g *globals.Context, log utils.DebugLabeler) time.Time {
var zeroTime time.Time
var lastSendUnixTime int64
found, err := g.LocalChatDb.GetInto(&lastSendUnixTime, lastSendTimeDbKey())
// Warn for errors and return zero.
if err != nil {
log.Debug(ctx, "Failed to get chat active last send time: %s", err)
return zeroTime
}
// If the last time doesn't exist, again return zero.
if !found {
return zeroTime
}
// Otherwise return what we found.
return time.Unix(lastSendUnixTime, 0)
}

func RecordChatSend(ctx context.Context, g *globals.Context, log utils.DebugLabeler) {
err := g.LocalChatDb.PutObj(lastSendTimeDbKey(), nil, time.Now().Unix())
if err != nil {
log.Debug(ctx, "Failed to store chat last send time: %s", err)
}
}
54 changes: 54 additions & 0 deletions go/chat/active_test.go
@@ -0,0 +1,54 @@
package chat

import (
"testing"
"time"

"github.com/keybase/client/go/chat/globals"
"github.com/keybase/client/go/chat/utils"
"github.com/stretchr/testify/require"
)

func TestChatActive(t *testing.T) {
ctx, world, _, _, _, _ := setupTest(t, 1)

u := world.GetUsers()[0]
tc := world.Tcs[u.Username]
defer tc.Cleanup()
g := globals.NewContext(tc.G, tc.ChatG)
log := utils.NewDebugLabeler(g.GetLog(), "TestChatActive", false)

// The first chat active query should store the current time. Use Unix
// times, because truncating seconds on only one half of the comparison
// throws everything off.
beforeTouch := time.Now().Unix()
touch := TouchFirstChatActiveQueryTime(ctx, g, log).Unix()
afterTouch := time.Now().Unix()
if touch < beforeTouch {
t.Fatalf("touch unexpectly early: %d < %d", touch, beforeTouch)
}
if touch > afterTouch {
t.Fatalf("touch unexpectly late: %d > %d", touch, afterTouch)
}

// Subsequent queries shouldn't change the first query time.
touchAgain := TouchFirstChatActiveQueryTime(ctx, g, log).Unix()
require.Equal(t, touch, touchAgain)

// Initially the last send time should be zero.
zeroLastSend := GetLastSendTime(ctx, g, log)
require.True(t, zeroLastSend.IsZero())

// Now do the same exercise with the last send time as we did with the
// first query time above.
beforeSend := time.Now().Unix()
RecordChatSend(ctx, g, log)
lastSend := GetLastSendTime(ctx, g, log).Unix()
afterSend := time.Now().Unix()
if lastSend < beforeSend {
t.Fatalf("send unexpectly early: %d < %d", lastSend, beforeSend)
}
if lastSend > afterSend {
t.Fatalf("send unexpectly late: %d > %d", lastSend, afterSend)
}
}
4 changes: 4 additions & 0 deletions go/chat/sender.go
Expand Up @@ -490,6 +490,10 @@ func (s *BlockingSender) Send(ctx context.Context, convID chat1.ConversationID,
msg chat1.MessagePlaintext, clientPrev chat1.MessageID, outboxID *chat1.OutboxID) (obid chat1.OutboxID, boxed *chat1.MessageBoxed, rl *chat1.RateLimit, err error) {
defer s.Trace(ctx, func() error { return err }, fmt.Sprintf("Send(%s)", convID))()

// Record that this user is "active in chat", which we use to determine
// gregor reconnect backoffs.
RecordChatSend(ctx, s.G(), s.DebugLabeler)

ri := s.getRi()
if ri == nil {
return chat1.OutboxID{}, nil, nil, fmt.Errorf("Send(): no remote client found")
Expand Down
1 change: 1 addition & 0 deletions go/libkb/db.go
Expand Up @@ -183,6 +183,7 @@ const (
DBSigChainTailPublic = 0xe7
DBSigChainTailSemiprivate = 0xe8
DBSigChainTailEncrypted = 0xe9
DBChatActive = 0xea
DBMerkleRoot = 0xf0
DBTrackers = 0xf1
DBGregor = 0xf2
Expand Down
77 changes: 68 additions & 9 deletions go/service/gregor.go
Expand Up @@ -33,7 +33,8 @@ import (
)

const GregorRequestTimeout time.Duration = 30 * time.Second
const GregorConnectionRetryInterval time.Duration = 2 * time.Second
const GregorConnectionShortRetryInterval time.Duration = 2 * time.Second
const GregorConnectionLongRetryInterval time.Duration = 10 * time.Second
const GregorGetClientTimeout time.Duration = 4 * time.Second

type IdentifyUIHandler struct {
Expand Down Expand Up @@ -1410,6 +1411,60 @@ func (g *gregorHandler) pingLoop() {
}
}

// Our heuristic for figuring out whether your device is "active in chat" is
// whether you've sent a message in the last month. We're recording your last
// send as a simple timestamp, and we'll compare that to the current time.
// However, we want to avoid the situation where we ship this code for the
// first time, and suddenly *all* devices appear inactive, because no one has
// written the timestamp yet. So we add a second timestamp, which is the first
// time you ran any of this code. For 24 hours after the first time a device
// queries these keys, we treat all devices as active. We'll want to keep
// (something like) this code even after it's all stable in the wild, because
// it covers newly provisioned devices too.
func (g *gregorHandler) chatAwareReconnectIsLong(ctx context.Context) bool {
now := time.Now()
firstQueryTime := chat.TouchFirstChatActiveQueryTime(ctx, g.G(), g.chatLog)

// As a special case, always use a short backoff on mobile. Mobile devices
// aren't responsible for the thundering herd issues that this logic is
// trying to mitigate, and mobile users are much more likely to notice a
// connection delay.
if g.G().Env.GetAppType() == libkb.MobileAppType {
return false
}

// All devices are presumed active for the first 24 hours after they start
// checking this, and we give them a short backoff.
if now.Sub(firstQueryTime) < chat.InitialAssumedActiveInterval {
return false
}

// Otherwise, devices that haven't recorded a send in the last month are
// given a long backoff.
lastSendTime := chat.GetLastSendTime(ctx, g.G(), g.chatLog)
if now.Sub(lastSendTime) < chat.ActiveIntervalAfterSend {
return false
}
g.chatLog.Debug(ctx, "Device isn't active in chat. Using long reconnect backoff.", g.chatLog)
return true
}

func (g *gregorHandler) chatAwareReconnectBackoff(ctx context.Context) backoff.BackOff {
if g.chatAwareReconnectIsLong(ctx) {
return backoff.NewConstantBackOff(GregorConnectionLongRetryInterval)
}
return backoff.NewConstantBackOff(GregorConnectionShortRetryInterval)
}

// Similar to the backoff above, except that the "short" window is zero, so
// that active clients don't wait at all before their first reconnect attempt.
func (g *gregorHandler) chatAwareInitialReconnectBackoffWindow(ctx context.Context) time.Duration {
if g.chatAwareReconnectIsLong(ctx) {
return GregorConnectionLongRetryInterval
}
return 0
}

// connMutex must be locked before calling this
func (g *gregorHandler) connectTLS() error {
ctx := context.Background()
Expand All @@ -1428,11 +1483,13 @@ func (g *gregorHandler) connectTLS() error {
// Let people know we are trying to sync
g.G().NotifyRouter.HandleChatInboxSyncStarted(ctx, g.G().Env.GetUID())

constBackoff := backoff.NewConstantBackOff(GregorConnectionRetryInterval)
opts := rpc.ConnectionOpts{
TagsFunc: logger.LogTagsFromContextRPC,
WrapErrorFunc: libkb.MakeWrapError(g.G().ExternalG()),
ReconnectBackoff: func() backoff.BackOff { return constBackoff },
TagsFunc: logger.LogTagsFromContextRPC,
WrapErrorFunc: libkb.MakeWrapError(g.G().ExternalG()),
InitialReconnectBackoffWindow: func() time.Duration { return g.chatAwareInitialReconnectBackoffWindow(ctx) },
ReconnectBackoff: func() backoff.BackOff { return g.chatAwareReconnectBackoff(ctx) },
// We deliberately avoid ForceInitialBackoff here, becuase we don't
// want to penalize mobile, which tears down its connection frequently.
}
g.conn = rpc.NewTLSConnection(rpc.NewFixedRemote(uri.HostPort),
[]byte(rawCA), libkb.NewContextifiedErrorUnwrapper(g.G().ExternalG()), g, libkb.NewRPCLogFactory(g.G().ExternalG()), g.G().Log, opts)
Expand Down Expand Up @@ -1465,11 +1522,13 @@ func (g *gregorHandler) connectNoTLS() error {
t := newConnTransport(g.G().ExternalG(), uri.HostPort)
g.transportForTesting = t

constBackoff := backoff.NewConstantBackOff(GregorConnectionRetryInterval)
opts := rpc.ConnectionOpts{
TagsFunc: logger.LogTagsFromContextRPC,
WrapErrorFunc: libkb.MakeWrapError(g.G().ExternalG()),
ReconnectBackoff: func() backoff.BackOff { return constBackoff },
TagsFunc: logger.LogTagsFromContextRPC,
WrapErrorFunc: libkb.MakeWrapError(g.G().ExternalG()),
InitialReconnectBackoffWindow: func() time.Duration { return g.chatAwareInitialReconnectBackoffWindow(ctx) },
ReconnectBackoff: func() backoff.BackOff { return g.chatAwareReconnectBackoff(ctx) },
// We deliberately avoid ForceInitialBackoff here, becuase we don't
// want to penalize mobile, which tears down its connection frequently.
}
g.conn = rpc.NewConnectionWithTransport(g, t, libkb.NewContextifiedErrorUnwrapper(g.G().ExternalG()), g.G().Log, opts)

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions go/vendor/vendor.json
Expand Up @@ -284,10 +284,10 @@
"revisionTime": "2017-05-08T13:19:45Z"
},
{
"checksumSHA1": "ub85JBvVGqt8k0uYhH4n0JmeOaA=",
"checksumSHA1": "BdtHbEo/t/HE+WRJSaQzcZiwg/o=",
"path": "github.com/keybase/go-framed-msgpack-rpc/rpc",
"revision": "b123be5e037d718432cc16a4d37a972b2b4dddd7",
"revisionTime": "2017-12-05T16:34:57Z"
"revision": "c416da7cf9cfb85545098e346be6583a1649b0d5",
"revisionTime": "2017-12-06T17:53:16Z"
},
{
"checksumSHA1": "RLs8GIV4e+D350pyzZh5RC3mgQg=",
Expand Down