Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing #184 out of memory #185

Merged
merged 10 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 17 additions & 23 deletions export/conversation.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,26 @@ import (

const dateFmt = "2006-01-02"

// byDate sorts the messages by date and returns a map date->[]slack.Message.
// users should contain the users in the conversation for population of required
// fields.
// Threads are flattened.
func (Export) byDate(c *types.Conversation, userIdx structures.UserIndex) (map[string][]ExportMessage, error) {
msgsByDate := make(map[string][]ExportMessage)
if err := populateMsgs(msgsByDate, c.Messages, userIdx); err != nil {
// byDate sorts the messages by date and returns a map date->[]ExportMessage.
// userIdx should contain the users in the conversation for populating the
// required fields. Threads are flattened.
func (Export) byDate(c *types.Conversation, userIdx structures.UserIndex) (messagesByDate, error) {
msgsByDate := make(map[string][]*ExportMessage, 0)
if err := flattenMsgs(msgsByDate, c.Messages, userIdx); err != nil {
return nil, err
}

// sort messages by Time within each date.
for date, messages := range msgsByDate {
sort.Slice(msgsByDate[date], func(i, j int) bool {
return messages[i].Time().Before(messages[j].Time())
return messages[i].slackdumpTime.Before(messages[j].slackdumpTime)
})
}

return msgsByDate, nil
}

type messagesByDate map[string][]ExportMessage
type messagesByDate map[string][]*ExportMessage

// validate checks if mbd keys are valid dates.
func (mbd messagesByDate) validate() error {
Expand All @@ -46,26 +45,21 @@ func (mbd messagesByDate) validate() error {
return nil
}

// populateMsgs takes the messages input, splits them by the date and
// flattenMsgs takes the messages input, splits them by the date and
// populates the msgsByDate map.
func populateMsgs(msgsByDate messagesByDate, messages []types.Message, usrIdx structures.UserIndex) error {
for _, msg := range messages {
expMsg := newExportMessage(&msg, usrIdx)
func flattenMsgs(msgsByDate messagesByDate, messages []types.Message, usrIdx structures.UserIndex) error {
for i := range messages {
expMsg := newExportMessage(&messages[i], usrIdx)

if len(msg.ThreadReplies) > 0 {
if len(messages[i].ThreadReplies) > 0 {
// Recursive call: are you ready, mr. stack?
if err := populateMsgs(msgsByDate, msg.ThreadReplies, usrIdx); err != nil {
return fmt.Errorf("thread ID %s: %w", msg.Timestamp, err)
if err := flattenMsgs(msgsByDate, messages[i].ThreadReplies, usrIdx); err != nil {
return fmt.Errorf("thread ID %s: %w", messages[i].Timestamp, err)
}
}

dt, err := msg.Datetime()
if err != nil {
return fmt.Errorf("updateDateMsgs: unable to parse message timestamp (%s): %w", msg.Timestamp, err)
}

formattedDt := dt.Format(dateFmt)
msgsByDate[formattedDt] = append(msgsByDate[formattedDt], *expMsg)
formattedDt := expMsg.slackdumpTime.Format(dateFmt)
msgsByDate[formattedDt] = append(msgsByDate[formattedDt], expMsg)
}

return nil
Expand Down
82 changes: 73 additions & 9 deletions export/conversation_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
package export

import (
"context"
"encoding/json"
"os"
"runtime/trace"
"testing"
"time"

"github.com/rusq/slackdump/v2/internal/fixtures"
"github.com/rusq/slackdump/v2/internal/fixtures/fixgen"
"github.com/rusq/slackdump/v2/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestConversation_ByDate(t *testing.T) {
// TODO
var exp Export

conversations := fixtures.Load[types.Conversation](fixtures.TestConversationJSON)
Expand All @@ -21,12 +27,35 @@ func TestConversation_ByDate(t *testing.T) {
}

// uncomment to write the json for fixtures
// require.NoError(t, writeOutput("convDt", convDt))
require.NoError(t, writeOutput("convDt", convDt))

want := fixtures.Load[map[string][]ExportMessage](fixtures.TestConversationExportJSON)
want := fixtures.Load[messagesByDate](fixtures.TestConversationExportJSON)

// we need to depopulate slackdumpTime for comparison, as it is not saved
// in the fixture.
zeroSlackdumpTime(convDt)
assert.Equal(t, want, convDt)
}

// zeroSlackdumpTime resets the slackdumpTime field of every message stored in
// m to the zero time.Time, so that the result can be compared against data
// in which that field was never populated.
func zeroSlackdumpTime(m messagesByDate) {
	var unset time.Time
	for date := range m {
		dayMsgs := m[date]
		// Index into the slice so the write reaches the stored element.
		for idx := range dayMsgs {
			dayMsgs[idx].slackdumpTime = unset
		}
	}
}

// writeOutput encodes v as tab-indented JSON into the file "<name>.json",
// creating the file or truncating it if it already exists.
//
// It returns the first error encountered while creating the file, encoding v
// or closing the file. The close error is reported (instead of being dropped
// by a bare defer) because a failed close on a freshly written file may mean
// the data never reached the disk.
func writeOutput(name string, v any) (err error) {
	f, err := os.Create(name + ".json")
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encoding error if there is one; otherwise surface the
		// close error to the caller.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	enc := json.NewEncoder(f)
	enc.SetIndent("", "\t")
	return enc.Encode(v)
}

func Test_messagesByDate_validate(t *testing.T) {
tests := []struct {
name string
Expand All @@ -35,22 +64,22 @@ func Test_messagesByDate_validate(t *testing.T) {
}{
{"valid",
messagesByDate{
"2019-09-16": []ExportMessage{},
"2020-12-31": []ExportMessage{},
"2019-09-16": []*ExportMessage{},
"2020-12-31": []*ExportMessage{},
},
false,
},
{"empty key",
messagesByDate{
"": []ExportMessage{},
"2020-12-31": []ExportMessage{},
"": []*ExportMessage{},
"2020-12-31": []*ExportMessage{},
},
true,
},
{"invalid key",
messagesByDate{
"2019-09-16": []ExportMessage{},
"2020-31-12": []ExportMessage{}, //swapped month and date
"2019-09-16": []*ExportMessage{},
"2020-31-12": []*ExportMessage{}, //swapped month and date
},
true,
},
Expand All @@ -63,3 +92,38 @@ func Test_messagesByDate_validate(t *testing.T) {
})
}
}

// Package-level state used by BenchmarkByDate.
var (
// benchResult receives the last map returned by byDate in BenchmarkByDate.
// NOTE(review): presumably kept at package level so the compiler cannot
// discard the benchmarked call as dead code - confirm.
benchResult messagesByDate
// benchConv is the generated conversation fed to byDate on every benchmark
// iteration; it is populated once in init.
benchConv types.Conversation
)

// init generates the conversation fixture used by BenchmarkByDate: 10,000
// messages named "test", requested for the window from 2020-01-01 00:00 UTC
// to 2020-01-01 15:00 UTC.
func init() {
	const numMessages = 10_000

	startDate := time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC)
	endDate := time.Date(2020, time.January, 1, 15, 0, 0, 0, time.UTC)

	benchConv = fixgen.GenerateTestConversation("test", startDate, endDate, numMessages)
}

// BenchmarkByDate measures Export.byDate on the conversation generated in
// init, calling it with a nil user index. The whole run is wrapped in a
// runtime/trace task and region so that it can be located in an execution
// trace.
func BenchmarkByDate(b *testing.B) {
	ctx, task := trace.NewTask(context.Background(), "BenchmarkByDate")
	defer task.End()
	// The region is started here; only End is deferred, and it runs before
	// task.End because deferred calls execute in reverse order.
	defer trace.StartRegion(ctx, "byDateBenchRun").End()

	var (
		exporter Export
		last     messagesByDate
	)
	for n := 0; n < b.N; n++ {
		res, err := exporter.byDate(&benchConv, nil)
		if err != nil {
			b.Fatal(err)
		}
		last = res
	}
	// Publish the final result to the package-level sink.
	benchResult = last
}
4 changes: 2 additions & 2 deletions export/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (

// Export is the instance of Slack Exporter.
type Export struct {
fs fsadapter.FS // target filesystem
sd *slackdump.Session // Session instance
fs fsadapter.FS // target filesystem
sd dumper // Session instance
lg logger.Interface
dl dl.Exporter

Expand Down
Loading