Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: de-duplicate php package data #871

Merged
merged 9 commits into from
Apr 18, 2024
85 changes: 81 additions & 4 deletions daemon/internal/newrelic/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ type App struct {
HarvestTrigger HarvestTriggerFunc
LastActivity time.Time
Rules MetricRules
PhpPackages map[PhpPackagesKey]struct{}
}

func (app *App) String() string {
Expand Down Expand Up @@ -180,6 +181,7 @@ func NewApp(info *AppInfo) *App {
info: info,
HarvestTrigger: nil,
LastActivity: now,
PhpPackages: make(map[PhpPackagesKey]struct{}),
}
}

Expand Down Expand Up @@ -303,10 +305,10 @@ func (app *App) NeedsConnectAttempt(now time.Time, backoff time.Duration) bool {
return false
}

//Since span events are not included in Faster Event Harvest due to concerns
//about downsampling within a distributed trace, the report period and harvest
//limit are reported separately in span_event_harvest_config instead of
//event_harvest_config. Combine them both into EventHarvestConfig here.
// Since span events are not included in Faster Event Harvest due to concerns
// about downsampling within a distributed trace, the report period and harvest
// limit are reported separately in span_event_harvest_config instead of
// event_harvest_config. Combine them both into EventHarvestConfig here.
func combineEventConfig(ehc collector.EventHarvestConfig, sehc collector.SpanEventHarvestConfig) collector.EventHarvestConfig {
ehc.EventConfigs.SpanEventConfig.Limit = sehc.SpanEventConfig.Limit
ehc.EventConfigs.SpanEventConfig.ReportPeriod = sehc.SpanEventConfig.ReportPeriod
Expand Down Expand Up @@ -338,3 +340,78 @@ func (app *App) Inactive(threshold time.Duration) bool {
}
return time.Since(app.LastActivity) > threshold
}

// filter seen php packages data to avoid sending duplicates
//
// the `App` structure contains a map of PHP Packages the reporting
// application has encountered.
//
// the map of packages should persist for the duration of the
// current connection
//
// takes the `PhpPackages.data` byte array as input and unmarshals
// into an anonymous interface array
//
// the JSON format received from the agent is:
//
// [["package_name","version",{}],...]
//
// for each entry, assign the package name and version to the `PhpPackagesKey`
// struct and use the key to verify data does not exist in the map. If the
// key does not exist, add it to the map and the array of 'new' packages.
//
// convert the array of 'new' packages into a byte array representing
// the expected data that should match input, minus the duplicates.
func (app *App) filterPhpPackages(data []byte) []byte {
if data == nil {
return nil
zsistla marked this conversation as resolved.
Show resolved Hide resolved
}

var pkgKey PhpPackagesKey
var newPkgs []PhpPackagesKey
var x []interface{}

err := json.Unmarshal(data, &x)
if nil != err {
log.Errorf("failed to unmarshal php package json: %s", err)
return nil
}

for _, pkgJson := range x {
pkg, _ := pkgJson.([]interface{})
if len(pkg) != 3 {
log.Errorf("invalid php package json structure: %+v", pkg)
return nil
}
name, ok := pkg[0].(string)
version, ok := pkg[1].(string)
pkgKey = PhpPackagesKey{name, version}
_, ok = app.PhpPackages[pkgKey]
if !ok {
app.PhpPackages[pkgKey] = struct{}{}
newPkgs = append(newPkgs, pkgKey)
mfulb marked this conversation as resolved.
Show resolved Hide resolved
}
}

if newPkgs == nil {
return nil
}

buf := &bytes.Buffer{}
buf.WriteString(`[`)
for _, pkg := range newPkgs {
buf.WriteString(`["`)
buf.WriteString(pkg.Name)
buf.WriteString(`","`)
buf.WriteString(pkg.Version)
buf.WriteString(`",{}],`)
}

resJson := buf.Bytes()

// swap last ',' character with ']'
mfulb marked this conversation as resolved.
Show resolved Hide resolved
resJson = resJson[:len(resJson)-1]
resJson = append(resJson, ']')

return resJson
}
51 changes: 51 additions & 0 deletions daemon/internal/newrelic/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,54 @@ func TestMaxPayloadSizeInBytesFromConnectReply(t *testing.T) {
t.Errorf("parseConnectReply(something), got [%v], expected [%v]", c.MaxPayloadSizeInBytes, expectedMaxPayloadSizeInBytes)
}
}

func TestFilterPhpPackages(t *testing.T) {
app := App{
PhpPackages: make(map[PhpPackagesKey]struct{}),
}
var nilData []byte = nil
emptyData := []byte(`[[{}]]`)
validData := []byte(`[["drupal","6.0",{}]]`)
moreValidData := []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
duplicateData := []byte(`[["drupal","6.0",{}]]`)
versionData := []byte(`[["drupal","9.0",{}]]`)
invalidData := []byte(`[[["1","2","3"],["4","5"]{}]]`)

mfulb marked this conversation as resolved.
Show resolved Hide resolved
filteredData := app.filterPhpPackages(nilData)
if filteredData != nil {
t.Errorf("expected 'nil' result on 'nil' input, got [%v]", filteredData)
}

filteredData = app.filterPhpPackages(emptyData)
if filteredData != nil {
t.Errorf("expected 'nil' result on empty data input, got [%v]", filteredData)
}

expect := []byte(`[["drupal","6.0",{}]]`)
filteredData = app.filterPhpPackages(validData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

expect = []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
filteredData = app.filterPhpPackages(moreValidData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

filteredData = app.filterPhpPackages(duplicateData)
if filteredData != nil {
t.Errorf("expected 'nil', got [%v]", filteredData)
}

expect = []byte(`[["drupal","9.0",{}]]`)
filteredData = app.filterPhpPackages(versionData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

filteredData = app.filterPhpPackages(invalidData)
if filteredData != nil {
t.Errorf("expected 'nil', go [%v]", filteredData)
}
}
15 changes: 15 additions & 0 deletions daemon/internal/newrelic/harvest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,19 @@ func TestHarvestEmpty(t *testing.T) {
if h.empty() {
t.Errorf("Harvest.empty() = true, want false")
}

// verify that php packages does not send harvest when data is nil
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
h.PhpPackages.AddPhpPackagesFromData(nil)
if !h.empty() {
t.Errorf("Harvest.empty = false, want true")
}

// verify that valid php package data sends a harvest
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
h.PhpPackages.AddPhpPackagesFromData([]byte(`[["testpackage","testversion",{}]]`))
if h.empty() {
t.Errorf("Harvest.empty = true, want false")
}

}
5 changes: 5 additions & 0 deletions daemon/internal/newrelic/php_packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ import (
"github.com/newrelic/newrelic-php-agent/daemon/internal/newrelic/log"
)

type PhpPackagesKey struct {
Name string
Version string
}

// phpPackages represents all detected packages reported by an agent.
type PhpPackages struct {
numSeen int
Expand Down
3 changes: 3 additions & 0 deletions daemon/internal/newrelic/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,8 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
// In such cases, harvest all types and return.
if ht&HarvestAll == HarvestAll {
ah.Harvest = NewHarvest(time.Now(), ah.App.connectReply.EventHarvestConfig.EventConfigs)
// filter already seen php packages
harvest.PhpPackages.data = ah.App.filterPhpPackages(harvest.PhpPackages.data)
if args.blocking {
// Invoked primarily by CleanExit
harvestAll(harvest, args, ah.connectReply.EventHarvestConfig, ah.TraceObserver, du_chan)
Expand All @@ -698,6 +700,7 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
slowSQLs := harvest.SlowSQLs
txnTraces := harvest.TxnTraces
phpPackages := harvest.PhpPackages
phpPackages.data = ah.App.filterPhpPackages(phpPackages.data)

harvest.Metrics = NewMetricTable(limits.MaxMetrics, time.Now())
harvest.Errors = NewErrorHeap(limits.MaxErrors)
Expand Down
10 changes: 6 additions & 4 deletions daemon/internal/newrelic/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ var (
sampleSpanEvent = []byte("belated birthday")
sampleLogEvent = []byte("log event test birthday")
sampleErrorEvent = []byte("forgotten birthday")
samplePhpPackages = []byte(`["package", "1.2.3",{}]`)
samplePhpPackages = []byte(`[["package","1.2.3",{}]]`)
)

type ClientReturn struct {
Expand Down Expand Up @@ -297,9 +297,11 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
// collect php packages
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_pkgs := <-m.clientParams

// collect metrics
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_metrics := <-m.clientParams

// collect usage metrics
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_usage := <-m.clientParams
Expand All @@ -308,7 +310,7 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {

// check pkgs and metric data - it appears these can
// come in different orders so check both
toTestPkgs := `["Jars",["package", "1.2.3",{}]]`
toTestPkgs := `["Jars",[["package","1.2.3",{}]]]`
if toTestPkgs != string(cp_pkgs.data) {
if toTestPkgs != string(cp_metrics.data) {
t.Fatalf("packages data: expected '%s', got '%s'", toTestPkgs, string(cp_pkgs.data))
Expand All @@ -318,9 +320,9 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
time1 := strings.Split(string(cp_usage.data), ",")[1]
time2 := strings.Split(string(cp_usage.data), ",")[2]
usageMetrics := `["one",` + time1 + `,` + time2 + `,` +
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1285,0,0,0,0]],` +
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1286,0,0,0,0]],` +
`[{"name":"Supportability/C/Collector/metric_data/Output/Bytes"},[1,1253,0,0,0,0]],` +
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,32,0,0,0,0]]]]`
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,33,0,0,0,0]]]]`
if got, _ := OrderScrubMetrics(cp_usage.data, nil); string(got) != usageMetrics {
t.Fatalf("metrics data: expected '%s', got '%s'", string(usageMetrics), string(got))
}
Expand Down
Loading