diff --git a/geolite2v2/geo-ip-compare_test.go b/geolite2v2/geo-ip-compare_test.go
index 7f2fe938..65854a8f 100644
--- a/geolite2v2/geo-ip-compare_test.go
+++ b/geolite2v2/geo-ip-compare_test.go
@@ -3,8 +3,10 @@ package geolite2v2_test
 import (
 	"bytes"
 	"log"
+	"net"
 	_ "net/http/pprof"
 	"sort"
+	"sync"
 	"testing"
 
 	"github.com/Pallinder/go-randomdata"
@@ -18,63 +20,94 @@ import (
 
-// TestCompareAnnotations tests if the new implementation annotates the same way as the old
-// implementation
-func TestCompareAnnotations(t *testing.T) {
+// TestCompareV4Annotations tests if the new implementation annotates the same way as the old
+// implementation, first for random IPv4 addresses and then for IPv6 addresses.
+func TestCompareV4Annotations(t *testing.T) {
 	if testing.Short() {
 		log.Println("Skipping test that relies on mlab-testing bucket")
 		return
 	}
-	oldAnnotators := loadOld(t)
-	newAnnotators := loadNew(t)
-
-	// sort the annotators to be able to compare their resuults
-	sort.Slice(oldAnnotators, createSorterFor(oldAnnotators))
-	sort.Slice(newAnnotators, createSorterFor(newAnnotators))
+	oldAnnotators, newAnnotators := oldAndNew(t)
 
-	// we need minimum 200 IP hits per annotator
-	minimumHitCountPerAnnotator := 200
+	// we need a minimum of 10000 IP hits per annotator
+	minimumHitCountPerAnnotator := 10000
 
 	// get each annotator
 	for idx, oldAnn := range oldAnnotators {
 		newAnn := newAnnotators[idx]
-		notFoundCount, v4HitCount, v6HitCount := 0, 0, 0
-		ipV4 := true
+		notFoundCount, v4HitCount := 0, 0
 
-		// annotate v4 and v6 IP addresses and compare the resuults
-		for (v4HitCount + v6HitCount) < minimumHitCountPerAnnotator {
+		// annotate v4 IP addresses and compare the results
+		// TODO - hit rate is only about 1 in 10K. WHY???
+		for v4HitCount < minimumHitCountPerAnnotator {
 			var oldResp, newResp api.GeoData
 			var oldErr, newErr error
 
 			var address string
-			if ipV4 {
-				address = randomdata.IpV4Address()
-			} else {
-				address = randomdata.IpV6Address()
-			}
+			address = randomdata.IpV4Address()
 
 			// the error should be the same if there's any
 			oldErr = oldAnn.Annotate(address, &oldResp)
 			newErr = newAnn.Annotate(address, &newResp)
-			if oldErr != nil {
+			if oldErr != nil || newErr != nil {
 				notFoundCount++
-				assert.EqualError(t, newErr, oldErr.Error())
+				if notFoundCount%1000 == 0 {
+					log.Println("Not found:", address)
+				}
+				assertSameError(t, oldErr, newErr)
 				continue
 			}
 			// the content should be the same if there's any
 			assertSameGeoData(t, &oldResp, &newResp)
 
-			if ipV4 {
-				v4HitCount++
+			v4HitCount++
+
+			if v4HitCount%2000 == 0 {
+				log.Printf("Not found count: %d, v4 hit count: %d", notFoundCount, v4HitCount)
+			}
+		}
+
+		log.Printf("annotator[%d] - Not found count: %d, v4 hit count: %d", idx, notFoundCount, v4HitCount)
+	}
+
+	// Now do the ipv6, which needs a slightly different strategy.
+	for idx, oldAnn := range oldAnnotators {
+		newAnn := newAnnotators[idx]
+		notFoundCount, v6HitCount := 0, 0
+
+		// annotate v6 IP addresses and compare the results
+		// TODO - hit rate is only about 1 in 10K. WHY???
+		for v6HitCount < minimumHitCountPerAnnotator {
+			var oldResp, newResp api.GeoData
+			var oldErr, newErr error
+
+			var address net.IP
+			if v6HitCount%2 == 0 {
+				_, address = randomValidIPv6(oldAnn)
 			} else {
-				v6HitCount++
+				_, address = randomValidIPv6(newAnn)
 			}
-			ipV4 = !ipV4
 
-			if v4HitCount%100 == 0 || v6HitCount%100 == 0 {
-				log.Printf("Not found count: %d, v4 hit count: %d, v6 hit count %d", notFoundCount, v4HitCount, v6HitCount)
+			// the error should be the same if there's any
+			oldErr = oldAnn.Annotate(address.String(), &oldResp)
+			newErr = newAnn.Annotate(address.String(), &newResp)
+			if oldErr != nil || newErr != nil {
+				notFoundCount++
+				if notFoundCount%1000 == 0 {
+					log.Println("Not found:", address)
+				}
+				assertSameError(t, oldErr, newErr)
+				continue
+			}
+			// the content should be the same if there's any
+			assertSameGeoData(t, &oldResp, &newResp)
+
+			v6HitCount++
+
+			if v6HitCount%2000 == 0 {
+				log.Printf("Not found count: %d, v6 hit count: %d", notFoundCount, v6HitCount)
 			}
 		}
 
-		log.Printf("annotator[%d] - Not found count: %d, v4 hit count: %d, v6 hit count %d", idx, notFoundCount, v4HitCount, v6HitCount)
+		log.Printf("annotator[%d] - Not found count: %d, v6 hit count: %d", idx, notFoundCount, v6HitCount)
 	}
 }
 
@@ -98,8 +131,7 @@ func TestCompareOldNewContent(t *testing.T) {
 		log.Println("Skipping test that relies on mlab-testing bucket")
 		return
 	}
-	oldAnnotators := loadOld(t)
-	newAnnotators := loadNew(t)
+	oldAnnotators, newAnnotators := oldAndNew(t)
 
 	// assert if we have the same number of annotators
 	assert.Equal(t, len(oldAnnotators), len(newAnnotators))
@@ -185,6 +217,7 @@ func createSorterFor(forList []api.Annotator) func(int, int) bool {
 }
 
 // loadOld loads only data from march with the old loader
+// TODO preload these only once
 func loadOld(t *testing.T) []api.Annotator {
 	year, month, day := "2018", "03", "01"
 	geoloader.UseSpecificGeolite2DateForTesting(&year, &month, &day)
@@ -195,6 +228,7 @@ func loadOld(t *testing.T) []api.Annotator {
 }
 
 // loadNew loads only data from march with the new loader
+// TODO preload these only once
 func loadNew(t *testing.T) []api.Annotator {
 	year, month, day := "2018", "03", "01"
 	geoloader.UseSpecificGeolite2DateForTesting(&year, &month, &day)
@@ -203,3 +237,44 @@ func loadNew(t *testing.T) []api.Annotator {
 	assert.Nil(t, err)
 	return g2loader.Fetch()
 }
+
+// oldAndNew loads the annotators of the old and of the new implementation
+// concurrently and returns both lists sorted with createSorterFor, so that
+// the entries at the same index can be compared with each other.
+// NOTE(review): loadOld and loadNew both call
+// geoloader.UseSpecificGeolite2DateForTesting; if that writes unsynchronized
+// package state, running them in parallel is a data race - confirm with -race.
+func oldAndNew(t *testing.T) ([]api.Annotator, []api.Annotator) {
+	var oldAnnotators []api.Annotator
+	var newAnnotators []api.Annotator
+	var wg sync.WaitGroup
+	wg.Add(2)
+	go func() {
+		// deferred so that wg.Wait() cannot hang if the loader exits early
+		defer wg.Done()
+		oldAnnotators = loadOld(t)
+	}()
+
+	go func() {
+		defer wg.Done()
+		newAnnotators = loadNew(t)
+	}()
+
+	wg.Wait()
+	// sort the annotators to be able to compare their results
+	sort.Slice(oldAnnotators, createSorterFor(oldAnnotators))
+	sort.Slice(newAnnotators, createSorterFor(newAnnotators))
+	return oldAnnotators, newAnnotators
+}
+
+// assertSameError checks that the old and the new annotator agree on a failed
+// lookup. Callers invoke it when at least one of the two errors is non-nil.
+func assertSameError(t *testing.T, oldErr, newErr error) {
+	if oldErr == nil || newErr == nil {
+		// only one implementation failed: report the mismatch instead of
+		// calling Error() on a nil error
+		assert.Equal(t, oldErr, newErr)
+		return
+	}
+	assert.EqualError(t, newErr, oldErr.Error())
+}