From 253f9121f058af21c1802a2e99071ca450e7c772 Mon Sep 17 00:00:00 2001 From: Greg Russell Date: Thu, 29 Nov 2018 16:33:35 -0500 Subject: [PATCH 1/3] correctly handle legacy request --- geolite2/geo-ip.go | 14 +++++---- geolite2/geo-ip_test.go | 5 +++ handler/handler.go | 68 +++++++++++++++++++++++++++++------------ handler/handler_test.go | 6 ++++ 4 files changed, 68 insertions(+), 25 deletions(-) diff --git a/geolite2/geo-ip.go b/geolite2/geo-ip.go index b1b42e88..5ec45a2a 100644 --- a/geolite2/geo-ip.go +++ b/geolite2/geo-ip.go @@ -11,6 +11,7 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/m-lab/annotation-service/api" ) @@ -102,12 +103,7 @@ func convertIPNodeToGeoData(ipNode IPNode, locationNodes []LocationNode) *api.Ge } -// This just allows compiler to check that GeoDataset satisfies the Finder interface. -func assertAnnotator(f api.Annotator) { - func(api.Annotator) {}(&GeoDataset{}) -} - -// Find looks up the IP address and returns the corresponding GeoData +// GetAnnotation looks up the IP address and returns the corresponding GeoData // TODO - improve the format handling. Perhaps pass in a net.IP ? func (ds *GeoDataset) GetAnnotation(request *api.RequestData) (*api.GeoData, error) { var node IPNode @@ -126,6 +122,12 @@ func (ds *GeoDataset) GetAnnotation(request *api.RequestData) (*api.GeoData, err return convertIPNodeToGeoData(node, ds.LocationNodes), nil } +// StartDate returns the date that the dataset was published. +// TODO implement actual dataset time!! +func (ds *GeoDataset) StartDate() time.Time { + return time.Time{} +} + // Verify column length func checkNumColumns(record []string, size int) error { if len(record) != size { diff --git a/geolite2/geo-ip_test.go b/geolite2/geo-ip_test.go index edaa1298..b3bc93fd 100644 --- a/geolite2/geo-ip_test.go +++ b/geolite2/geo-ip_test.go @@ -19,6 +19,11 @@ import ( "google.golang.org/appengine/aetest" ) +// This just allows compiler to check that GeoDataset satisfies the Finder interface. +func assertAnnotator(f api.Annotator) { + func(api.Annotator) {}(&geolite2.GeoDataset{}) +} + // Returns nil if two IP Lists are equal func isEqualIPLists(listComp, list []geolite2.IPNode) error { for index, element := range list { diff --git a/handler/handler.go b/handler/handler.go index 0bc3a756..96b53841 100644 --- a/handler/handler.go +++ b/handler/handler.go @@ -106,11 +106,52 @@ func NewBatchResponse(size int) *BatchResponse { return &BatchResponse{"", time.Time{}, responseMap} } +// TODO move to annotatormanager package soon. +var ErrNoAnnotator = errors.New("no Annotator found") + +// TODO move to annotatormanager package soon. +func AnnotateLegacy(date time.Time, ips []api.RequestData) (map[string]*api.GeoData, time.Time, error) { + responseMap := make(map[string]*api.GeoData) + + // For now, use the date of the first item. In future the items will not have individual timestamps. + legacyAPI := date == time.Time{} + if legacyAPI { + // For legacyAPI, use the timestamp of the first IP to choose the annotator. + date = ips[0].Timestamp + } + ann := geolite2.GetAnnotator(date) + if ann == nil { + // stop sending more request in the same batch because w/ high chance the dataset is not ready + return nil, time.Time{}, ErrNoAnnotator + } + + for i := range ips { + request := ips[i] + metrics.Metrics_totalLookups.Inc() + annotation, err := ann.GetAnnotation(&request) + if err != nil { + // TODO need better error handling. + continue + } + // This requires that the caller should ignore the dateString. + // TODO - the unit tests do not catch this problem, so maybe it isn't a problem. + dateString := "" + if legacyAPI { + // When using the old API, encode the actual timestamp from the request. + dateString = strconv.FormatInt(request.Timestamp.Unix(), encodingBase) + } + responseMap[request.IP+dateString] = annotation + } + // TODO use annotator's actual start date. + return responseMap, time.Time{}, nil +} + // BatchAnnotate is a URL handler that expects the body of the request // to contain a JSON encoded slice of api.RequestDatas. It will // look up all the ip addresses and bundle them into a map of metadata // structs (with the keys being the ip concatenated with the base 36 // encoded timestamp) and send them back, again JSON encoded. +// TODO update this comment when we switch to new API. func BatchAnnotate(w http.ResponseWriter, r *http.Request) { // Setup timers and counters for prometheus metrics. timerStart := time.Now() @@ -125,34 +166,23 @@ func BatchAnnotate(w http.ResponseWriter, r *http.Request) { r.Body.Close() if err != nil { - fmt.Println(err) fmt.Fprintf(w, "Invalid Request!") return } - responseMap := make(map[string]*api.GeoData) + var responseMap map[string]*api.GeoData // For now, use the date of the first item. In future the items will not have individual timestamps. if len(dataSlice) > 0 { - date := dataSlice[0].Timestamp - ann := geolite2.GetAnnotator(date) - if ann == nil { - // stop sending more request in the same batch because w/ high chance the dataset is not ready - fmt.Fprintf(w, "Batch Request Error") + // For old request format, we use empty date. + date := time.Time{} + responseMap, _, err = AnnotateLegacy(date, dataSlice) + if err != nil { + fmt.Fprintf(w, err.Error()) return } - dateString := strconv.FormatInt(date.Unix(), encodingBase) - - for _, request := range dataSlice { - metrics.Metrics_totalLookups.Inc() - annotation, err := ann.GetAnnotation(&request) - if err != nil { - continue - } - // This requires that the caller should ignore the dateString. - // TODO - the unit tests do not catch this problem, so maybe it isn't a problem. - responseMap[request.IP+dateString] = annotation - } + } else { + responseMap = make(map[string]*api.GeoData) } encodedResult, err := json.Marshal(responseMap) diff --git a/handler/handler_test.go b/handler/handler_test.go index 58ab51da..a76b0dcf 100644 --- a/handler/handler_test.go +++ b/handler/handler_test.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "io" + "log" "net" "net/http" "net/http/httptest" @@ -18,6 +19,11 @@ import ( "github.com/m-lab/annotation-service/handler" ) +func init() { + // Always prepend the filename and line number. + log.SetFlags(log.LstdFlags | log.Lshortfile) +} + func TestAnnotate(t *testing.T) { tests := []struct { ip string From b0efc0755813a69dcb0b390bf09eca2fcf53b09f Mon Sep 17 00:00:00 2001 From: Greg Russell Date: Thu, 29 Nov 2018 17:54:09 -0500 Subject: [PATCH 2/3] use different times in batch test --- handler/handler_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/handler/handler_test.go b/handler/handler_test.go index a76b0dcf..4d1aa27d 100644 --- a/handler/handler_test.go +++ b/handler/handler_test.go @@ -184,10 +184,10 @@ func TestBatchAnnotate(t *testing.T) { }, { body: `[{"ip": "127.0.0.1", "timestamp": "2017-08-25T13:31:12.149678161-04:00"}, - {"ip": "2620:0:1003:1008:5179:57e3:3c75:1886", "timestamp": "2017-08-25T13:31:12.149678161-04:00"}]`, - res: `{"127.0.0.1ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583"},"ASN":{}},"2620:0:1003:1008:5179:57e3:3c75:1886ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583"},"ASN":{}}}`, + {"ip": "2620:0:1003:1008:5179:57e3:3c75:1886", "timestamp": "2017-08-25T14:32:13.149678161-04:00"}]`, + res: `{"127.0.0.1ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583"},"ASN":{}},"2620:0:1003:1008:5179:57e3:3c75:1886ov97hp":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583"},"ASN":{}}}`, // TODO - remove alt after updating json annotations to omitempty. - alt: `{"127.0.0.1ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583","latitude":0,"longitude":0},"ASN":{}},"2620:0:1003:1008:5179:57e3:3c75:1886ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583","latitude":0,"longitude":0},"ASN":{}}}`, + alt: `{"127.0.0.1ov94o0":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583","latitude":0,"longitude":0},"ASN":{}},"2620:0:1003:1008:5179:57e3:3c75:1886ov97hp":{"Geo":{"region":"ME","city":"Not A Real City","postal_code":"10583","latitude":0,"longitude":0},"ASN":{}}}`, }, } // TODO - make a test utility in geolite2 package. From c97cd73a086aaef7873112e475c249bdd6994f5e Mon Sep 17 00:00:00 2001 From: Gregory Russell Date: Sun, 2 Dec 2018 21:12:03 -0500 Subject: [PATCH 3/3] Add annotator-ss.yaml --- .travis.yml | 7 +++++-- annotator-ss.yaml | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 annotator-ss.yaml diff --git a/.travis.yml b/.travis.yml index 3163bc27..a423344d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -91,8 +91,11 @@ deploy: # SANDBOX: Allows any branch with sandbox-*, to trigger deploying that # branch to sandbox for pre-review testing. - provider: script - script: $TRAVIS_BUILD_DIR/travis/deploy_app.sh mlab-sandbox /tmp/mlab-sandbox.json - $TRAVIS_BUILD_DIR annotator.yaml + script: + - $TRAVIS_BUILD_DIR/travis/deploy_app.sh mlab-sandbox /tmp/mlab-sandbox.json + $TRAVIS_BUILD_DIR annotator.yaml + - $TRAVIS_BUILD_DIR/travis/deploy_app.sh mlab-sandbox /tmp/mlab-sandbox.json + $TRAVIS_BUILD_DIR annotator-ss.yaml skip_cleanup: true on: repo: m-lab/annotation-service diff --git a/annotator-ss.yaml b/annotator-ss.yaml new file mode 100644 index 00000000..13d035df --- /dev/null +++ b/annotator-ss.yaml @@ -0,0 +1,38 @@ +runtime: custom +env: flex +service: annotatorss + +# Resource and scaling options. For more background, see: +# https://cloud.google.com/appengine/docs/flexible/go/configuring-your-app-with-app-yaml + +# TODO(dev): adjust CPU and memory based on actual requirements. +resources: + cpu: 2 + # Instances support between [(cpu * 0.9) - 0.4, (cpu * 6.5) - 0.4] + # Actual memory available is exposed via GAE_MEMORY_MB environment variable. + memory_gb: 6 + + # TODO - Do we need any disk? Adjust once we understand requirements. + disk_size_gb: 10 + +automatic_scaling: + # We expect negligible load, so this is unlikely to trigger. + min_num_instances: 2 + max_num_instances: 20 + cool_down_period_sec: 1800 + cpu_utilization: + target_utilization: 0.60 + +# Note: add a public port for GCE auto discovery by prometheus. +# TODO(dev): are any values redundant or irrelevant? +network: + instance_tag: annotator + name: default + # Forward port 9090 on the GCE instance address to the same port in the + # container address. Only forward TCP traffic. + # Note: the default AppEngine container port 8080 cannot be forwarded. + forwarded_ports: + - 9090/tcp + +env_variables: + # TODO add custom service-account, instead of using default credentials.