Commit
Merge pull request #111 from m-lab/interface
Annotator Interface
gfr10598 committed Nov 29, 2018
2 parents a87a98d + 0be4929 commit 100af63
Showing 12 changed files with 238 additions and 157 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -1,3 +1,6 @@
[submodule "travis"]
path = travis
url = https://github.com/m-lab/travis.git
[submodule "git-hooks"]
path = git-hooks
url = https://github.com/m-lab/git-hooks
3 changes: 2 additions & 1 deletion .travis.yml
@@ -66,6 +66,7 @@ script:
- go test -bench . ./search/...

# Run all tests, collecting coverage data.
- go test -covermode=count -coverprofile=api.cov -v github.com/m-lab/annotation-service/api
- go test -covermode=count -coverprofile=geoip.cov -v github.com/m-lab/annotation-service/geoip
- go test -covermode=count -coverprofile=handler.cov -v github.com/m-lab/annotation-service/handler
- go test -covermode=count -coverprofile=loader.cov -v github.com/m-lab/annotation-service/loader
@@ -74,7 +75,7 @@ script:
- go test -covermode=count -coverprofile=search.cov -v github.com/m-lab/annotation-service/search

# Coveralls
- $HOME/gopath/bin/gocovmerge handler.cov geoip.cov loader.cov parser.cov search.cov > merge.cov
- $HOME/gopath/bin/gocovmerge api.cov handler.cov geoip.cov loader.cov parser.cov search.cov > merge.cov
- $HOME/gopath/bin/goveralls -coverprofile=merge.cov -service=travis-ci

# Build and prepare for deployment
34 changes: 33 additions & 1 deletion common/geo.go → api/api.go
@@ -1,6 +1,9 @@
package common
// Package api contains interfaces and low level structs required across
// multiple packages or repositories.
package api

import (
"errors"
"os"
"regexp"
"time"
@@ -21,6 +24,7 @@ const (
geolocation data that will be inserted into BigQuery. The fields are
// capitalized for exporting, although the originals in the DB schema
// are not.
// TODO update these to proper camelCase.
type GeolocationIP struct {
Continent_code string `json:"continent_code,,omitempty"` // Gives a shorthand for the continent
Country_code string `json:"country_code,,omitempty"` // Gives a shorthand for the country
@@ -56,3 +60,31 @@ type RequestData struct {
IPFormat int // Holds the ip format, 4 or 6
Timestamp time.Time // Holds the timestamp from an incoming request
}

// Annotator provides the GetAnnotation method, which retrieves the annotation for a given IP address.
type Annotator interface {
// TODO use net.IP, and drop the bool
// TODO return struct instead of pointer.
GetAnnotation(request *RequestData) (*GeoData, error)
// These return the date range covered by the annotator.
// TODO GetStartDate() time.Time
// TODO GetEndDate() time.Time
}

// AnnotationLoader provides the Load function, which loads an annotator.
// TODO - do we really need this, or should we just have a single maxmind.Load function.
type AnnotationLoader interface {
Load(date time.Time) (Annotator, error)
}

// ExtractDateFromFilename returns the date for a filename like
// gs://downloader-mlab-oti/Maxmind/2017/05/08/20170508T080000Z-GeoLiteCity.dat.gz
// TODO move this to maxmind package
func ExtractDateFromFilename(filename string) (time.Time, error) {
re := regexp.MustCompile(`[0-9]{8}T`)
filedate := re.FindAllString(filename, -1)
if len(filedate) != 1 {
return time.Time{}, errors.New("cannot extract date from input filename")
}
return time.Parse(time.RFC3339, filedate[0][0:4]+"-"+filedate[0][4:6]+"-"+filedate[0][6:8]+"T00:00:00Z")
}
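
The new Annotator interface above is the centerpiece of this PR. Below is a minimal sketch of how a caller might depend on it, using a hypothetical fakeAnnotator that is not part of this change; the RequestData field names follow the struct shown earlier in this diff.

```go
package main

import (
	"fmt"
	"time"

	"github.com/m-lab/annotation-service/api"
)

// fakeAnnotator is a hypothetical stand-in (not part of this PR) that
// satisfies api.Annotator by returning an empty GeoData for every request.
type fakeAnnotator struct{}

func (f *fakeAnnotator) GetAnnotation(request *api.RequestData) (*api.GeoData, error) {
	return &api.GeoData{}, nil
}

func main() {
	// Callers hold only the interface, so the concrete dataset type behind it
	// can be swapped (e.g. by PopulateLatestData) without touching this code.
	var ann api.Annotator = &fakeAnnotator{}
	req := api.RequestData{IP: "8.8.8.8", IPFormat: 4, Timestamp: time.Now()}
	data, err := ann.GetAnnotation(&req)
	fmt.Println(data, err)
}
```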
19 changes: 19 additions & 0 deletions api/api_test.go
@@ -0,0 +1,19 @@
package api_test

import (
"testing"

"github.com/m-lab/annotation-service/api"
)

func TestExtractDateFromFilename(t *testing.T) {
date, err := api.ExtractDateFromFilename("Maxmind/2017/05/08/20170508T080000Z-GeoLiteCity.dat.gz")
if date.Year() != 2017 || date.Month() != 5 || date.Day() != 8 || err != nil {
t.Errorf("Did not extract data correctly. Expected %d, got %v, %+v.", 20170508, date, err)
}

date2, err := api.ExtractDateFromFilename("Maxmind/2017/10/05/20171005T033334Z-GeoLite2-City-CSV.zip")
if date2.Year() != 2017 || date2.Month() != 10 || date2.Day() != 5 || err != nil {
t.Errorf("Did not extract data correctly. Expected %d, got %v, %+v.", 20171005, date2, err)
}
}
1 change: 1 addition & 0 deletions git-hooks
Submodule git-hooks added at 766182
69 changes: 5 additions & 64 deletions handler/geolite2-dataset.go
@@ -2,13 +2,11 @@ package handler

import (
"context"
"errors"
"log"

"github.com/m-lab/annotation-service/common"
"github.com/m-lab/annotation-service/api"
"github.com/m-lab/annotation-service/loader"
"github.com/m-lab/annotation-service/parser"
"github.com/m-lab/annotation-service/search"

"google.golang.org/api/iterator"

@@ -26,7 +24,7 @@ func PopulateLatestData() {
log.Fatal(err)
}
currentDataMutex.Lock()
CurrentGeoDataset = data
CurrentAnnotator = data
currentDataMutex.Unlock()
}

@@ -39,13 +37,13 @@ func determineFilenameOfLatestGeolite2File() (string, error) {
if err != nil {
return "", err
}
prospectiveFiles := client.Bucket(common.MaxmindBucketName).Objects(ctx, &storage.Query{Prefix: common.MaxmindPrefix})
prospectiveFiles := client.Bucket(api.MaxmindBucketName).Objects(ctx, &storage.Query{Prefix: api.MaxmindPrefix})
filename := ""
for file, err := prospectiveFiles.Next(); err != iterator.Done; file, err = prospectiveFiles.Next() {
if err != nil {
return "", err
}
if file.Name > filename && common.GeoLite2Regex.MatchString(file.Name) {
if file.Name > filename && api.GeoLite2Regex.MatchString(file.Name) {
filename = file.Name
}

@@ -70,62 +68,5 @@ func LoadLatestGeolite2File() (*parser.GeoDataset, error) {
if err != nil {
return nil, err
}
return LoadGeoLite2Dataset(filename, common.MaxmindBucketName)
}

// ConvertIPNodeToGeoData takes a parser.IPNode, plus a list of
// locationNodes. It will then use that data to fill in a GeoData
// struct and return its pointer.
func ConvertIPNodeToGeoData(ipNode parser.IPNode, locationNodes []parser.LocationNode) *common.GeoData {
locNode := parser.LocationNode{}
if ipNode.LocationIndex >= 0 {
locNode = locationNodes[ipNode.LocationIndex]
}
return &common.GeoData{
Geo: &common.GeolocationIP{
Continent_code: locNode.ContinentCode,
Country_code: locNode.CountryCode,
Country_code3: "", // missing from geoLite2 ?
Country_name: locNode.CountryName,
Region: locNode.RegionCode,
Metro_code: locNode.MetroCode,
City: locNode.CityName,
Area_code: 0, // new geoLite2 does not have area code.
Postal_code: ipNode.PostalCode,
Latitude: ipNode.Latitude,
Longitude: ipNode.Longitude,
},
ASN: &common.IPASNData{},
}

}

// UseGeoLite2Dataset return annotation for a request from a given Geolite2 dataset.
func UseGeoLite2Dataset(request *common.RequestData, dataset *parser.GeoDataset) (*common.GeoData, error) {
if dataset == nil {
// TODO: Block until the value is not nil
return nil, errors.New("Dataset is not ready")
}

err := errors.New("unknown IP format")
var node parser.IPNode
// TODO: Push this logic down to searchlist (after binary search is implemented)
if request.IPFormat == 4 {
node, err = search.SearchBinary(
dataset.IP4Nodes, request.IP)
} else if request.IPFormat == 6 {
node, err = search.SearchBinary(
dataset.IP6Nodes, request.IP)
}

if err != nil {
// ErrNodeNotFound is super spammy - 10% of requests, so suppress those.
if err != search.ErrNodeNotFound {
log.Println(err, request.IP)
}
//TODO metric here
return nil, err
}

return ConvertIPNodeToGeoData(node, dataset.LocationNodes), nil
return LoadGeoLite2Dataset(filename, api.MaxmindBucketName)
}
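
determineFilenameOfLatestGeolite2File above works because the dated GCS object names sort lexicographically, so the newest matching file has the greatest name. The sketch below isolates that selection step; the regex is a hypothetical stand-in for api.GeoLite2Regex, whose exact pattern is not visible in this diff.

```go
package main

import (
	"fmt"
	"regexp"
)

// geoLite2Regex approximates api.GeoLite2Regex; the real pattern is defined
// in the api package and is not shown in this diff.
var geoLite2Regex = regexp.MustCompile(`GeoLite2-City-CSV.*\.zip`)

// latestMatching returns the lexicographically greatest name that matches the
// regex, mirroring the loop in determineFilenameOfLatestGeolite2File.
func latestMatching(names []string) string {
	latest := ""
	for _, name := range names {
		if name > latest && geoLite2Regex.MatchString(name) {
			latest = name
		}
	}
	return latest
}

func main() {
	names := []string{
		"Maxmind/2017/10/05/20171005T033334Z-GeoLite2-City-CSV.zip",
		"Maxmind/2018/09/12/20180912T054119Z-GeoLite2-City-CSV.zip",
	}
	fmt.Println(latestMatching(names)) // prints the 2018 object name
}
```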
70 changes: 36 additions & 34 deletions handler/handler.go
@@ -1,3 +1,6 @@
// Package handler provides functions for handling incoming requests.
// It should only include top level code for parsing the request and assembling
// the response.
package handler

import (
@@ -8,24 +11,26 @@ import (
"io/ioutil"
"net"
"net/http"
"regexp"
"strconv"
"sync"
"time"

"github.com/m-lab/annotation-service/common"
"github.com/m-lab/annotation-service/api"
"github.com/m-lab/annotation-service/metrics"
"github.com/m-lab/annotation-service/parser"
)

var (
// A mutex to make sure that we are not reading from the dataset
// ErrNilDataset is returned when CurrentAnnotator is nil.
ErrNilDataset = errors.New("CurrentAnnotator is nil")

// A mutex to make sure that we are not reading from the CurrentAnnotator
// pointer while trying to update it
currentDataMutex = &sync.RWMutex{}

// This is a pointer to a GeoDataset struct containing the absolute
// CurrentAnnotator points to a GeoDataset struct containing the absolute
// latest data for the annotator to search and reply with
CurrentGeoDataset *parser.GeoDataset = nil
// TODO: This will be moved elsewhere in next PR.
CurrentAnnotator api.Annotator
)

const (
@@ -34,7 +39,7 @@ const (
encodingBase = 36
)

// A function to set up any handlers that are needed, including url
// SetupHandlers sets up any handlers that are needed, including url
// handlers and pubsub handlers
func SetupHandlers() {
http.HandleFunc("/annotate", Annotate)
@@ -77,7 +82,7 @@ func Annotate(w http.ResponseWriter, r *http.Request) {
// ValidateAndParse takes a request and validates the URL parameters,
// verifying that it has a valid ip address and time. Then, it uses
// that to construct a RequestData struct and returns the pointer.
func ValidateAndParse(r *http.Request) (*common.RequestData, error) {
func ValidateAndParse(r *http.Request) (*api.RequestData, error) {
query := r.URL.Query()

time_milli, err := strconv.ParseInt(query.Get("since_epoch"), 10, 64)
@@ -92,29 +97,29 @@ func ValidateAndParse(r *http.Request) (*common.RequestData, error) {
return nil, errors.New("invalid IP address")
}
if newIP.To4() != nil {
return &common.RequestData{ip, 4, time.Unix(time_milli, 0)}, nil
return &api.RequestData{ip, 4, time.Unix(time_milli, 0)}, nil
}
return &common.RequestData{ip, 6, time.Unix(time_milli, 0)}, nil
return &api.RequestData{ip, 6, time.Unix(time_milli, 0)}, nil
}

// BatchResponse is the response type for batch requests. It is converted to
// json for HTTP requests.
type BatchResponse struct {
Version string
Date time.Time
Results map[string]*common.GeoData
Results map[string]*api.GeoData
}

// NewBatchResponse returns a new response struct.
// Caller must properly initialize the version and date strings.
// TODO - pass in the data source and use to populate the version/date.
func NewBatchResponse(size int) *BatchResponse {
responseMap := make(map[string]*common.GeoData, size)
responseMap := make(map[string]*api.GeoData, size)
return &BatchResponse{"", time.Time{}, responseMap}
}

// BatchAnnotate is a URL handler that expects the body of the request
// to contain a JSON encoded slice of common.RequestDatas. It will
// to contain a JSON encoded slice of api.RequestDatas. It will
// look up all the ip addresses and bundle them into a map of metadata
// structs (with the keys being the ip concatenated with the base 36
// encoded timestamp) and send them back, again JSON encoded.
@@ -137,7 +142,7 @@ func BatchAnnotate(w http.ResponseWriter, r *http.Request) {
return
}

responseMap := make(map[string]*common.GeoData)
responseMap := make(map[string]*api.GeoData)
for _, data := range dataSlice {
responseMap[data.IP+strconv.FormatInt(data.Timestamp.Unix(), encodingBase)], err = GetMetadataForSingleIP(&data)
if err != nil {
@@ -146,6 +151,7 @@ func BatchAnnotate(w http.ResponseWriter, r *http.Request) {
return
}
}

encodedResult, err := json.Marshal(responseMap)
if err != nil {
fmt.Fprintf(w, "Unknown JSON Encoding Error")
@@ -156,52 +162,48 @@ func BatchAnnotate(w http.ResponseWriter, r *http.Request) {

// BatchValidateAndParse will take a reader (likely the body of a
// request) containing the JSON encoded array of
// common.RequestDatas. It will then validate that json and use it to
// construct a slice of common.RequestDatas, which it will return. If
// api.RequestDatas. It will then validate that json and use it to
// construct a slice of api.RequestDatas, which it will return. If
// it encounters an error, then it will return nil and that error.
func BatchValidateAndParse(source io.Reader) ([]common.RequestData, error) {
func BatchValidateAndParse(source io.Reader) ([]api.RequestData, error) {
jsonBuffer, err := ioutil.ReadAll(source)
if err != nil {
return nil, err
}
uncheckedData := []common.RequestData{}
uncheckedData := []api.RequestData{}

err = json.Unmarshal(jsonBuffer, &uncheckedData)
if err != nil {
return nil, err
}
validatedData := make([]common.RequestData, 0, len(uncheckedData))
validatedData := make([]api.RequestData, 0, len(uncheckedData))
for _, data := range uncheckedData {
newIP := net.ParseIP(data.IP)
if newIP == nil {
// TODO - shouldn't bail out because of a single error.
return nil, errors.New("invalid IP address")
}
ipType := 6
if newIP.To4() != nil {
ipType = 4
}
validatedData = append(validatedData, common.RequestData{data.IP, ipType, data.Timestamp})
validatedData = append(validatedData, api.RequestData{data.IP, ipType, data.Timestamp})
}
return validatedData, nil
}

// GetMetadataForSingleIP takes a pointer to a common.RequestData
// GetMetadataForSingleIP takes a pointer to an api.RequestData
// struct and will use it to fetch the appropriate associated
metadata, returning a pointer. It is guaranteed to return a non-nil
// pointer, even if it cannot find the appropriate metadata.
func GetMetadataForSingleIP(request *common.RequestData) (*common.GeoData, error) {
func GetMetadataForSingleIP(request *api.RequestData) (*api.GeoData, error) {
metrics.Metrics_totalLookups.Inc()

return UseGeoLite2Dataset(request, CurrentGeoDataset)
}

// ExtractDateFromFilename return the date for a filename like
// gs://downloader-mlab-oti/Maxmind/2017/05/08/20170508T080000Z-GeoLiteCity.dat.gz
func ExtractDateFromFilename(filename string) (time.Time, error) {
re := regexp.MustCompile(`[0-9]{8}T`)
filedate := re.FindAllString(filename, -1)
if len(filedate) != 1 {
return time.Time{}, errors.New("cannot extract date from input filename")
currentDataMutex.RLock()
ann := CurrentAnnotator
currentDataMutex.RUnlock()
if ann == nil {
return nil, ErrNilDataset
}
return time.Parse(time.RFC3339, filedate[0][0:4]+"-"+filedate[0][4:6]+"-"+filedate[0][6:8]+"T00:00:00Z")

return ann.GetAnnotation(request)
}
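
For reference, here is a hedged sketch of a client exercising the batch handler shown above. The service URL and the /batch_annotate path are assumptions (only the /annotate route is visible in this diff); the request body and the response-map key format (IP concatenated with the base-36 Unix timestamp) follow the BatchAnnotate code.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"strconv"
	"time"

	"github.com/m-lab/annotation-service/api"
)

func main() {
	ts := time.Date(2018, 11, 29, 0, 0, 0, 0, time.UTC)
	requests := []api.RequestData{
		{IP: "8.8.8.8", IPFormat: 4, Timestamp: ts},
	}
	body, err := json.Marshal(requests)
	if err != nil {
		log.Fatal(err)
	}

	// The URL and path are placeholders; point this at a running instance.
	resp, err := http.Post("http://localhost:8080/batch_annotate", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// BatchAnnotate marshals the response map directly, keyed by
	// IP + base-36 encoded Unix timestamp.
	results := map[string]*api.GeoData{}
	if err := json.NewDecoder(resp.Body).Decode(&results); err != nil {
		log.Fatal(err)
	}
	key := "8.8.8.8" + strconv.FormatInt(ts.Unix(), 36)
	fmt.Printf("%+v\n", results[key])
}
```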