diff --git a/docs/api.md b/docs/api.md index 2aef363..3937d3d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -135,4 +135,73 @@ a HTTP DELETE request to the url `/v1/flush`. Like this: https://autocompeter.com/v1/flush This will reset the counts all related to your domain. The only thing that -isn't removed is your auth key. +isn't removed is your auth key. + +## Bulk upload + +Instead of submitting one "document" at a time you can send in a +whole big JSON blob. The structure needs to be like this example: + + { + "documents": [ + { + "url": "http://example.com/page1", + "title": "Page One" + }, + { + "url": "http://example.com/page2", + "title": "Other page", + "popularity": 123 + }, + { + "url": "http://example.com/page3", + "title": "Last page", + "group": "admins" + } + ] + } + +Note that the `popularity` and the `group` keys are optional. Each +dictionary in the array called `documents` needs to have a `url` and `title`. + +The endpoint to use is `https://autocompeter.com/v1/bulk` and you need to do a +HTTP POST or a HTTP PUT. 
+ +Here's an example using curl: + + curl -X POST -H "Auth-Key: 3b14d7c280bf525b779d0a01c601fe44" \ + -d '{"documents": [{"url":"/url", "title":"My Title", "popularity":1001}]}' \ + https://autocompeter.com/v1/bulk + +And here's an example using +Python [requests](http://requests.readthedocs.org/en/latest/): + + +```python +import json +import requests + +documents = [ + { + 'url': '/some/page', + 'title': 'Some title', + 'popularity': 10 + }, + { + 'url': '/other/page', + 'title': 'Other title', + }, + { + 'url': '/private/page', + 'title': 'Other private page', + 'group': 'private' + }, +] +print requests.post( + 'https://autocompeter.com/v1/bulk', + data=json.dumps({'documents': documents}), + headers={ + 'Auth-Key': '3b14d7c280bf525b779d0a01c601fe44', + } +) +``` diff --git a/sampleloader/populate.py b/sampleloader/populate.py index 0a27edb..c0e5b14 100755 --- a/sampleloader/populate.py +++ b/sampleloader/populate.py @@ -3,6 +3,7 @@ import os import json import hashlib +import time import click import requests @@ -28,7 +29,7 @@ def get_events(): yield (item['title'], item['url'], item['popularity'], group) -def populate(database, destination, domain, flush=False): +def populate(database, destination, domain, flush=False, bulk=False): c = redis.StrictRedis(host='localhost', port=6379, db=database) if flush: c.flushdb() @@ -38,6 +39,16 @@ def populate(database, destination, domain, flush=False): #items = get_blogposts() items = get_events() + t0 = time.time() + if bulk: + _in_bulk(destination, items) + else: + _one_at_a_time(destination, items) + t1 = time.time() + print "TOOK", t1 - t0 + + +def _one_at_a_time(destination, items): for title, url, popularity, group in items: _url = destination + '/v1' data = { @@ -56,14 +67,42 @@ def populate(database, destination, domain, flush=False): assert r.status_code == 201, r.status_code +def _in_bulk(destination, items): + data = { + 'documents': [ + dict( + title=t, + url=u, + popularity=p, + group=g + ) + for t, u, p, g 
in items + ] + } + _url = destination + '/v1/bulk' + r = requests.post( + _url, + data=json.dumps(data), + headers={'Auth-Key': key} + ) + assert r.status_code == 201, r.status_code + + @click.command() @click.option('--database', '-d', default=8) @click.option('--destination', default='http://autocompeter.com') @click.option('--domain', default='autocompeter.com') @click.option('--flush', default=False, is_flag=True) -def run(database, destination, domain, flush=False): +@click.option('--bulk', default=False, is_flag=True) +def run(database, destination, domain, flush=False, bulk=False): #print (database, domain, flush) - populate(database, destination, domain, flush=flush) + populate( + database, + destination, + domain, + flush=flush, + bulk=bulk, + ) if __name__ == '__main__': run() diff --git a/server.go b/server.go index d3bb23e..a0d74bd 100644 --- a/server.go +++ b/server.go @@ -3,6 +3,7 @@ package main import ( "crypto/md5" "encoding/base64" + "encoding/json" "fmt" "github.com/codegangsta/negroni" "github.com/fiam/gounidecode/unidecode" @@ -132,7 +133,7 @@ func updateHandler(w http.ResponseWriter, req *http.Request) { } form.Title = strings.Trim(form.Title, " ") form.URL = strings.Trim(form.URL, " ") - group := form.Group + // group := form.Group c, err := redisPool.Get() errHndlr(err) @@ -145,30 +146,44 @@ func updateHandler(w http.ResponseWriter, req *http.Request) { return } + insertDocument( + domain, + form.Title, + form.URL, + form.Group, + form.Popularity, + c, + ) + + output := map[string]string{"message": "OK"} + renderer.JSON(w, http.StatusCreated, output) +} + +func insertDocument(domain, title, url, group string, popularity float64, c *redis.Client) { encoded := encodeString(domain) - encodedURL := encodeString(form.URL) + encodedURL := encodeString(url) - title, _ := c.Cmd("HGET", encoded+"$titles", encodedURL).Str() - if title == "" { - err = c.Cmd("HINCRBY", "$domaindocuments", domain, 1).Err + existingTitle, _ := c.Cmd("HGET", 
encoded+"$titles", encodedURL).Str() + if existingTitle == "" { + err := c.Cmd("HINCRBY", "$domaindocuments", domain, 1).Err errHndlr(err) } pipedCommands := 0 - for _, prefix := range getPrefixes(form.Title) { + for _, prefix := range getPrefixes(title) { if group != "" { encodedGroup := encodeString(group) - c.Append("ZADD", encoded+encodedGroup+prefix, form.Popularity, encodedURL) + c.Append("ZADD", encoded+encodedGroup+prefix, popularity, encodedURL) c.Append("HSET", encoded+"$groups", encodedURL, encodedGroup) pipedCommands++ } else { - c.Append("ZADD", encoded+prefix, form.Popularity, encodedURL) + c.Append("ZADD", encoded+prefix, popularity, encodedURL) } pipedCommands++ } - c.Append("HSET", encoded+"$titles", encodedURL, form.Title) + c.Append("HSET", encoded+"$titles", encodedURL, title) pipedCommands++ - c.Append("HSET", encoded+"$urls", encodedURL, form.URL) + c.Append("HSET", encoded+"$urls", encodedURL, url) pipedCommands++ for i := 1; i <= pipedCommands; i++ { if err := c.GetReply().Err; err != nil { @@ -176,6 +191,54 @@ func updateHandler(w http.ResponseWriter, req *http.Request) { } } +} + +type bulkDocuments struct { + Documents []bulkDocument `json:"documents"` +} + +type bulkDocument struct { + URL string `json:"url"` + Title string `json:"title"` + Popularity float64 `json:"popularity"` + Group string `json:"group"` +} + +func bulkHandler(w http.ResponseWriter, req *http.Request) { + key := req.Header.Get("AUTH-KEY") + if key == "" { + output := map[string]string{"error": "Auth-Key header not set"} + renderer.JSON(w, http.StatusForbidden, output) + return + } + c, err := redisPool.Get() + errHndlr(err) + defer redisPool.Put(c) + + domain, err := GetDomain(key, c) + if domain == "" { + output := map[string]string{"error": "Auth-Key not recognized"} + renderer.JSON(w, http.StatusForbidden, output) + return + } + + // encoded := encodeString(domain) + + decoder := json.NewDecoder(req.Body) + var bs bulkDocuments + err = decoder.Decode(&bs) + 
errHndlr(err) + for _, b := range bs.Documents { + // fmt.Println(b.URL, b.Title, b.Popularity, b.Group) + insertDocument( + domain, + b.Title, + b.URL, + b.Group, + b.Popularity, + c, + ) + } output := map[string]string{"message": "OK"} renderer.JSON(w, http.StatusCreated, output) } @@ -674,6 +737,7 @@ func main() { mux.HandleFunc("/v1", deleteHandler).Methods("DELETE") mux.HandleFunc("/v1/stats", privateStatsHandler).Methods("GET") mux.HandleFunc("/v1/flush", flushHandler).Methods("DELETE") + mux.HandleFunc("/v1/bulk", bulkHandler).Methods("POST", "PUT") n := negroni.Classic() diff --git a/templates/index.tmpl b/templates/index.tmpl index 9df9577..4170f1e 100644 --- a/templates/index.tmpl +++ b/templates/index.tmpl @@ -196,6 +196,7 @@ Autocompeter(
  • You determine the sort order by setting a "popularity" number on each title
  • Can find "Pär is naïve" by typing in "par naive
  • You can submit use "groups" to differentiate content only certain users should see
  • +
  • You can bulk upload as well as posting one at a time
  • @@ -333,7 +334,7 @@ Autocompeter( ga('send', 'pageview'); {{end}} - + diff --git a/tests.py b/tests.py index dc0b364..ac8c840 100644 --- a/tests.py +++ b/tests.py @@ -6,6 +6,7 @@ """ import datetime +import json import unittest from nose.tools import ok_, eq_ @@ -559,3 +560,41 @@ def test_get_stats(self): eq_(r.status_code, 200) stats = r.json() eq_(stats['documents'], 1) + + def test_bulk_upload(self): + documents = [ + { + 'url': '/some/page', + 'title': 'Some title', + 'popularity': 10 + }, + { + 'url': '/other/page', + 'title': 'Other title', + }, + { + 'url': '/private/page', + 'title': 'Other private page', + 'group': 'private' + }, + ] + r = self.post( + '/v1/bulk', + data=json.dumps({'documents': documents}), + headers={ + 'Auth-Key': 'xyz123', + # 'content-type': 'application/json' + } + ) + eq_(r.status_code, 201) + r = self.get('/v1/stats', headers={'Auth-Key': 'xyz123'}) + eq_(r.status_code, 200) + stats = r.json() + eq_(stats['documents'], 3) + + r = self.get('/v1?q=titl&d=peterbecom') + eq_(len(r.json()['results']), 2) + urls = [x[0] for x in r.json()['results']] + eq_(urls, ['/some/page', '/other/page']) + r = self.get('/v1?q=other&d=peterbecom&g=private') + eq_(len(r.json()['results']), 2)