Skip to content
This repository has been archived by the owner on Apr 20, 2019. It is now read-only.

Commit

Permalink
bulk
Browse files Browse the repository at this point in the history
  • Loading branch information
peterbe committed Feb 22, 2015
1 parent f1b5dd6 commit 50c90bc
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 15 deletions.
71 changes: 70 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,73 @@ a HTTP DELETE request to the url `/v1/flush`. Like this:
https://autocompeter.com/v1/flush

This will reset all the counts related to your domain. The only thing that
isn't removed is your auth key.
isn't removed is your auth key.

## Bulk upload

Instead of submitting one "document" at a time you can send in a
whole big JSON blob. The structure needs to be like this example:

{
"documents": [
{
"url": "http://example.com/page1",
"title": "Page One"
},
{
"url": "http://example.com/page2",
"title": "Other page",
"popularity": 123
},
{
"url": "http://example.com/page3",
"title": "Last page",
"group": "admins"
}
]
}

Note that the `popularity` and the `group` keys are optional. Each
dictionary in the array called `documents` needs to have a `url` and `title`.

The endpoint to use is `https://autocompeter.com/v1/bulk` and you need to do an
HTTP POST or an HTTP PUT.

Here's an example using curl:

curl -X POST -H "Auth-Key: 3b14d7c280bf525b779d0a01c601fe44" \
-d '{"documents": [{"url":"/url", "title":"My Title", "popularity":1001}]}' \
https://autocompeter.com/v1/bulk

And here's an example using
Python [requests](http://requests.readthedocs.org/en/latest/):


```python
import json
import requests

documents = [
{
'url': '/some/page',
'title': 'Some title',
'popularity': 10
},
{
'url': '/other/page',
'title': 'Other title',
},
{
'url': '/private/page',
'title': 'Other private page',
'group': 'private'
},
]
print requests.post(
'https://autocompeter.com/v1/bulk',
data=json.dumps({'documents': documents}),
headers={
'Auth-Key': '3b14d7c280bf525b779d0a01c601fe44',
}
)
```
45 changes: 42 additions & 3 deletions sampleloader/populate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import json
import hashlib
import time

import click
import requests
Expand All @@ -28,7 +29,7 @@ def get_events():
yield (item['title'], item['url'], item['popularity'], group)


def populate(database, destination, domain, flush=False):
def populate(database, destination, domain, flush=False, bulk=False):
c = redis.StrictRedis(host='localhost', port=6379, db=database)
if flush:
c.flushdb()
Expand All @@ -38,6 +39,16 @@ def populate(database, destination, domain, flush=False):

#items = get_blogposts()
items = get_events()
t0 = time.time()
if bulk:
_in_bulk(destination, items)
else:
_one_at_a_time(destination, items)
t1 = time.time()
print "TOOK", t1 - t0


def _one_at_a_time(destination, items):
for title, url, popularity, group in items:
_url = destination + '/v1'
data = {
Expand All @@ -56,14 +67,42 @@ def populate(database, destination, domain, flush=False):
assert r.status_code == 201, r.status_code


def _in_bulk(destination, items):
    """Send every item to the bulk endpoint in one single request.

    `items` is an iterable of (title, url, popularity, group) tuples and
    `destination` is the base URL of the server; '/v1/bulk' is appended
    to it. The Auth-Key header value comes from the `key` name defined
    outside of this function.

    Fails with an AssertionError (carrying the status code) unless the
    server answers 201.
    """
    documents = []
    for title, url, popularity, group in items:
        documents.append(dict(
            title=title,
            url=url,
            popularity=popularity,
            group=group
        ))
    payload = json.dumps({'documents': documents})
    response = requests.post(
        destination + '/v1/bulk',
        data=payload,
        headers={'Auth-Key': key}
    )
    assert response.status_code == 201, response.status_code


@click.command()
@click.option('--database', '-d', default=8)
@click.option('--destination', default='http://autocompeter.com')
@click.option('--domain', default='autocompeter.com')
@click.option('--flush', default=False, is_flag=True)
def run(database, destination, domain, flush=False):
@click.option('--bulk', default=False, is_flag=True)
def run(database, destination, domain, flush=False, bulk=False):
#print (database, domain, flush)
populate(database, destination, domain, flush=flush)
populate(
database,
destination,
domain,
flush=flush,
bulk=bulk,
)

if __name__ == '__main__':
run()
84 changes: 74 additions & 10 deletions server.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"crypto/md5"
"encoding/base64"
"encoding/json"
"fmt"
"github.com/codegangsta/negroni"
"github.com/fiam/gounidecode/unidecode"
Expand Down Expand Up @@ -132,7 +133,7 @@ func updateHandler(w http.ResponseWriter, req *http.Request) {
}
form.Title = strings.Trim(form.Title, " ")
form.URL = strings.Trim(form.URL, " ")
group := form.Group
// group := form.Group

c, err := redisPool.Get()
errHndlr(err)
Expand All @@ -145,37 +146,99 @@ func updateHandler(w http.ResponseWriter, req *http.Request) {
return
}

insertDocument(
domain,
form.Title,
form.URL,
form.Group,
form.Popularity,
c,
)

output := map[string]string{"message": "OK"}
renderer.JSON(w, http.StatusCreated, output)
}

func insertDocument(domain, title, url, group string, popularity float64, c *redis.Client) {
encoded := encodeString(domain)
encodedURL := encodeString(form.URL)
encodedURL := encodeString(url)

title, _ := c.Cmd("HGET", encoded+"$titles", encodedURL).Str()
if title == "" {
err = c.Cmd("HINCRBY", "$domaindocuments", domain, 1).Err
existingTitle, _ := c.Cmd("HGET", encoded+"$titles", encodedURL).Str()
if existingTitle == "" {
err := c.Cmd("HINCRBY", "$domaindocuments", domain, 1).Err
errHndlr(err)
}

pipedCommands := 0
for _, prefix := range getPrefixes(form.Title) {
for _, prefix := range getPrefixes(title) {
if group != "" {
encodedGroup := encodeString(group)
c.Append("ZADD", encoded+encodedGroup+prefix, form.Popularity, encodedURL)
c.Append("ZADD", encoded+encodedGroup+prefix, popularity, encodedURL)
c.Append("HSET", encoded+"$groups", encodedURL, encodedGroup)
pipedCommands++
} else {
c.Append("ZADD", encoded+prefix, form.Popularity, encodedURL)
c.Append("ZADD", encoded+prefix, popularity, encodedURL)
}
pipedCommands++
}
c.Append("HSET", encoded+"$titles", encodedURL, form.Title)
c.Append("HSET", encoded+"$titles", encodedURL, title)
pipedCommands++
c.Append("HSET", encoded+"$urls", encodedURL, form.URL)
c.Append("HSET", encoded+"$urls", encodedURL, url)
pipedCommands++
for i := 1; i <= pipedCommands; i++ {
if err := c.GetReply().Err; err != nil {
errHndlr(err)
}
}

}

type bulkDocuments struct {
Documents []bulkDocument `json:"documents"`
}

type bulkDocument struct {
URL string `json:"url"`
Title string `json:"title"`
Popularity float64 `json:"popularity"`
Group string `json:"group"`
}

// bulkHandler inserts many documents from one request. It is routed for
// POST and PUT on /v1/bulk and expects a JSON body shaped like
// bulkDocuments.
//
// Responses:
//   - 403 when the Auth-Key header is missing or GetDomain yields no domain
//   - 400 when the body is not valid JSON, or when any document lacks a
//     "url" or a "title" (nothing is inserted in that case)
//   - 201 with {"message": "OK"} once every document has been inserted
func bulkHandler(w http.ResponseWriter, req *http.Request) {
	key := req.Header.Get("AUTH-KEY")
	if key == "" {
		output := map[string]string{"error": "Auth-Key header not set"}
		renderer.JSON(w, http.StatusForbidden, output)
		return
	}
	c, err := redisPool.Get()
	errHndlr(err)
	defer redisPool.Put(c)

	// The error returned by GetDomain is not inspected; an empty domain is
	// what marks the key as unrecognized.
	domain, err := GetDomain(key, c)
	if domain == "" {
		output := map[string]string{"error": "Auth-Key not recognized"}
		renderer.JSON(w, http.StatusForbidden, output)
		return
	}

	// A body that cannot be decoded is the client's mistake, so answer with
	// a 400 instead of passing the error to errHndlr like a server fault.
	var bs bulkDocuments
	if err = json.NewDecoder(req.Body).Decode(&bs); err != nil {
		output := map[string]string{"error": "Request body is not valid JSON"}
		renderer.JSON(w, http.StatusBadRequest, output)
		return
	}

	// Normalize and validate every document before inserting any of them,
	// so that a bad entry cannot leave a half-applied upload behind. The
	// trimming mirrors what updateHandler does for a single document.
	for i := range bs.Documents {
		doc := &bs.Documents[i]
		doc.Title = strings.Trim(doc.Title, " ")
		doc.URL = strings.Trim(doc.URL, " ")
		if doc.Title == "" || doc.URL == "" {
			output := map[string]string{
				"error": fmt.Sprintf("Document %d is missing 'url' or 'title'", i),
			}
			renderer.JSON(w, http.StatusBadRequest, output)
			return
		}
	}

	for _, b := range bs.Documents {
		insertDocument(
			domain,
			b.Title,
			b.URL,
			b.Group,
			b.Popularity,
			c,
		)
	}
	output := map[string]string{"message": "OK"}
	renderer.JSON(w, http.StatusCreated, output)
}
Expand Down Expand Up @@ -674,6 +737,7 @@ func main() {
mux.HandleFunc("/v1", deleteHandler).Methods("DELETE")
mux.HandleFunc("/v1/stats", privateStatsHandler).Methods("GET")
mux.HandleFunc("/v1/flush", flushHandler).Methods("DELETE")
mux.HandleFunc("/v1/bulk", bulkHandler).Methods("POST", "PUT")

n := negroni.Classic()

Expand Down
3 changes: 2 additions & 1 deletion templates/index.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ Autocompeter(
<li>You determine the sort order by setting a "popularity" number on each title</li>
<li>Can find "Pär is naïve" by typing in "par naive"</li>
<li>You can use "groups" to differentiate content only certain users should see</li>
<li>You can bulk upload as well as posting one at a time</li>
</ul>
</div>
</div>
Expand Down Expand Up @@ -333,7 +334,7 @@ Autocompeter(
ga('send', 'pageview');
</script>
{{end}}

</body>

</html>
39 changes: 39 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import datetime
import json
import unittest

from nose.tools import ok_, eq_
Expand Down Expand Up @@ -559,3 +560,41 @@ def test_get_stats(self):
eq_(r.status_code, 200)
stats = r.json()
eq_(stats['documents'], 1)

def test_bulk_upload(self):
    """Three documents sent in one bulk request are all stored.

    After the upload the stats endpoint must report 3 documents, a
    search without a group must only return the two ungrouped pages,
    and a search with the 'private' group must return 2 results.
    """
    auth_headers = {'Auth-Key': 'xyz123'}
    public_popular = {
        'url': '/some/page',
        'title': 'Some title',
        'popularity': 10
    }
    public_plain = {
        'url': '/other/page',
        'title': 'Other title',
    }
    private_page = {
        'url': '/private/page',
        'title': 'Other private page',
        'group': 'private'
    }
    payload = json.dumps({
        'documents': [public_popular, public_plain, private_page]
    })

    upload = self.post('/v1/bulk', data=payload, headers=auth_headers)
    eq_(upload.status_code, 201)

    stats_response = self.get('/v1/stats', headers=auth_headers)
    eq_(stats_response.status_code, 200)
    eq_(stats_response.json()['documents'], 3)

    # No group given: only the two ungrouped documents may show up.
    public_results = self.get('/v1?q=titl&d=peterbecom').json()['results']
    eq_(len(public_results), 2)
    eq_(
        [result[0] for result in public_results],
        ['/some/page', '/other/page']
    )

    # With the group given, 2 results are expected for "other".
    grouped = self.get('/v1?q=other&d=peterbecom&g=private')
    eq_(len(grouped.json()['results']), 2)

0 comments on commit 50c90bc

Please sign in to comment.