Skip to content

Commit

Permalink
working on benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
rosejn committed May 28, 2011
1 parent d607c0d commit a5fda2a
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 9 deletions.
7 changes: 7 additions & 0 deletions TODO
@@ -1,6 +1,13 @@
* extend Peer to support the graph API directly without with-peer-graph
* start using transactions

* add a function to peer that returns its url

* Add a (graph-event-channel [g]) which will publish an event any time the graph
is modified.

* Add a sum operator and query form

* figure out an error handling policy and write some helpers to
- make it quick and easy to throw exceptions from query-plan builder
functions when bad arguments are passed
Expand Down
93 changes: 93 additions & 0 deletions src/benchmark/graph.clj
@@ -0,0 +1,93 @@
(ns benchmark.graph
(:use plasma.core
clojure.stacktrace
[clojure.java.io :only (reader)]
[clojure.contrib.str-utils2 :only (split)])
(:require [logjam.core :as log]
[jiraph.graph :as jiraph]
[plasma.query.core :as q]
[clojure.set :as set])
(:import [java.io FileInputStream BufferedInputStream
BufferedReader InputStreamReader File]
[java.util.zip GZIPInputStream]))

; Sample edge-list text: three `#` comment lines followed by eleven lines that
; each hold two whitespace-separated integers.
; NOTE(review): not referenced anywhere else in the visible code -- presumably
; kept around for trying edge-pair-seq at the REPL; confirm before removing.
(def test-lines
"# comments at the top
# comments at the top
# comments at the top
23 43
52 532
43 12
234 53
12 78
234 43
1 43
43 132
12 79798
432 7
213 8")

(defn gz-reader
  "Open the gzip-compressed file at path and return a BufferedReader over
  its decompressed contents. Characters are decoded with the platform
  default charset. The caller is responsible for closing the reader."
  [path]
  (let [file-in (FileInputStream. (File. path))
        gunzip  (GZIPInputStream. file-in)]
    (BufferedReader. (InputStreamReader. gunzip))))

(defn edge-pair-seq
  "Turn a sequence of edge-list text lines into a lazy sequence of
  (src tgt) integer pairs.

  Each line is trimmed first. Lines that are then empty, or whose first
  character is `#`, are skipped. The remaining lines are broken into
  whitespace-separated tokens, every token is parsed with Integer/parseInt
  (so a non-numeric token throws NumberFormatException), and the resulting
  numbers are grouped two at a time."
  [lines]
  (->> lines
       (map #(.trim ^String %))
       ;; Blank lines used to fall through the comment filter and reach
       ;; Integer/parseInt as "", which throws; drop them here instead.
       (remove #(or (empty? %) (= \# (first %))))
       ;; re-seq on \S+ never yields empty tokens, unlike splitting on \s+.
       (mapcat #(re-seq #"\S+" %))
       (map #(Integer/parseInt %))
       (partition 2)))

(defn load-graph
  "Populate graph g from edges, a sequence of (src tgt) pairs, using node-map
  to look up the :id for each node number.

  Inside (jiraph/with-graph g ...) this calls clear-graph, then:
    * creates one edge-less node for every node number that only ever
      appears as a target, and
    * walks edges, creating one node per run of consecutive edges that share
      a source; that node's :edges map has the run's targets as keys, each
      mapped to {:label :peer}.
  Every node created gets a random :score.

  The body's final value is [node-count edge-count], where node-count is the
  number of make-node calls and edge-count the number of edges consumed."
  [g edges node-map]
  ;; Derive the node sets from the edges argument. This used to call
  ;; (gnutella-edges), which ignored the argument, re-read the data file
  ;; twice, and referred to a function defined later in the file.
  (let [all-nodes   (set (flatten edges))
        src-nodes   (set (map first edges))
        plain-nodes (set/difference all-nodes src-nodes)]
    (jiraph/with-graph g
      (clear-graph)
      (doseq [n plain-nodes]
        (make-node {:id (get node-map n) :score (rand)}))
      (loop [edges   edges
             n-count 0
             e-count 0]
        (if (seq edges)
          (let [src (ffirst edges)
                ;; split-with only groups *consecutive* edges, so the input
                ;; is expected to be ordered by source node.
                [src-edges more] (split-with #(= src (first %)) edges)
                tgts (map second src-edges)
                ;; NOTE(review): edge targets are the raw node numbers while
                ;; node ids go through node-map -- confirm this is intended.
                edge-map (zipmap tgts (repeat {:label :peer}))
                node {:id (get node-map src) :edges edge-map :score (rand)}]
            (make-node node)
            (recur more (inc n-count) (+ e-count (count tgts))))
          [(+ (count plain-nodes) n-count) e-count])))))

; Graph handle opened with no arguments.
; NOTE(review): presumably an in-memory graph, going by the name -- confirm.
(def mem-graph (open-graph))
; Graph handle opened on the path "db/gnutella".
; NOTE(review): presumably disk-backed -- confirm against open-graph.
(def disk-graph (open-graph "db/gnutella"))

; Path of the gzipped edge-list file that gnutella-edges reads through gz-reader.
(def gnutella "data/p2p-Gnutella08.txt.gz")

(defn gnutella-edges
  "Read the gzipped edge list at the path held in gnutella and return its
  edges as a sequence of (src tgt) integer pairs.

  The sequence is fully realized before returning so that the underlying
  reader can be closed; previously the reader was never closed and every
  call leaked an open file handle."
  []
  (with-open [rdr (gz-reader gnutella)]
    ;; doall forces the lazy pipeline while the reader is still open.
    (doall (edge-pair-seq (line-seq rdr)))))

; Map from every distinct node number appearing in (gnutella-edges) to a
; fresh value produced by calling uuid. Built once, when this namespace is
; loaded. zipmap builds the map eagerly, so no doall is needed around it.
(def node-map
  (zipmap (set (flatten (gnutella-edges)))
          (repeatedly uuid)))

(defn mem
  "Run load-graph against mem-graph with a fresh read of the Gnutella edge
  list and the shared node-map. Returns whatever load-graph returns."
  []
  (let [edges (gnutella-edges)]
    (load-graph mem-graph edges node-map)))

(defn disk
  "Run load-graph against disk-graph with a fresh read of the Gnutella edge
  list and the shared node-map. Returns whatever load-graph returns."
  []
  (let [edges (gnutella-edges)]
    (load-graph disk-graph edges node-map)))

(defn peer-degrees
  "For each id in ids, run a count* query over the path [id :peer] and keep
  the first element of the query result. Returns a lazy sequence with one
  entry per id, in the same order."
  [ids]
  (map #(first (q/query (q/count* (q/path [% :peer]))))
       ids))

(defn peer-stats
  "Compute :peer degree statistics over graph g for every id in node-map.

  Returns a map with :average, :max and :min of the per-node values produced
  by peer-degrees. Expects node-map to be non-empty."
  [g]
  ;; Query the graph that was passed in; this used to ignore g and always
  ;; run against mem-graph.
  (jiraph/with-graph g
    (let [degs (peer-degrees (vals node-map))]
      {:average (average degs)
       ;; max/min take their operands as separate arguments; called with a
       ;; single collection they just return that collection, hence apply.
       :max     (apply max degs)
       :min     (apply min degs)})))


8 changes: 5 additions & 3 deletions src/plasma/core.clj
@@ -1,7 +1,7 @@
(ns plasma.core
(:use plasma.ns)
(:require [plasma util config api graph viz web]
[plasma.net peer connection route bootstrap] ; heartbeat]
[plasma.net peer url connection route bootstrap] ; heartbeat]
[plasma.query core construct]
[logjam.core :as log]))

Expand All @@ -12,7 +12,9 @@
'plasma.graph
'plasma.query.construct
'plasma.viz
'plasma.net.url
'plasma.net.peer
'plasma.net.bootstrap
'plasma.net.route)
; 'plasma.net.bootstrap
; 'plasma.net.heartbeat
'plasma.query.core)
; 'plasma.query.core)
2 changes: 2 additions & 0 deletions src/plasma/net/peer.clj
Expand Up @@ -317,10 +317,12 @@
(setup-peer-presence p))
p)))

; TODO: Make this URL checking generic, maybe hooking into some Java URL class? (Ugh...)
(defn peer-connection
  "Checks url with assert-url, then asks the connection manager stored
  under :manager in the local peer p for a connection to the remote peer
  at url, and returns it."
  [p url]
  (assert-url url)
  (let [manager (:manager p)]
    (get-connection manager url)))

(defn peer-get-node
Expand Down
16 changes: 15 additions & 1 deletion src/plasma/net/route.clj
Expand Up @@ -35,14 +35,28 @@
(number? id) id)
(expt 2 n-bits)))

(defn chord-distance [a b n-bits]
(defn ring-distance
  "Distance from point a to point b on a ring of size 2^n-bits, measured
  only in the clockwise (increasing) direction. Both points are first
  passed through id-bits."
  [n-bits a b]
  (let [ring-size (expt 2 n-bits)
        from      (id-bits a n-bits)
        to        (id-bits b n-bits)]
    (-> (- to from)
        (+ ring-size)
        (mod ring-size))))

(defn ring-abs-distance
  "Compute the shortest distance between two points a and b, going in
  either direction, on a ring of size 2^n-bits. Both points are first
  passed through id-bits."
  [n-bits a b]
  (let [a     (id-bits a n-bits)
        b     (id-bits b n-bits)
        max-n (expt 2 n-bits)
        diff  (- a b)
        ;; Math/abs only has int/long/float/double overloads, so it cannot
        ;; be invoked on a big-integer difference (e.g. ids in a 160-bit
        ;; space). Negate by hand so any integer type works.
        dist  (if (neg? diff) (- diff) diff)]
    (min dist (- max-n dist))))

(defn kademlia-distance
"Compute the kademlia distance between two peer-ids hashed into
an n-bit address space."
Expand Down
6 changes: 6 additions & 0 deletions src/plasma/net/url.clj
Expand Up @@ -18,3 +18,9 @@
:host host
:port (Integer. port)}))

(defn assert-url
  "Throw an Exception unless url is a string beginning with \"plasma://\".
  Returns nil when the url passes the check."
  [url]
  (let [valid? (and (string? url)
                    (.startsWith ^String url "plasma://"))]
    (when (not valid?)
      (throw (Exception.
               (str "Trying to open a peer connection with an invalid URL: " url))))))
3 changes: 3 additions & 0 deletions src/plasma/query/expression.clj
Expand Up @@ -17,6 +17,9 @@

; TODO: Convert to implementing clojure.contrib.generic methods

(def-unary-op rand)
(def-unary-op rand-int)

(def-unary-op not)
(def-unary-op inc)
(def-unary-op dec)
Expand Down
2 changes: 1 addition & 1 deletion src/plasma/query/operator.clj
Expand Up @@ -258,7 +258,7 @@
pts (map #(assoc pt id %) tgts)]
(log/format :flow "[traverse] %s - %s -> [%s]"
src-id edge-predicate
(apply str (interpose " " (map trim-id tgts))))
(apply str (interpose " " (map identity tgts))))
(apply enqueue out pts)))))))
(if (drained? in) (close out))))

Expand Down
2 changes: 1 addition & 1 deletion src/plasma/viz.clj
Expand Up @@ -55,7 +55,7 @@
(defn- dot-node
[id options]
(let [n (find-node id)
d-id (trim-id id)
d-id (if (uuid? id) (trim-id id) id)
node-props (:node-props options)
lbl (if node-props
(dot-record-label (dissoc n :edges) options)
Expand Down
17 changes: 14 additions & 3 deletions test/example/query.clj
Expand Up @@ -12,9 +12,9 @@
(clear-graph)
(construct*
(-> (nodes [employees :employees
alice {:label :alice :gender :female}
bob {:label :bob :gender :male}
maria {:label :maria :gender :female}
alice {:name :alice :gender :female}
bob {:name :bob :gender :male}
maria {:name :maria :gender :female}
lugano :lugano-office
geneva :geneva-office
zurich :zurich-office
Expand Down Expand Up @@ -127,6 +127,12 @@
;For example:
; (acme (with-component-info (components)))

(defn with-manager-info
  "Extend query with a distinct* step on 'manager followed by a projection
  of 'manager onto its :name and :gender properties."
  [query]
  (q/project (q/distinct* query 'manager)
             ['manager :name :gender]))

; Could we do something like this?
(comment def product-component-prices
(-> components
Expand Down Expand Up @@ -199,6 +205,11 @@

;(acme (with-product-info (products-by-price)))

(defn most-expensive-product
  "Build the (products-by-price) query capped at a single result with
  q/limit.
  NOTE(review): which product comes first depends on how products-by-price
  orders its results -- confirm it sorts most expensive first."
  []
  (q/limit (products-by-price) 1))

; limit
; - need offset?
(defn cheapest-component-limit
Expand Down

0 comments on commit a5fda2a

Please sign in to comment.