Permalink
Browse files

export gephi, problème avec mongodb qui bloque à 101 lignes

  • Loading branch information...
1 parent d50ba3b commit 278765377de9e97caa741bef00f2ed1a8b83a6a0 @trecouvr trecouvr committed Mar 15, 2012
Showing with 107 additions and 10 deletions.
  1. +2 −2 crawler/crawler.py
  2. +1 −1 crawler/extractor.py
  3. +3 −3 crawler/mongodbapi.py
  4. +4 −3 db/Makefile
  5. +91 −0 db/src/exportgephi.opa
  6. +2 −0 db/src/main.opa
  7. +4 −1 gephiAPI/gephiAPI.py
View
@@ -69,8 +69,8 @@ def stop(self):
if __name__ == "__main__":
- c = Crawler(10, 4, MONGODB_HOST, MONGODB_PORT, MONGODB_DBNAME, MONGODB_COLLECTION,
- feeds=['http://www.youporn.com'],
+ c = Crawler(10, 2, MONGODB_HOST, MONGODB_PORT, MONGODB_DBNAME, MONGODB_COLLECTION,
+ feeds=["http://www.utc.fr"],
nb_ask_feeds=0)
try:
c.loop()
View
@@ -32,7 +32,7 @@ def __init__(self, url, html):
def get_links(self):
- return [ normalize_url(self.url, link.get('href')) for link in self.soup.find_all('a') if link.get('href') ]
+ return list([ normalize_url(self.url, link.get('href')) for link in self.soup.find_all('a') if link.get('href') ])
def get_keywords(self):
View
@@ -31,14 +31,14 @@ def add_page(self, *, url):
page = {
'url': url
}
- self.queue.put("add_page", dict_to_json(page))
+ self.queue.put(("add_page", dict_to_json(page)))
def add_link(self, *, source, target):
link = {
'source': source,
'target': target,
}
- self.queue.put("add_link", dict_to_json(link))
+ self.queue.put(("add_link", dict_to_json(link)))
def url_need_a_visit(self, url):
url = { 'url' : url }
@@ -72,7 +72,7 @@ def loop_send(self):
while not self.e_stop.is_set():
try:
operation, req = self.queue.get(True, 0.5)
- except:
+ except queue.Empty:
pass
else:
self.send(operation,req)
View
@@ -3,10 +3,11 @@
OPA=opa
EXE=main.exe
-OPT=--database mongo -o $(EXE)
+DATABASE=db3
+OPT=-o $(EXE) --database $(DATABASE)
DEBUG=--debug-editable-css --verbose 100
FILES=$(shell find src -name '*.opa')
-RUN-OPT=#--db-remote localhost:27017 #--db-force-upgrade #--db-local db/db --db-force-upgrade
+RUN-OPT=--db-force-upgrade #--db-remote localhost:27017 #--db-force-upgrade #--db-local db/db --db-force-upgrade
#-----------------------------------------------#
@@ -49,7 +50,7 @@ clean-db:
rm -rf db/*
clean:
- rm -rf $(BUILD_DIR)/*
+ rm -rf _build
rm -f *.exe
rm -rf doc
rm -rf _tracks
View
@@ -0,0 +1,91 @@
+
+
+
+
+
+
+
+// Build the "Export gephi" page: a small form holding the Gephi host and port
+// (pre-filled with "localhost" and "8081"), a "Send" button wired to
+// onclick_send, and an empty #exports container that onclick_send fills in.
+// Returns the page wrapped by Resource.styled_page with the site stylesheet.
+function page_export() {
+ html = <h1>Export gephi</h1>
+ <div id=#forumlaire>
+ <input id=#gephi_host value="localhost"/>
+ <input id=#gephi_port value="8081"/>
+ <button onclick={onclick_send}>Send</button>
+ </div>
+ <div id=#exports />
+ Resource.styled_page("Export Gephi", ["/resources/css.css"], html);
+}
+
+// Click handler for the "Send" button; the event argument is unused.
+// Reads the port field and parses it as an integer. If parsing fails, only a
+// log message is emitted. Otherwise the host field is read, the content of
+// #exports is set to the status block built by gephi_loader, and the export
+// is started with do_export.
+function onclick_send(_e) {
+ match (Parser.int(Dom.get_content(#gephi_port))) {
+ case {none}: jlog("le port doit être une valeur entière");
+ case {some:port}:
+ host = Dom.get_content(#gephi_host);
+ // Install the status block first so the export callbacks have target
+ // elements (ids derived from host and port) to write into.
+ #exports = gephi_loader(host, port);
+ do_export(host, port)
+ }
+}
+
+// Start an export towards the Gephi endpoint at host:port.
+// Builds the URL "http://{host}:{port}/workspace0?operation=updateGraph"
+// (the workspace is hard-coded to workspace0) and parses it with
+// Uri.of_string. If parsing fails, a message is logged and nothing is sent;
+// otherwise the links are exported. The page export call is currently
+// commented out, so only export_links runs.
+@async function do_export(host, port) {
+ str_url = "http://{host}:{port}/workspace0?operation=updateGraph"
+ match (Uri.of_string(str_url)) {
+ case {none}: jlog("impossible de créer une url à partir de {host} et {port}");
+ case {some: uri}:
+ //export_pages(host, port, uri);
+ export_links(host, port, uri);
+ }
+}
+
+
+// Build a result callback for a POST request.
+// Takes the id of a DOM element first, then (as a second argument list) the
+// result `e` of the request. On failure, a line "ECHEC: ..." containing the
+// failure value is added to the element with that id. On success nothing is
+// displayed (the success output is commented out).
+function onresult_post(string id)(e) {
+ match (e) {
+ case {~failure}: #{id} =+ <>ECHEC: {"{failure}"}<br /></>;
+ case {~success}: void // #{id} =+ <>SUCCES: {"{success.content}"}<br /></>;
+ }
+}
+
+// Send every page stored under /mydb/pages to the given uri.
+// Folds over the pages map: each page is serialised with page_to_js and
+// posted through envoyer, with failures reported into the pages status
+// element for this host/port. The fold accumulator counts the pages visited.
+// Afterwards "{count} pages transférées" is added to the same status element.
+// NOTE(review): the count is the number of requests issued by the fold; it
+// does not look at the request results, so it presumably does not reflect
+// how many posts actually succeeded - confirm.
+@async function export_pages(host,port,uri) {
+ count = Map.fold(function (_k,page,acc) {
+ js = page_to_js(page);
+ envoyer(uri, js, onresult_post(id_loader_pages(host,port)));
+ acc + 1;
+ }, /mydb/pages, 0);
+ #{id_loader_pages(host,port)} =+ <>{count} pages transférées<br /></>;
+}
+
+// Send every link stored under /mydb/links to the given uri.
+// The values of the links map are turned into a list, then folded over: each
+// link is serialised with link_to_js and posted through envoyer, with
+// failures reported into the links status element for this host/port. The
+// fold accumulator counts the links visited. Afterwards
+// "{count} liens transférés" is added to the same status element.
+// NOTE(review): the count is the number of requests issued by the fold; it
+// does not look at the request results, so it presumably does not reflect
+// how many posts actually succeeded - confirm.
+@async function export_links(host,port,uri) {
+ count = List.fold(function (link,acc) {
+ js = link_to_js(link);
+ envoyer(uri, js, onresult_post(id_loader_links(host,port)));
+ acc + 1;
+ }, Map.To.val_list(/mydb/links), 0);
+ #{id_loader_links(host,port)} =+ <>{count} liens transférés<br /></>;
+}
+
+// Serialise a page as an 'an' event string of the form
+//   { 'an': { '<url>': { 'label': '<url>' } } }
+// where the page url is used both as the key and as the label.
+// NOTE(review): the url is interpolated as-is, with no escaping visible
+// here; a url containing a single quote would presumably break the payload.
+function page_to_js(Page page) {
+ "\{ 'an': \{ '{page.url}': \{ 'label': '{page.url}' \} \} \}";
+}
+
+// Serialise a link as three event strings joined by "\n\r":
+//   1. an 'ae' event keyed by Link.get_ref(link), carrying source and target;
+//   2. an 'an' event for the target;
+//   3. an 'an' event for the source.
+// Source and target are used both as the key and as the label of their events.
+// NOTE(review): the 'ae' event is placed before the two 'an' events it
+// refers to - confirm the receiver accepts an edge whose nodes have not been
+// added yet.
+// NOTE(review): the separator is "\n\r" (not "\r\n") - confirm this is what
+// the receiver expects.
+// NOTE(review): values are interpolated as-is, with no escaping visible here.
+function link_to_js(Link link) {
+ js_link = "\{'ae': \{ '{Link.get_ref(link)}': \{'source':'{link.source}', 'target': '{link.target}' \} \} \}";
+ js_source = "\{ 'an': \{ '{link.source}': \{ 'label': '{link.source}' \} \} \}";
+ js_target = "\{ 'an': \{ '{link.target}': \{ 'label': '{link.target}' \} \} \}";
+ "{js_link}\n\r{js_target}\n\r{js_source}";
+}
+
+
+// Post `data` to `uri` ("envoyer" is French for "send").
+// Thin wrapper that forwards its three arguments unchanged to
+// WebClient.Post.try_post_async; `onresult` is passed as the callback.
+function envoyer(Uri.uri uri, string data, onresult) {
+ //jlog("envoyer {uri} data={data}")
+ WebClient.Post.try_post_async(uri, data, onresult);
+}
+
+// Base DOM id for an export target: host and port concatenated, no separator.
+function id_loader(host, port) {"{host}{port}";}
+// DOM id of the pages status element: the id_loader value followed by "_pages".
+function id_loader_pages(host,port) { "{id_loader(host,port)}_pages"; }
+// DOM id of the links status element: the id_loader value followed by "_links".
+function id_loader_links(host,port) { "{id_loader(host,port)}_links"; }
+// Build the status block for one export target: a div showing
+// "Export to : host:port" followed by two empty divs whose ids come from
+// id_loader_pages and id_loader_links. The export functions and their result
+// callbacks write into those two divs.
+function gephi_loader(host, port) {
+ <div>
+ Export to : {host}:{port}
+ <div id=#{id_loader_pages(host,port)}></div>
+ <div id=#{id_loader_links(host,port)}></div>
+ </div>
+}
View
@@ -124,6 +124,7 @@ function get_urls_to_visit() {
}
function rest(path) {
+ //jlog("{path}");
match (HttpRequest.get_method()) {
case {some : {post}} :
match (path) {
@@ -152,6 +153,7 @@ function start(url) {
match (url) {
case {path:[] ... }: home();
case {path: ["_rest_" | path] ...}: rest(path)
+ case {path: ["export_gephi" | _path] ...}: page_export();
case {~path ...} :
path = String.concat("/", path);
Resource.styled_page("404", ["/resources/css.css"], <><h1>404</h1><div>{path} doesn't exist</div></>);
View
@@ -34,6 +34,7 @@ def json_edge(self, source, target, *, directed=True, id_edge=None, **params):
return {id_edge: params}
def make_request(self, data, workspace_id):
+ #print(data.encode())
def _f():
url = "http://{host}:{port}/workspace{ws_id}?operation=updateGraph".format(
host=self.host,
@@ -54,15 +55,17 @@ def _f():
import doctest
doctest.testmod()
+ import time
import sys
host = sys.argv[1] if len(sys.argv) > 1 else "localhost"
- port = sys.argv[2] if len(sys.argv) > 2 else 8080
+ port = sys.argv[2] if len(sys.argv) > 2 else 8081
api = GephiAPI(host, port)
api.add_node("Node1")
api.add_node("Node2")
api.add_edge("Node1", "Node2")
+ time.sleep(1)

0 comments on commit 2787653

Please sign in to comment.