Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
egofaktor refinements.
  • Loading branch information
pudo committed Nov 4, 2013
1 parent 684b278 commit 34ac55e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
34 changes: 34 additions & 0 deletions Ego.yaml
@@ -0,0 +1,34 @@
# Export definitions for the "egos" statistics table.
# `common` holds settings shared by every export; an entry under `exports`
# may override them (the two JSON exports override `format`).
common:
  database: postgresql://localhost/parlament_etl
  prefix: data-exports/
  format: csv

exports:

  # Per party: ego words per 100 words (faktor) and total word count.
  - query: >
      SELECT partei, (sum(egos)::float/sum(words)::float)*100 AS faktor, SUM(words) FROM egos
      WHERE partei IS NOT NULL GROUP BY partei ORDER BY (sum(egos)::float/sum(words)::float) DESC;
    filename: ego-parteien.csv

  # Per person (fingerprint): ego words per 100 words and word count.
  - query: >
      SELECT fingerprint, (egos::float/words::float)*100 AS faktor, words FROM egos
      ORDER BY (egos::float/words::float) DESC;
    filename: ego-personen.csv

  # Per party, as JSON. `wordnum` is the number of words per ego word.
  # NULLIF turns a zero ratio into NULL so that a party without any ego
  # words yields a NULL wordnum instead of a division-by-zero error.
  - query: >
      SELECT partei, (sum(egos)::float/sum(words)::float)*100 AS faktor,
      (1/NULLIF(sum(egos)::float/sum(words)::float, 0))::bigint AS wordnum, SUM(words) FROM egos
      WHERE partei IS NOT NULL GROUP BY partei ORDER BY (sum(egos)::float/sum(words)::float) DESC;
    format: 'json'
    filename: parteien.json

  # Per person, as JSON, joined with the person table for party, gender
  # and name. LEFT JOIN keeps fingerprints without a matching person row.
  # NULLIF guards `wordnum` against persons with zero ego words, which
  # would otherwise abort the whole export with a division-by-zero error.
  - query: >
      SELECT e.fingerprint, p.partei, p.geschlecht, p.vorname, p.nachname,
      (e.egos::float/e.words::float)*100 AS faktor,
      (1/NULLIF(e.egos::float/e.words::float, 0))::bigint AS wordnum, e.words FROM egos e
      LEFT JOIN person p ON e.fingerprint = p.fingerprint
      ORDER BY (e.egos::float/e.words::float) DESC;
    format: 'json'
    filename: personen.json
7 changes: 6 additions & 1 deletion contrib/egofaktor.py
Expand Up @@ -3,7 +3,7 @@
from collections import defaultdict
from unicodedata import normalize as ucnorm, category

# First-person singular forms counted as "ego" words. The \b anchors restrict
# matches to whole words; re.U makes \b and the word classes Unicode-aware.
# NOTE(review): matching is case-sensitive and inflected forms such as
# "meine"/"meinem"/"meinen" are not listed -- confirm this is intended.
egos = re.compile(r'\b(ich|mir|mein|meiner|meines|mich)\b', re.U)
# A "word" is any run of at least two word characters.
words = re.compile(r'\w{2,}')

# Handle to the local PostgreSQL database "parlament_etl", opened via `dataset`.
engine = dataset.connect('postgresql://localhost/parlament_etl')
Expand Down Expand Up @@ -43,8 +43,13 @@ def normalize(text):
# Rebuild the "egos" table from scratch: drop all existing rows, then write
# one row per fingerprint with its ego-word count, word count and party.
egofaktor = engine['egos']
egofaktor.delete()
person_table = engine['person']
for fingerprint in num_egos:
    # A fingerprint without a person row is stored with partei = None.
    person = person_table.find_one(fingerprint=fingerprint) or {}
    row = {
        'fingerprint': fingerprint,
        'partei': person.get('partei'),
        'egos': num_egos[fingerprint],
        'words': num_words[fingerprint],
    }
    egofaktor.upsert(row, ['fingerprint'])
Expand Down

0 comments on commit 34ac55e

Please sign in to comment.