Skip to content

Commit

Permalink
More redundancy until cluster is stable
Browse files Browse the repository at this point in the history
  • Loading branch information
Kyle Lahnakoski committed Jan 27, 2014
1 parent bd520f1 commit 6aaa233
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 37 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,24 @@ You must prepare a ```settings.json``` file to reference the resources,
and its filename must be provided as an argument on the command line.
Examples of settings files can be found in [resources/settings](resources/settings)

### Inter-Run State ###

Bugzilla-ETL keeps local run state in the form of two files:
```first_run_time``` and ```last_run_time```. These are both parameters
in the ```settings.json``` file.

* ```first_run_time``` is written only if it does not exist, and triggers a full ETL refresh. Delete this file if you want to create a new ES index and start ETL from the beginning.
* ```last_run_time``` is recorded whenever there has been a successful ETL. This file will not exist until the initial full ETL has completed successfully. Deleting this file should have no net effect, other than making the program work harder than it should.

### Alias Analysis ###

You will require an alias file that matches the various email addresses that users have over time. This analysis is necessary for proper CC list history and patch review history. [More on alias analysis](https://wiki.mozilla.org/Auto-tools/Projects/PublicES#Alias_Analysis).

* Make an ```alias_analysis_settings.json``` file, which can be the same as the main ETL settings.json file.
* The ```param.alias_file.key``` can be ```null```, or set to an AES256 key of your choice.
* Run [alias_analysis.py](https://github.com/klahnakoski/Bugzilla-ETL/blob/master/resources/scripts/alias_analysis.bat)


Running bz_etl.py
------------------

Expand Down
5 changes: 4 additions & 1 deletion bzETL/alias_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from bzETL.util.multiset import Multiset
from bzETL.util.queries import Q
from bzETL.util.struct import nvl, Struct, Null
from bzETL.util.timer import Timer

bugs = {}
aliases = {}
Expand Down Expand Up @@ -188,8 +189,10 @@ def add_alias(lost, found):
def loadAliases(settings):
try:
try:
alias_json = File(settings.param.alias_file).read()
with Timer("load alias file at {{filename}}", {"filename":nvl(settings.param.alias_file.path, settings.param.alias_file)}):
alias_json = File(settings.param.alias_file).read()
except Exception, e:
Log.warning("No alias file found (looking at {{filename}}", {"filename":nvl(settings.param.alias_file.path, settings.param.alias_file)})
alias_json = "{}"
#self.aliases IS A dict POINTING TO structs
for k, v in CNV.JSON2object(alias_json).iteritems():
Expand Down
2 changes: 2 additions & 0 deletions resources/json/bug_version.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"settings":{
"index.number_of_shards":3,
"index.number_of_replicas":2,
"index.store.throttle.type":"merge",
"index.store.throttle.max_bytes_per_sec":"2mb",
"index.cache.filter.expire":"1m",
Expand Down
72 changes: 36 additions & 36 deletions resources/settings/bz_etl_settings.json
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
{
"param":{
"increment":1000,
"alias_increment":100000,
"alias_file":{
"path":"./resources/json/bugzilla_aliases.json",
"comment":"key is only meant to keep the aliases out of clear text. Aliases are public as per https://www.mozilla.org/en-US/privacy/policies/websites/",
"key":""
"param": {
"increment": 1000,
"alias_increment": 100000,
"alias_file": {
"path": "./resources/json/bugzilla_aliases.json",
"comment": "key is only meant to keep the aliases out of clear text. Aliases are public as per https://www.mozilla.org/en-US/privacy/policies/websites/",
"key": ""
},
"first_run_time":"./results/data/first_run_time.txt",
"last_run_time":"./results/data/last_run_time.txt",
"allow_private_bugs":false
},
"bugzilla":{
"host":"localhost",
"preamble":"from https://github.com/klahnakoski/Bugzilla-ETL",
"port":3306,
"username":"username",
"password":"password",
"schema":"bugzilla",
"debug":false
},
"es":{
"host":"http://localhost",
"port":"9200",
"index":"public_bugs",
"type":"bug_version",
"schema_file":"./resources/json/bug_version.json"
},
"es_comments":{
"host":"http://localhost",
"port":"9200",
"index":"public_comments",
"type":"bug_comment",
"schema_file":"./resources/json/bug_comments.json"
},
"debug":{
"first_run_time": "./results/data/first_run_time.txt",
"last_run_time": "./results/data/last_run_time.txt",
"allow_private_bugs": false
},
"bugzilla": {
"host": "localhost",
"preamble": "from https://github.com/klahnakoski/Bugzilla-ETL",
"port": 3306,
"username": "username",
"password": "password",
"schema": "bugzilla",
"debug": false
},
"es": {
"host": "http://localhost",
"port": "9200",
"index": "public_bugs",
"type": "bug_version",
"schema_file": "./resources/json/bug_version.json"
},
"es_comments": {
"host": "http://localhost",
"port": "9200",
"index": "public_comments",
"type": "bug_comment",
"schema_file": "./resources/json/bug_comments.json"
},
"debug": {
"log":[{
"class": "logging.handlers.RotatingFileHandler",
"filename": "./results/logs/bugzilla_etl.log",
Expand All @@ -46,7 +46,7 @@
"stream":"sys.stdout"
}]

}
}


}

0 comments on commit 6aaa233

Please sign in to comment.