-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add Petr2 config file * Use config file for Mongo details * Add Petr2 config file * Forgot about the scraper conns here
- Loading branch information
1 parent
af5c2b0
commit d1639c5
Showing
7 changed files
with
163 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# Configuration file for release version of PETRARCH event coder | ||
# Codes the GigaWord.sample.PETR.txt using current dictionaries and default options | ||
# Last update: 30 April 2015 | ||
|
||
[Dictionaries] | ||
# See the PETRreader.py file for the purpose and format of these files | ||
verbfile_name = CAMEO.2.0.txt | ||
actorfile_list = Phoenix.Countries.actors.txt, Phoenix.International.actors.txt, Phoenix.MilNonState.actors.txt | ||
agentfile_name = Phoenix.agents.txt | ||
discardfile_name = Phoenix.discards.txt | ||
issuefile_name = Phoenix.IssueCoding.txt | ||
|
||
|
||
|
||
|
||
[Options] | ||
# textfile_list is a comma-delimited list of text files to code. This list has priority if | ||
# both a textfile_list and textfile_name are present | ||
textfile_list = data/text/GigaWord.sample.PETR.xml | ||
#textfile_list = AFP0808-01.xml, AFP0909-01.xml, AFP1210-01.xml | ||
# textfile_name is the name of a file containing a list of names of files to code, one | ||
# file name per line. | ||
#textfile_name = PETR.textfiles.benchmark.txt | ||
|
||
# eventfile_name is the output file for the events | ||
eventfile_name = events.PETR-Demo.txt | ||
|
||
|
||
# INTERFACE OPTIONS: uncomment to activate | ||
# Default: set all of these false, which is equivalent to an A)utocode in TABARI | ||
|
||
# code_by_sentence: show events after each sentence has been coded; default is to | ||
# show events after all of the sentences in a story have been coded | ||
code_by_sentence = True | ||
# pause_by_sentence: pause after the coding of each sentence. Entering 'Return' will | ||
# cause the next sentence to be coded; entering any character will | ||
# cause the program to exit. Default is to code without any pausing. | ||
#pause_by_sentence = True | ||
# pause_by_story: pause after the coding of each story. | ||
#pause_by_story = True | ||
|
||
|
||
# CODING OPTIONS: | ||
# Defaults are more or less equivalent to TABARI | ||
|
||
# write_actor_root: If True, the event record will include the text of the actor root: | ||
# The root is the text at the head of the actor synonym set in the | ||
# dictionary. Default is False | ||
write_actor_root = False | ||
|
||
# write_actor_text: If True, the event record will include include the complete text of | ||
# the noun phrase that was used to identify the actor. Default is False | ||
write_actor_text = True | ||
|
||
# write_event_text: If True, the event record will include include the complete text of | ||
# the verb phrase that was used to identify the event. Default is False | ||
write_event_text = True | ||
|
||
# NULL CODING OPTIONS | ||
# null_verbs: If True, only get verb phrases that are not in the dictionary but are associated | ||
# with coded noun phrases | ||
null_verbs = False | ||
|
||
# null_actors: If True, only get actor phrases that are not in the dictionary but associated with | ||
# coded verb phrases. This also requires new_actor_length to be set to a value > 0: | ||
# typically a value of 4 to 8 will give good results. | ||
null_actors = False | ||
|
||
# new_actor_length: Maximum length for new actors extracted from noun phrases if no | ||
# actor or agent generating a code is found. To disable and just | ||
# use null codes "---", set to zero; this is the default. | ||
# Setting this to a large number will extract anything found in a (NP | ||
# noun phrase, though usually true actors contain a small number of words | ||
# This must be an integer. | ||
new_actor_length = 0 | ||
|
||
# require_dyad: Events require a non-null source and target: setting this false is likely | ||
# to result in a very large number of nonsense events. As happened with the | ||
# infamous GDELT data set of 2013-2014. And certainly no one wants to see | ||
# that again. So the default is True | ||
require_dyad = False | ||
|
||
# stop_on_error: If True, parsing errors causing the program to halt; typically used for | ||
# debugging. With the default [false], the error is written to the error | ||
# file, record is skipped, and processing continues. | ||
stop_on_error = False | ||
|
||
# commas: These adjust the length (in words) of comma-delimited clauses that are eliminated | ||
# from the parse. To deactivate, set the max to zero. | ||
# Defaults, based on TABARI, are in () | ||
# comma_min : internal clause minimum length [2] | ||
# comma_max : internal clause maximum length [8] | ||
# comma_bmin : initial ("begin") clause minimum length [0] | ||
# comma_bmax : initial clause maximum length [0 : deactivated by default] | ||
# comma_emin : terminal ("end") clause minimum length [2] | ||
# comma_emax : terminal clause maximum length [8] | ||
comma_min = 2 | ||
comma_max = 8 | ||
comma_bmin = 0 | ||
comma_bmax = 0 | ||
comma_emin = 2 | ||
comma_emax = 8 | ||
|
||
[StanfordNLP] | ||
stanford_dir = ~/stanford-corenlp/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters