Permalink
Browse files

periodical import, added BSD license

  • Loading branch information...
1 parent d693570 commit cb349ee04da3da63dc38035007b308f71dc11b52 @corajr corajr committed Sep 27, 2012
View
@@ -0,0 +1,9 @@
+Copyright (c) 2012, Chris Johnson-Roberson.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -44,6 +44,8 @@ Zotero_PaperMachines_Dialog.init = function () {
intro.label = this.io.dataIn["prompt"];
list.hidden = false;
+ var idx = 0, selectedIdx = -1;
+
this.io.dataIn["options"].forEach(function (item) {
var row = document.createElement('listitem');
var cell = document.createElement('listcell');
@@ -56,7 +58,16 @@ Zotero_PaperMachines_Dialog.init = function () {
row.setUserData("value", item.value, null);
list.appendChild(row);
+ if ("default" in item) {
+ selectedIdx = idx;
+ }
+
+ idx++;
});
+
+ if (selectedIdx > -1) {
+ list.selectedIndex = selectedIdx;
+ }
break;
}
};
@@ -1,6 +1,8 @@
-Components.utils.import("chrome://papermachines/content/Preferences.jsm");
+Components.utils.import("chrome://papermachines/content/Preferences.js");
-Zotero_PaperMachines_resetPrefsForPane = function (pane) {
+Zotero_PaperMachines_Options = function () {};
+
/**
 * Reset the preference branch that belongs to one options pane.
 *
 * The branch name is built as "extensions.papermachines.<pane>." and handed
 * to Preferences.resetBranch (imported from Preferences.js).
 *
 * @param {string} pane  Pane identifier, e.g. 'lda'.
 */
Zotero_PaperMachines_Options.resetPrefsForPane = function (pane) {
    Preferences.resetBranch("extensions.papermachines." + pane + ".");
};
@@ -10,22 +10,24 @@
<script src="chrome://papermachines/content/options.js"/>
-<prefpane id="papermachines-general-pane" label="General Settings">
+<prefpane id="papermachines-general-pane" label="&papermachines.prefs.general;">
<preferences>
<preference id="pref_lang" name="extensions.papermachines.general.lang" type="unichar"/>
</preferences>
- <vbox>
- <groupbox orient="horizontal">
+ <groupbox orient="vertical">
+ <vbox>
<label control="lang" value="&papermachines.prefs.general.lang;"/>
<menulist id="lang" preference="pref_lang">
<menupopup>
<menuitem label="English" value="en"/>
<menuitem label="Portuguese" value="pt"/>
</menupopup>
</menulist>
- </groupbox>
- </vbox>
+ <separator class="groove-thin"/>
+ <caption label="&papermachines.prefs.after_close;"/>
+ </vbox>
+ </groupbox>
</prefpane>
<prefpane id="papermachines-lda-pane" label="&papermachines.collectionmenu.topicmodeling;">
@@ -41,14 +43,15 @@
<preference id="pref_lda_tfidf" name="extensions.papermachines.lda.tfidf" type="bool"/>
</preferences>
- <groupbox orient="vertical">
<vbox>
<hbox align="center">
<label control="lda_topics" value="&papermachines.prefs.lda.topics;"/>
<textbox preference="pref_lda_topics" id="lda_topics" maxlength="3"/>
</hbox>
<separator class="groove-thin"/>
- <caption value="&papermachines.prefs.advanced;"/>
+ <groupbox orient="vertical">
+
+ <caption label="&papermachines.prefs.advanced;"/>
<hbox align="center">
<label control="lda_iterations" value="&papermachines.prefs.lda.iterations;"/>
<textbox preference="pref_lda_iterations" id="lda_iterations" maxlength="4"/>
@@ -76,8 +79,42 @@
<checkbox preference="pref_lda_symmetric_alpha" label="&papermachines.prefs.lda.symmetric_alpha;" id="lda_symmetric_alpha"/>
<checkbox preference="pref_lda_stemming" label="&papermachines.prefs.lda.stemming;" id="lda_stemming"/>
<checkbox preference="pref_lda_tfidf" label="&papermachines.prefs.lda.tfidf;" id="lda_tfidf"/>
+
+ </groupbox>
<spacer flex="1"/>
- <button id="reset-to-defaults" label="&papermachines.prefs.reset;" oncommand="Zotero_PaperMachines_resetPrefsForPane('lda');"/>
+
+ <button id="reset-to-defaults" label="&papermachines.prefs.reset;" oncommand="Zotero_PaperMachines_Options.resetPrefsForPane('lda');"/>
+
+ </vbox>
+</prefpane>
+
+<prefpane id="papermachines-import-pane" label="&papermachines.prefs.import;">
+ <preferences>
+ <preference id="pref_import_title" name="extensions.papermachines.import.title" type="unichar"/>
+ <preference id="pref_import_pubtitle" name="extensions.papermachines.import.pubtitle" type="unichar"/>
+ <preference id="pref_import_dateformat" name="extensions.papermachines.import.dateformat" type="unichar"/>
+ <preference id="pref_import_startingoffset" name="extensions.papermachines.import.startingoffset" type="int"/>
+
+ </preferences>
+
+ <groupbox orient="horizontal">
+ <vbox>
+ <hbox align="center">
+ <label control="import_title" value="&papermachines.prefs.import.title;"/>
+ <textbox preference="pref_import_title" id="import_title" maxlength="25"/>
+ </hbox>
+ <hbox align="center">
+ <label control="import_pubtitle" value="&papermachines.prefs.import.pubtitle;"/>
+ <textbox preference="pref_import_pubtitle" id="import_pubtitle" maxlength="25"/>
+ </hbox>
+ <hbox align="center">
+ <label control="import_dateformat" value="&papermachines.prefs.import.dateformat;"/>
+ <textbox preference="pref_import_dateformat" id="import_dateformat" maxlength="25"/>
+ </hbox>
+ <hbox align="center">
+ <label control="import_startingoffset" value="&papermachines.prefs.import.startingoffset;"/>
+ <textbox preference="pref_import_startingoffset" id="import_startingoffset" maxlength="25"/>
+ </hbox>
</vbox>
</groupbox>
</prefpane>
@@ -78,10 +78,12 @@
</menu>
<menuseparator hidden="false"/>
+ <menuitem id="bulk-import" label="&papermachines.collectionmenu.bulk_import;"
+ oncommand="Zotero.PaperMachines.bulkImport()"/>
<menuitem id="export-output" label="&papermachines.collectionmenu.export_output;"
- oncommand="Zotero.PaperMachines.exportOutput()"/>
+ disabled="true" oncommand="Zotero.PaperMachines.exportOutput()"/>
<menuitem id="reset-output" label="&papermachines.collectionmenu.reset_output;"
- oncommand="Zotero.PaperMachines.resetOutput()"/>
+ disabled="true" oncommand="Zotero.PaperMachines.resetOutput()"/>
<menuitem id="prefs" label="&papermachines.collectionmenu.preferences;"
oncommand="Zotero.PaperMachines.openPreferences()"/>
</menupopup>
@@ -23,7 +23,7 @@ Zotero.PaperMachines = {
prompts: null,
paramLabels: null,
lang: null,
- wordcloudFilters: [{"name": "none (raw frequency)", "label": " ", "value": "plain"},
+ wordcloudFilters: [{"name": "none (raw frequency)", "label": " ", "value": "plain", "default": true},
{"name": "tf*idf", "label": " ", "value": "tfidf"},
{"name": "Dunning's log-likelihood", "label": " ", "value": "dunning"},
{"name": "Mann-Whitney U", "label": " ", "value": "mww"},
@@ -227,7 +227,9 @@ Zotero.PaperMachines = {
this.log_dir = this._getOrCreateDir("logs", this.out_dir);
this.args_dir = this._getOrCreateDir("args");
- Components.utils.import("chrome://papermachines/content/Preferences.jsm");
+ Components.utils.import("chrome://papermachines/content/Preferences.js");
+ Components.utils.import("chrome://papermachines/content/strptime.js");
+
var stoplist_lang = Preferences.get("extensions.papermachines.general.lang") || "en";
@@ -1150,7 +1152,7 @@ Zotero.PaperMachines = {
return false;
}
},
- processDefaults: {
+ processParamLists: {
"mallet_lda": [{"name": "topics", "type": "text", "pref": "extensions.papermachines.lda.topics"},
{"name": "iterations", "type": "text", "pref": "extensions.papermachines.lda.iterations"},
{"name": "stemming", "type": "check", "pref": "extensions.papermachines.lda.stemming"},
@@ -1161,13 +1163,20 @@ Zotero.PaperMachines = {
{"name": "burn_in", "type": "text", "pref": "extensions.papermachines.lda.burn_in"},
{"name": "optimize_interval", "type": "text", "pref": "extensions.papermachines.lda.optimize_interval"},
{"name": "symmetric_alpha", "type": "check", "pref": "extensions.papermachines.lda.symmetric_alpha"},
+ ],
+ "bulk_import": [{"name": "title", "type": "text", "pref": "extensions.papermachines.import.title"},
+ {"name": "pubtitle", "type": "text", "pref": "extensions.papermachines.import.pubtitle"},
+ {"name": "dateformat", "type": "text", "pref": "extensions.papermachines.import.dateformat"},
+ {"name": "startingoffset", "type": "text", "pref": "extensions.papermachines.import.startingoffset"},
]
},
promptForProcessParams: function(process) {
- var items = Zotero.PaperMachines.processDefaults[process];
+ var items = Zotero.PaperMachines.processParamLists[process];
for (var i in items) {
items[i].label = Zotero.PaperMachines.paramLabels[process][items[i].name];
- items[i].value = Preferences.get(items[i].pref);
+ if ("pref" in items[i]) {
+ items[i].value = Preferences.get(items[i].pref);
+ }
}
var intro = Zotero.PaperMachines.processNames[process];
return Zotero.PaperMachines._promptForProcessParams(intro, items);
@@ -1185,7 +1194,84 @@ Zotero.PaperMachines = {
return false;
}
},
+ RISfields: {
+ "title": "TI",
+ "journaltitle": "T2",
+ "date": "DA",
+ "language": "LA",
+ "year": "PY"
+ },
+ generateRIS: function (items) {
+ var ris = "";
+ for (var i in items) {
+ var item = items[i];
+ var myRis = "TY - " + item.type + "\n"; // type must come first
+ for (var field in Zotero.PaperMachines.RISfields) {
+ if (field in item) {
+ myRis += Zotero.PaperMachines.RISfields[field] + " - " + item[field] + "\n";
+ }
+ }
+ if ("files" in item) {
+ for (var j in item.files) {
+ var f = item.files[j];
+ myRis += "L1 - " + f + "\n";
+ }
+ }
+ myRis += "ER - \n\n";
+ ris += myRis;
+ }
+ return ris;
+ },
+ bulkImport: function () {
+ var import_dir = this.filePrompt("import_dir", "getfolder");
+ if (import_dir) {
+ var params = Zotero.PaperMachines.promptForProcessParams("bulk_import");
+ if (params) {
+ var items = [];
+ var found_obj = {};
+ var n = params["startingoffset"] || 1;
+ Zotero.PaperMachines.findPDFsInDir(import_dir, found_obj);
+ for (var dir_name in found_obj) {
+ var date = false;
+ var files = found_obj[dir_name].map(function (fname) { return "file://" + fname;});
+ var item = {"type": "NEWS", "title": params["title"] + " " + n.toString(), "pubtitle": params["pubtitle"], "files": files};
+ try {
+ date = strptime(dir_name, params["dateformat"]);
+ } catch (e) {
+ Zotero.PaperMachines.LOG("Date not understood: " + dir_name);
+ Zotero.PaperMachines.LOG(e.name + ": " + e.message);
+ }
+ if (date) {
+ item.date = date.toISOString().replace(/-/g,"/").substring(0,10) + "/"; // 2012/09/12/
+ }
+ items.push(item);
+ n++;
+ }
+ var ris_str = Zotero.PaperMachines.generateRIS(items);
+ var ris_file = Zotero.PaperMachines._getOrCreateFile(import_dir.leafName + ".ris", import_dir);
+ Zotero.File.putContents(ris_file, ris_str);
+
+ Zotero.UnresponsiveScriptIndicator.disable();
+ Zotero_File_Interface.importFile(ris_file);
+ Zotero.UnresponsiveScriptIndicator.enable();
+ }
+ }
+ },
+ findPDFsInDir: function (dir, found_obj) {
+ var files = dir.directoryEntries;
+ while (files.hasMoreElements()) {
+ var f = files.getNext().QueryInterface(Components.interfaces.nsIFile);
+ if (f.isFile() && f.leafName.toLowerCase().indexOf(".pdf") != -1) {
+ if (!(dir.leafName in found_obj)) {
+ found_obj[dir.leafName] = [];
+ }
+ found_obj[dir.leafName].push(f.path);
+ } else if (f.isDirectory()) {
+ Zotero.PaperMachines.findPDFsInDir(f, found_obj);
+ }
+ }
+ },
LOG: function(msg) {
var consoleService = Components.classes["@mozilla.org/consoleservice;1"]
.getService(Components.interfaces.nsIConsoleService);
@@ -0,0 +1,57 @@
/**
 * https://github.com/cho45/micro-strptime.js
 * (c) cho45 http://cho45.github.com/mit-license
 */
let EXPORTED_SYMBOLS = ["strptime"];

/**
 * Parse a date string according to a strptime-like format.
 *
 * The format is turned into a case-insensitive regular expression: every
 * "%x" descriptor is replaced by the pattern registered in strptime.fd and
 * everything else is used as regex source unchanged (so "." matches any
 * character, and the pattern is not anchored). All fields are interpreted
 * as UTC unless a %Z offset is given.
 *
 * Supported descriptors: %% %A %B %Y %y %m %d %H %M %S %s %Z %I %p.
 *
 * Fields are applied in the order year, month, day, then everything else,
 * regardless of their order in the format. Applying them in format order
 * lets the day overflow against the wrong month or year, e.g. "29/02/2012"
 * with "%d/%m/%Y" would come out as 1 March 2012.
 *
 * @param {string} str     Text to parse.
 * @param {string} format  Format string; must be non-empty.
 * @returns {Date} The parsed instant.
 * @throws {Error} If the format is missing, a descriptor is unknown, the
 *                 text does not match, or a month name is not recognized.
 */
function strptime (str, format) {
    if (!format) throw Error("Missing format");
    var fields = [];
    var re = new RegExp(format.replace(/%(?:([a-zA-Z%])|('[^']+')|("[^"]+"))/g, function (_, a, b, c) {
        var fd = a || b || c;
        var d = strptime.fd[fd];
        if (!d) throw Error("Unknown format descripter: " + fd);
        fields.push({
            apply: d[1],
            // Optional third element of a descriptor; two-element custom
            // descriptors keep working and are applied last.
            order: (typeof d[2] == "number") ? d[2] : 3,
            index: fields.length
        });
        return '(' + d[0] + ')';
    }), 'i');
    var matched = str.match(re);
    if (!matched) throw Error('Failed to parse');

    // Capture groups are numbered in format order; remember each value
    // before reordering the fields.
    for (var i = 0, len = fields.length; i < len; i++) {
        fields[i].value = matched[i + 1];
    }
    // Explicit tie-break on the original position keeps the sort stable.
    fields.sort(function (x, y) {
        return (x.order - y.order) || (x.index - y.index);
    });

    var date = new Date(0);
    for (var k = 0; k < fields.length; k++) {
        var field = fields[k];
        if (!field.apply) continue;
        field.apply.call(date, field.value);
    }

    // AM/PM is resolved on the wall-clock hour, before the zone shift. The
    // shift creates a new Date, which would lose the AMPM marker.
    if (date.AMPM) {
        if (date.getUTCHours() == 12) date.setUTCHours(0);
        if (date.AMPM == 'PM') date.setUTCHours(date.getUTCHours() + 12);
    }
    if (date.timezone) {
        date = new Date(date.getTime() - date.timezone * 1000);
    }
    return date;
}

// Descriptor table: letter -> [regex source, handler(matched), apply order].
// Handlers run with `this` bound to the Date under construction.
// Apply order: 0 = year, 1 = month, 2 = day of month, omitted = afterwards.
strptime.fd = {
    '%' : [ '%', function () {} ],
    'A' : [ '[a-z]+', function (matched) {} ],
    'B' : [ '[a-z]+', function (matched) {
        // The regex is case-insensitive but strptime.B is keyed "Jan".."Dec".
        var name = matched.charAt(0).toUpperCase() + matched.slice(1, 3).toLowerCase();
        if (!Object.prototype.hasOwnProperty.call(strptime.B, name)) {
            throw Error('Failed to parse month name: ' + matched);
        }
        this.setUTCMonth(strptime.B[name]);
    }, 1 ],
    'Y' : [ '[0-9]{4}', function (matched) { this.setUTCFullYear(+matched) }, 0 ],
    // Two-digit years later than the current one are taken as 19xx.
    'y' : [ '[0-9]{2}', function (matched) { this.setUTCFullYear(+matched + (+matched > (new Date().getFullYear() - 2000) ? 1900 : 2000)) }, 0 ],
    'm' : [ '[0-9]{0,2}', function (matched) { this.setUTCMonth(+matched - 1) }, 1 ],
    'd' : [ '[0-9]{0,2}', function (matched) { this.setUTCDate(+matched) }, 2 ],
    'H' : [ '[0-9]{0,2}', function (matched) { this.setUTCHours(+matched) } ],
    'M' : [ '[0-9]{0,2}', function (matched) { this.setUTCMinutes(+matched) } ],
    'S' : [ '[0-9]{0,2}', function (matched) { this.setUTCSeconds(+matched) } ],
    's' : [ '[0-9]+', function (matched) { this.setUTCMilliseconds(+matched) } ],
    'Z' : [ 'UTC|Z|[+-][0-9][0-9]:?[0-9][0-9]', function (matched) {
        matched = matched.toUpperCase();
        if (matched == 'Z') return;
        if (matched == 'UTC') return;
        // '+09:00' or '+0900'
        matched = matched.replace(/:/, '');
        // The sign covers hours AND minutes: "-0930" is -9.5h, not -8.5h.
        var sign = (matched.charAt(0) == '-') ? -1 : 1;
        this.timezone = sign * ((+matched.slice(1, 3) * (60 * 60)) + (+matched.slice(3, 5) * 60));
    } ],
    'I' : [ '[0-9]{0,2}', function (matched) { this.setUTCHours(+matched) } ],
    'p' : [ 'AM|PM', function (matched) { this.AMPM = matched.toUpperCase() } ]
};
// Month abbreviation -> zero-based month index.
strptime.B = { "Jan": 0, "Feb": 1, "Mar": 2, "Apr": 3, "May": 4, "Jun": 5, "Jul": 6, "Aug": 7, "Sep": 8, "Oct": 9, "Nov": 10, "Dec": 11 };
@@ -23,6 +23,7 @@
<!ENTITY papermachines.collectionmenu.mallet_train-classifier "Train Classifier">
<!ENTITY papermachines.collectionmenu.mallet_classify-file "Test Classification...">
+<!ENTITY papermachines.collectionmenu.bulk_import "Periodical Import...">
<!ENTITY papermachines.collectionmenu.export_output "Export Output of Paper Machines...">
<!ENTITY papermachines.collectionmenu.reset_output "Reset Output of Paper Machines...">
@@ -42,6 +43,15 @@
<!ENTITY papermachines.prefs.advanced "Advanced Options">
<!ENTITY papermachines.prefs.reset "Reset to Defaults">
+<!ENTITY papermachines.prefs.after_close "Changes will take effect after the application is closed.">
+<!ENTITY papermachines.prefs.general "General Settings">
+
+<!ENTITY papermachines.prefs.import "Periodical Import">
+<!ENTITY papermachines.prefs.import.title "Title Prefix">
+<!ENTITY papermachines.prefs.import.pubtitle "Publication Title">
+<!ENTITY papermachines.prefs.import.dateformat "Date Format">
+<!ENTITY papermachines.prefs.import.startingoffset "Starting Offset">
+
<!ENTITY papermachines.prefs.general.lang "Stoplist Language: ">
<!ENTITY papermachines.prefs.lda.topics "Number of topics: ">
<!ENTITY papermachines.prefs.lda.iterations "Number of iterations: ">
Oops, something went wrong.

0 comments on commit cb349ee

Please sign in to comment.