Updated version of stats extension #3

Closed
wants to merge 8 commits into
from
View
@@ -1,34 +1,32 @@
h1. refine-stats
-A extension for Google Refine to compute elementary statistics.
+An extension for OpenRefine to compute elementary statistics on a column's data.
h2. Installation
-Run Google Refine. On the starting page click the "Browse workspace directory" link in the lower left corner. Make a folder called "extensions" within the the window that pops up. Copy the stats folder into the extensions folder. The final path should be:
+Run OpenRefine. On the starting page, click the "Browse workspace directory" link in the lower left corner. Make a folder called "extensions" (if it doesn't exist) within the window that pops up. Copy the refine-stats folder into the extensions folder. The final path should be something like (it may vary depending on the OS):
<pre>
-/PATH/TO/Google/Refine/extensions/stats/...
+/PATH/TO/Google/Refine/extensions/refine-stats/...
</pre>
-Restart Google Refine.
+Restart OpenRefine.
-(Alternatively, you copy the stats folder into the extensions folder of your Google Refine installation, but it may be lost during upgrades.)
+(Alternatively, you can copy the refine-stats folder into the extensions folder of your OpenRefine installation, but it may be lost during upgrades.)
h2. Usage
Select "Column statistics" from the drop-down menu of any column header. Statistics will be calculated based on *filtered rows*, so you can facet your dataset in different ways and calculate statistics for each subset.
h2. Build it yourself
-WARNING: THIS IS NOT REALLY TESTED WE WILL ADD BETTER INSTRUCTIONS SOON.
-
-If you modify this extension, you can build by changing into the stats directoy and executing a command such as:
-
-ant -Drefine.dir=/Users/YOU/src/google-refine/main -Dserver.lib.dir=/Users/YOU/src/google-refine/server/lib build
+If you modify this extension, copy it to the extensions folder of your OpenRefine installation and build it using ./refine build
h2. Credits
-refine-stats is a "Newsapps":http://apps.chicagotribune.com/ project. Development by "Joe Germuska":https://github.com/joegermuska and "Christopher Groskopf":https://github.com/bouvard.
+refine-stats is a "Newsapps":http://apps.chicagotribune.com/ project. Development by "Joe Germuska":https://github.com/joegermuska and "Christopher Groskopf":https://github.com/bouvard.
+
+"Mateja Verlič (sparkica)":https://github.com/sparkica updated it to use a newer version of the "Apache Commons Math" library and made it work in OpenRefine 2.6 (previously known as Google Refine).
h2. License
File renamed without changes.
@@ -14,7 +14,7 @@ function init() {
"project/scripts",
module,
[
- "scripts/project-injection.js"
+ "scripts/extension.js"
]
);
@@ -23,7 +23,7 @@ function init() {
"project/styles",
module,
[
- "styles/project-injection.less"
+ "styles/stats.less"
]
);
}
Binary file not shown.
@@ -0,0 +1,4 @@
+name = refine-stats
+description = OpenRefine Stats Extension
+templating.macros = macros.vm
+requires = core
File renamed without changes.
File renamed without changes.
@@ -2,7 +2,7 @@ var StatsExtension = {};
DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
var doStatsDialog = function(response) {
- var dialog = $(DOM.loadHTML("stats", "scripts/stats-dialog.html"));
+ var dialog = $(DOM.loadHTML("refine-stats", "scripts/stats-dialog.html"));
var elmts = DOM.bind(dialog);
elmts.dialogHeader.text("Statistics for column \"" + column.name + "\"");
@@ -35,7 +35,7 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
}
Refine.postProcess(
- "stats",
+ "refine-stats",
"summarize",
params,
body,
@@ -46,11 +46,15 @@ DataTableColumnHeaderUI.extendMenu(function(column, columnHeaderUI, menu) {
MenuSystem.insertAfter(
menu,
- [ "core/edit-column" ],
+ [ "core/transpose" ],
+ [
+ {},
{
- id: "stats/summarize",
+ id: "refine-stats/summarize",
label: "Column statistics",
click: prepStatsDialog
}
+ ]
);
});
+
@@ -1,4 +1,4 @@
-<div id="stats-dialog" class="dialog-frame" style="width: 300px;">
+<div id="stats-dialog" class="dialog-frame" style="width: 350px;">
<div class="dialog-header" bind="dialogHeader">Stats</div>
<div class="dialog-body" bind="dialogBody">
<table>
@@ -13,7 +13,7 @@
<tr><td class="key">Variance</td><td class="value" bind="dialogVariance">N/A</td></tr>
</table>
</div>
- <div class="dialog-footer" bind="dialogFooter"><div class="grid-layout layout-normal layout-full">
- <button class="button" bind="okButton" id="stats-ok">Ok</button>
+ <div class="dialog-footer" bind="dialogFooter">
+ <button class="btn btn-success" bind="okButton" id="stats-ok">&nbsp;&nbsp;OK&nbsp;&nbsp;</button>
</div>
</div>
@@ -25,17 +25,18 @@
import com.google.refine.browsing.RowVisitor;
import com.google.refine.util.ParsingUtilities;
-import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
-import org.apache.commons.math.stat.descriptive.rank.Median;
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.rank.Median;
public class Summarize extends Command {
+
protected RowVisitor createRowVisitor(Project project, int cellIndex, List<Float> values) throws Exception {
return new RowVisitor() {
int cellIndex;
List<Float> values;
public RowVisitor init(int cellIndex, List<Float> values) {
- this.cellIndex = cellIndex;
+ this.cellIndex = cellIndex;
this.values = values;
return this;
}
@@ -91,15 +92,15 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) thro
FilteredRows filteredRows = engine.getAllFilteredRows();
filteredRows.accept(project, createRowVisitor(project, cellIndex, values));
- HashMap map = computeStatistics(values);
+ HashMap<String, String> map = computeStatistics(values);
JSONWriter writer = new JSONWriter(response.getWriter());
writer.object();
- for (Iterator<Map.Entry> entries = map.entrySet().iterator(); entries.hasNext();) {
- Map.Entry entry = entries.next();
- writer.key(entry.getKey().toString());
- writer.value(entry.getValue().toString());
+ for (Iterator<Map.Entry<String, String>> entries = map.entrySet().iterator(); entries.hasNext();) {
+ Map.Entry<String, String> entry = entries.next();
+ writer.key(entry.getKey());
+ writer.value(entry.getValue());
}
writer.endObject();
@@ -110,8 +111,8 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) thro
}
};
- public HashMap computeStatistics(List<Float> values) {
- HashMap map = new HashMap();
+ public HashMap<String, String> computeStatistics(List<Float> values) {
+ HashMap<String, String> map = new HashMap<String, String>();
HashMap<Float, Integer> modeMap = new HashMap<Float, Integer>();
DescriptiveStatistics stats = new DescriptiveStatistics();
@@ -138,39 +139,39 @@ public HashMap computeStatistics(List<Float> values) {
}
if (!(Double.isNaN(stats.getN()))) {
- map.put("count", stats.getN());
+ map.put("count", Long.toString(stats.getN()));
}
if (!(Double.isNaN(stats.getSum()))) {
- map.put("sum", stats.getSum());
+ map.put("sum", Double.toString(stats.getSum()));
}
if (!(Double.isNaN(stats.getMin()))) {
- map.put("min", stats.getMin());
+ map.put("min", Double.toString(stats.getMin()));
}
if (!(Double.isNaN(stats.getMax()))) {
- map.put("max", stats.getMax());
+ map.put("max", Double.toString(stats.getMax()));
}
if (!(Double.isNaN((stats.getMean())))) {
- map.put("mean", stats.getMean());
+ map.put("mean", Double.toString(stats.getMean()));
}
if (!(Double.isNaN((stats.apply(new Median()))))) {
- map.put("median", stats.apply(new Median()));
+ map.put("median", Double.toString(stats.apply(new Median())));
}
if (mode != null) {
- map.put("mode", mode);
+ map.put("mode", Float.toString(mode));
}
if (!(Double.isNaN((stats.getStandardDeviation())))) {
- map.put("stddev", stats.getStandardDeviation());
+ map.put("stddev", Double.toString(stats.getStandardDeviation()));
}
if (!(Double.isNaN((stats.getVariance())))) {
- map.put("variance", stats.getVariance());
+ map.put("variance", Double.toString(stats.getVariance()));
}
return map;
View
@@ -1,3 +0,0 @@
-*.DS_Store
-.svn
-*.swp
Binary file not shown.
@@ -1,4 +0,0 @@
-name = stats
-description = Google Refine Stats Extension
-templating.macros = macros.vm
-requires = core