Permalink
Browse files

Fix for geomap opacity

  • Loading branch information...
Martin Holste
Martin Holste committed Apr 30, 2016
1 parent 6ad7966 commit 1566d32054cb886a404c68fb6db8d5420d0f85b3
Showing with 230 additions and 6 deletions.
  1. +30 −6 web/inc/dashboard.js
  2. +199 −0 web/inc/simple-statistics-ckmeans.js
  3. +1 −0 web/lib/View/Dashboard.pm
View
@@ -1432,28 +1432,52 @@ YAHOO.ELSA.Chart.prototype.makeGeoChart = function(){
hElem.setAttribute('class', 'chart_title');
hElem.appendChild(document.createTextNode(oSelf.getTitle()));
oSelf.chart_el.appendChild(hElem);
var country_data = {};
var minOpacity = .05;
var min = Infinity, max = 0;
var min = Infinity, max = 0, counts = [], num_unique_counts = 0;
for (var i = 0, len = oSelf.dataTable.rows.length; i < len; i++){
var count = oSelf.dataTable.rows[i][1];
if (count > max) max = count;
if (count < min) min = count;
country_data[ oSelf.dataTable.rows[i][0] ] = count;
if (counts.indexOf(count) < 0)
num_unique_counts++;
counts.push(count);
}
// Group counts into groups
var opacities = [1, .8, .6, .4, .3, .2];
var num_groups = opacities.length;
if (num_unique_counts < num_groups)
num_groups = num_unique_counts;
else if (oSelf.dataTable.rows.length < num_groups)
num_groups = oSelf.dataTable.rows.length;
var group_counts = ckmeans(counts, num_groups);
group_counts = group_counts.reverse();
console.log('counts', counts, 'group_counts', group_counts);
// Helper function to retrieve the group
function get_opacity(group_counts, count){
for (var i = 0, len = group_counts.length; i < len; i++){
for (var j = 0, jlen = group_counts[i].length; j < jlen; j++){
if (group_counts[i][j] === count) return opacities[i];
}
}
console.error('No group found for count ' + count);
return 0;
}
var sorted_countries = Object.keys(country_data).sort(function(a, b){
return country_data[a] > country_data[b] ? -1 : 1;
});
for (var country in country_data){
country_data[country] = {
opacity: country_data[country] / max,
opacity: get_opacity(group_counts, country_data[country]),
count: country_data[country]
};
if (country_data[country] < (minOpacity + .05)){
country_data[country] = (minOpacity + .05);
}
}
console.log('country_data', country_data, 'rows', oSelf.dataTable.rows,
'sorted_countries', sorted_countries, 'oSelf.dataTable', oSelf.dataTable);
@@ -0,0 +1,199 @@
/* These functions are from https://github.com/simple-statistics.
Copyright (c) 2014, Tom MacWright */
/**
 * Count the distinct values in an already-sorted array.
 *
 * Only adjacent elements are compared (with strict equality), so equal
 * values must be contiguous for the result to be the true number of
 * unique values.
 *
 * @private
 * @param {Array<any>} input a sorted array
 * @returns {number} number of runs of identical adjacent values
 */
function uniqueCountSorted(input/*: Array<any>*/)/*: number */ {
    var total = 0,
        previous;
    for (var idx = 0; idx < input.length; idx++) {
        var current = input[idx];
        // The first element always opens a run; afterwards a new run
        // starts whenever the value differs from the one before it.
        if (idx > 0 && current === previous) {
            continue;
        }
        previous = current;
        total++;
    }
    return total;
}
/**
 * Return a numerically sorted copy of an array.
 *
 * The input is copied first, so the caller's array is left untouched.
 *
 * @private
 * @param {Array<number>} array values to sort
 * @returns {Array<number>} new array with the values in ascending order
 */
function numericSort(array /*: Array<number> */) /*: Array<number> */ {
    // Work on a shallow copy so the caller's array is not reordered.
    var copy = array.slice();
    // Without a comparator, sort() would order values as strings.
    copy.sort(function(left, right) {
        return left - right;
    });
    return copy;
}
/**
 * Build a zero-filled matrix as an array of `columns` arrays, each of
 * which holds `rows` zeros. Every inner array is a distinct object.
 *
 * @private
 * @param {number} columns number of inner arrays to create
 * @param {number} rows number of zeros in each inner array
 * @return {Array<Array<number>>} matrix
 * @example
 * makeMatrix(2, 3);
 * //= [[0, 0, 0], [0, 0, 0]]
 */
function makeMatrix(columns, rows) {
    var grid = [];
    while (grid.length < columns) {
        var line = [];
        while (line.length < rows) {
            line.push(0);
        }
        grid.push(line);
    }
    return grid;
}
/**
 * Ckmeans clustering is an improvement on heuristic-based clustering
 * approaches like Jenks. The algorithm was developed by
 * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf)
 * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach
 * to the problem of clustering numeric data into groups with the least
 * within-group sum-of-squared-deviations.
 *
 * Minimizing the difference within groups - what Wang & Song refer to as
 * `withinss`, or within sum-of-squares - means that groups are optimally
 * homogeneous within and the data is split into representative groups.
 * This is very useful for visualization, where you may want to represent
 * a continuous variable in discrete color or style groups. This function
 * can provide groups that emphasize differences between data.
 *
 * Being a dynamic programming approach, this algorithm is based on two
 * matrices that store incrementally-computed values for squared deviations
 * and backtracking indexes.
 *
 * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html),
 * this implementation does not include any code to automatically determine
 * the optimal number of clusters: this information needs to be explicitly
 * provided.
 *
 * The input array is not modified; clustering works on a sorted copy.
 *
 * ### References
 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic
 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859
 *
 * from The R Journal Vol. 3/2, December 2011
 * @param {Array<number>} data input data, as an array of number values
 * @param {number} nClusters number of desired classes. This cannot be
 * greater than the number of values in the data array and, for non-empty
 * data with more than one distinct value, must be positive.
 * @returns {Array<Array<number>>} clustered input, in ascending order.
 * Empty data yields `[]`; data whose values are all identical yields a
 * single cluster regardless of `nClusters`.
 * @throws {Error} if `nClusters` is greater than `data.length`, or if it
 * is not a positive number while the data has several distinct values.
 * @example
 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);
 * // The input, clustered into groups of similar numbers.
 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]);
 */
function ckmeans(data/*: Array<number> */, nClusters/*: number */)/*: Array<Array<number>> */ {
    if (nClusters > data.length) {
        throw new Error('Cannot generate more classes than there are data values');
    }

    // Nothing to cluster. Without this guard an empty input combined with
    // a cluster count of 0 would crash further down while reading the
    // (empty) backtrack matrix.
    if (data.length === 0) {
        return [];
    }

    var sorted = numericSort(data),
        // we'll use this as the maximum number of clusters
        uniqueCount = uniqueCountSorted(sorted);

    // if all of the input values are identical, there's one cluster
    // with all of the input in it.
    if (uniqueCount === 1) {
        return [sorted];
    }

    // A non-positive (or NaN) cluster count would produce empty matrices
    // and an opaque TypeError below; fail with a clear message instead.
    if (!(nClusters > 0)) {
        throw new Error('Cannot generate fewer than one class');
    }

    // named 'D' originally
    var matrix = makeMatrix(nClusters, sorted.length),
        // named 'B' originally
        backtrackMatrix = makeMatrix(nClusters, sorted.length);

    // This is a dynamic programming way to solve the problem of minimizing
    // within-cluster sum of squares. It's similar to linear regression
    // in this way, and this calculation incrementally computes the
    // sum of squares that are later read.

    // The outer loop iterates through clusters, from 0 to nClusters.
    for (var cluster = 0; cluster < nClusters; cluster++) {

        // At the start of each loop, the mean starts as the first element
        var firstClusterMean = sorted[0];

        for (var sortedIdx = Math.max(cluster, 1);
             sortedIdx < sorted.length;
             sortedIdx++) {

            if (cluster === 0) {

                // Increase the running sum of squares calculation by this
                // new value
                var squaredDifference = Math.pow(
                    sorted[sortedIdx] - firstClusterMean, 2);
                matrix[cluster][sortedIdx] = matrix[cluster][sortedIdx - 1] +
                    (sortedIdx / (sortedIdx + 1)) * squaredDifference;

                // We're computing a running mean by taking the previous
                // mean value, multiplying it by the number of elements
                // seen so far, and then dividing it by the number of
                // elements total.
                var newSum = sortedIdx * firstClusterMean + sorted[sortedIdx];
                firstClusterMean = newSum / (sortedIdx + 1);

            } else {

                var sumSquaredDistances = 0,
                    meanXJ = 0;

                // Try every possible start index j for the last cluster
                // ending at sortedIdx, growing that cluster leftwards and
                // updating its sum of squares and mean incrementally.
                for (var j = sortedIdx; j >= cluster; j--) {

                    sumSquaredDistances += (sortedIdx - j) /
                        (sortedIdx - j + 1) *
                        Math.pow(sorted[j] - meanXJ, 2);

                    meanXJ = (sorted[j] + (sortedIdx - j) * meanXJ) /
                        (sortedIdx - j + 1);

                    // Here j >= cluster >= 1, so the best cost of putting
                    // the first j values into `cluster` clusters is always
                    // available at matrix[cluster - 1][j - 1].
                    var candidate = sumSquaredDistances + matrix[cluster - 1][j - 1];

                    // The first candidate seeds the cell; later ones only
                    // replace it when strictly better.
                    if (j === sortedIdx || candidate < matrix[cluster][sortedIdx]) {
                        matrix[cluster][sortedIdx] = candidate;
                        backtrackMatrix[cluster][sortedIdx] = j;
                    }
                }
            }
        }
    }

    // The real work of Ckmeans clustering happens in the matrix generation:
    // the generated matrices encode all possible clustering combinations, and
    // once they're generated we can solve for the best clustering groups
    // very quickly.
    var clusters = [],
        clusterRight = backtrackMatrix[0].length - 1;

    // Backtrack the clusters from the dynamic programming matrix. This
    // starts at the bottom-right corner of the matrix (if the top-left is 0, 0),
    // and moves the cluster target with the loop.
    for (cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {

        var clusterLeft = backtrackMatrix[cluster][clusterRight];

        // fill the cluster from the sorted input by taking a slice of the
        // array. the backtrack matrix makes this easy - it stores the
        // indexes where the cluster should start and end.
        clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1);

        if (cluster > 0) {
            clusterRight = clusterLeft - 1;
        }
    }

    return clusters;
}
@@ -291,6 +291,7 @@ sub _get_index_body {
<script src="$dir/inc/d3.geo.projection.v0.min.js"></script>
<script src="$dir/inc/topojson.v1.min.js"></script>
<script src="$dir/inc/world-topo.json"></script>
<script src="$dir/inc/simple-statistics-ckmeans.js"></script>
<link rel="stylesheet" type="text/css" href="$dir/inc/c3.min.css" />
<script type="text/javascript" src="$dir/inc/elsa.js" ></script>
<script type="text/javascript" src="$dir/inc/dashboard.js" ></script>

0 comments on commit 1566d32

Please sign in to comment.