Skip to content
Permalink
Browse files

Still not working but committing anyway.

  • Loading branch information...
emeryberger committed Aug 26, 2019
1 parent c9bd91b commit 9c1831a11b6c6ac681dccaa2e4579cedcb3d2f4d
Showing with 203 additions and 127 deletions.
  1. +20 −8 src/components/App.tsx
  2. +22 −30 src/components/Content.tsx
  3. +76 −41 src/components/colorize.js
  4. +85 −48 src/components/colorize.ts
@@ -38,6 +38,7 @@ export default class App extends React.Component<AppProps, AppState> {
private numericRangeThreshold = 10000;
private formulasThreshold = 10000;
private valuesThreshold = 10000;
private suspiciousCellsThreshold = 1 - Colorize.suspiciousCellsReportingThreshold / 100; // Must be more rare than this fraction.

constructor(props, context) {
super(props, context);
@@ -178,37 +179,48 @@ export default class App extends React.Component<AppProps, AppState> {
processed_formulas.concat(data_values));
// processed_formulas);

// console.log("formula_matrix = " + JSON.stringify(formula_matrix));
console.log("formula_matrix = " + JSON.stringify(formula_matrix));


// console.log("processed_formulas = " + JSON.stringify(processed_formulas));
// console.log("data_values = " + JSON.stringify(data_values));
console.log("processed_formulas = " + JSON.stringify(processed_formulas));
console.log("data_values = " + JSON.stringify(data_values));


const stencil = Colorize.stencilize(cols, rows, formula_matrix);
// console.log("stencilized formula_matrix = " + JSON.stringify(stencil));
console.log("stencilized formula_matrix = " + JSON.stringify(stencil));
const probs = Colorize.compute_stencil_probabilities(cols, rows, stencil);
// console.log("probabilities = " + JSON.stringify(probs));
console.log("probabilities = " + JSON.stringify(probs));

const candidateSuspiciousCells = Colorize.generate_suspicious_cells(cols, rows, origin[0] - 1, origin[1] - 1, formula_matrix, probs, threshold);
// console.log("suspicious cells before = " + JSON.stringify(candidateSuspiciousCells));
console.log("suspicious cells before = " + JSON.stringify(candidateSuspiciousCells));

// Prune any cell that is in fact a formula.

if (typeof formulas !== 'undefined') {
let totalFormulaWeight = 0;
suspiciousCells = candidateSuspiciousCells.filter((c) => {
const theFormula = formulas[c[1] - origin[1]][c[0] - origin[0]];
console.log("Checking theFormula = " + JSON.stringify(theFormula) + " for cell " + JSON.stringify(c));
if ((theFormula.length < 1) || (theFormula[0] != '=')) {
return true;
} else {
// It's a formula: we will remove it, but also track how much it contributed to the probability distribution.
console.log("REMOVING " + JSON.stringify(c));
totalFormulaWeight += c[2];
return false;
}
});
console.log("total formula weight = " + totalFormulaWeight);
// Now we need to correct all the non-formulas to give them weight proportional to the case when the formulas are removed.
const multiplier = 1 / (1 - totalFormulaWeight);
console.log("before thresholding: suspicious cells = " + JSON.stringify(suspiciousCells));
suspiciousCells = suspiciousCells.map((c) => [c[0], c[1], c[2] * multiplier]);
suspiciousCells = suspiciousCells.filter((c) => c[2] <= threshold );
} else {
suspiciousCells = candidateSuspiciousCells;
}

// console.log("suspicious cells after = " + JSON.stringify(suspiciousCells));
console.log("suspicious cells after = " + JSON.stringify(suspiciousCells));
}
return suspiciousCells;
}
@@ -651,7 +663,7 @@ export default class App extends React.Component<AppProps, AppState> {
this.suspicious_cells = [];

if (values.length < 10000) {
this.find_suspicious_cells(cols, rows, origin, formulas, processed_formulas, data_values, 0.05); // <-- threshold
this.suspicious_cells = this.find_suspicious_cells(cols, rows, origin, formulas, processed_formulas, data_values, this.suspiciousCellsThreshold);
}


@@ -88,7 +88,7 @@ function makeTable(sheetName: string, arr, selector, current: number, numFixes :
return table;
}
}
return <div style={notSuspiciousStyle}>No suspicious formulas found in {sheetName}.<br /><br /></div>;
return <div style={notSuspiciousStyle}>No suspicious formulas found in {sheetName}.</div>;
// return <div></div>; // style={notSuspiciousStyle}>No suspicious formulas found in {sheetName}.<br /><br /></div>;
}

@@ -105,22 +105,16 @@ function makeTableSuspiciousCells(sheetName: string, arr, selector, current: num
// let r = ExcelUtils.get_rectangle(arr, i);
let r = arr[i];
if (r) {
let [ col, row, _val ] = r;
let [ col, row, val ] = r;
// console.log("value = " + val);
let score = (1.0 - val) * barWidth;
// Sort from largest to smallest (by making negative).
// console.log(JSON.stringify(r));
// console.log("original score = " + arr[i][0]);
if (false) {
let score = -arr[i][0] * barWidth; // Math.round((arr[i][0])/numFixes*barWidth*100)/(100); // * numFixes);
// let score = Math.round((-arr[i][0])/numFixes*barWidth*100)/(100); // * numFixes);
// console.log("score = " + score);
if (score > barWidth) {
score = barWidth;
}
// score = barWidth - score; // Invert the ranking.
// Skip really low scores.
if (score < Colorize.reportingThreshold) {
continue;
}
if (score > barWidth) {
score = barWidth;
}
// Skip really low scores.
if (score < Colorize.suspiciousCellsReportingThreshold) {
continue;
}
counter += 1;
// console.log("score is now = " + score);
@@ -131,22 +125,20 @@ function makeTableSuspiciousCells(sheetName: string, arr, selector, current: num
} else {
rangeDisplay = <td style={{width:100}}>{colName}{row}</td>;
}
let score = 49; // HACK FIXME
if (false) {
const scoreStr = Math.round(score).toString() + "% suspicious";
let barColor = 'red';
if (Math.round(score) < 50) {
barColor = 'yellow';
}
const scoreStr = Math.round(score).toString() + "% suspicious";
let barColor = 'red';
if (Math.round(score) < 50) {
barColor = 'yellow';
}
const scoreStr = "mildly suspicious";
children.push(<tr style={lineStyle} onClick={(ev) => { ev.preventDefault(); selector(i); }}>{rangeDisplay}<td title={scoreStr} style={{width: Math.round(score), backgroundColor: 'yellow', display:'inline-block'}}>&nbsp;</td><td style={{width: barWidth-Math.round(score), backgroundColor: 'lightgray', display:'inline-block'}}>&nbsp;</td></tr>);
// const scoreStr = "mildly suspicious";
children.push(<tr style={lineStyle} onClick={(ev) => { ev.preventDefault(); selector(i); }}>{rangeDisplay}<td title={scoreStr} style={{width: Math.round(score), backgroundColor: barColor, display:'inline-block'}}>&nbsp;</td><td title={scoreStr} style={{width: barWidth-Math.round(score), backgroundColor: 'lightgray', display:'inline-block'}}>&nbsp;</td></tr>);
// children.push(<tr style={lineStyle} onClick={(ev) => { ev.preventDefault(); selector(i); }}>{rangeDisplay}<td title={scoreStr} style={{width: Math.round(score), backgroundColor: 'yellow', display:'inline-block'}}>&nbsp;</td><td style={{width: barWidth-Math.round(score), backgroundColor: 'lightgray', display:'inline-block'}}>&nbsp;</td></tr>);
}
}
if (counter > 0) {
let table = [];
let header = <tr><th align="left">Cell</th><th align="left">Suspiciousness</th></tr>;
table.push(<div style={notSuspiciousStyle}>Click to jump to suspicious cells in {sheetName}:<br /><div style={divStyle}><table style={{width:'300px'}}><tbody>{header}{children}</tbody></table></div><br /></div>);
table.push(<div style={notSuspiciousStyle}>Click to jump to suspicious cells in {sheetName} (<b>EXPERIMENTAL!</b>):<br /><div style={divStyle}><table style={{width:'300px'}}><tbody>{header}{children}</tbody></table></div><br /></div>);
return table;
}
}
@@ -160,7 +152,7 @@ function DisplayFixes(props) {
let result1 = <div></div>;
let str = "";
if ((props.totalFixes === 0) && (props.suspiciousCells.length === 0)) {
return <div style={notSuspiciousStyle}>No suspicious formulas or cells found in {props.sheetName}.<br /><br /></div>;
return <div style={notSuspiciousStyle}>Nothing suspicious found in {props.sheetName}.<br /><br /></div>;
}
if (props.totalFixes < 0) {
return <div></div>;
@@ -171,13 +163,13 @@ function DisplayFixes(props) {
return <div style={notSuspiciousStyle}><br />Nothing suspicious found in {props.sheetName}.</div>;
}
const table1 = makeTable(props.sheetName, props.themFixes, props.selector, props.currentFix, props.numFixes);
result1 = <div><br /><br />{table1}<br /></div>;
result1 = <div><br /><br />{table1}</div>;
let result2 = <div></div>;
const table2 = makeTableSuspiciousCells(props.sheetName, props.suspiciousCells, props.cellSelector, props.currentSuspiciousCell, props.suspiciousCells.length);
result2 = <div>{table2}</div>;
return <div>{result1}</div>;
// return <div>{result1}</div>;
// Temporarily disable display of suspicious cells while feature remains in development.
// return <div>{result1}{result2}</div>;
return <div>{result1}{result2}</div>;
}


@@ -99,8 +99,6 @@ var Colorize = /** @class */ (function () {
// It's a number. Add it.
var adjustedX = j + origin_col + 1;
var adjustedY = i + origin_row + 1;
// value_array.push([[adjustedX, adjustedY, 1], Colorize.distinguishedZeroHash]); // See comment at top of function declaration.
// value_array.push([[adjustedX, adjustedY, 1], cell]); // Colorize.distinguishedZeroHash]); // See comment at top of function declaration.
value_array.push([[adjustedX, adjustedY, 1], Colorize.distinguishedZeroHash]); // See comment at top of function declaration.
}
}
@@ -188,10 +186,11 @@ var Colorize = /** @class */ (function () {
// console.log("C) cols = " + rows + ", rows = " + cols + "; row = " + row + ", col = " + col);
var adjustedX = row - origin_row - 1;
var adjustedY = col - origin_col - 1;
var value = 12345;
var value = Number(Colorize.distinguishedZeroHash);
if (isConstant === 1) {
// That means it was a constant.
// Set to a fixed value (as above).
// value = ; // FIXME????
}
else {
value = Number(val);
@@ -202,100 +201,134 @@ var Colorize = /** @class */ (function () {
};
Colorize.stencilize = function (cols, rows, matrix) {
console.log("cols = " + cols + ", rows = " + rows);
// console.log("matrix = " + JSON.stringify(matrix));
var stencil = new Array(cols);
for (var i = 0; i < cols; i++) {
stencil[i] = new Array(rows).fill(0);
}
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
if (matrix[i][j] > 0) {
stencil[i][j] = matrix[i][j];
}
stencil[i][j] = 1; // FIXME: this is if we are counting total number of different objects. // matrix[i][j];
}
}
// Compute the stencil while omitting the edges and corners.
for (var i = 1; i < cols - 1; i++) {
for (var j = 1; j < rows - 1; j++) {
if (matrix[i][j] > 0) {
stencil[i][j] = matrix[i][j];
stencil[i][j] += matrix[i - 1][j - 1] + matrix[i - 1][j] + matrix[i - 1][j + 1];
stencil[i][j] += matrix[i][j - 1] + matrix[i][j + 1];
stencil[i][j] += matrix[i + 1][j - 1] + matrix[i + 1][j] + matrix[i + 1][j + 1];
var nonzeros = Number(matrix[i - 1][j - 1] > 0) +
Number(matrix[i - 1][j] > 0) +
Number(matrix[i - 1][j + 1] > 0) +
Number(matrix[i][j - 1] > 0) +
Number(matrix[i][j + 1] > 0) +
Number(matrix[i + 1][j - 1] > 0) +
Number(matrix[i + 1][j] > 0) +
Number(matrix[i + 1][j + 1] > 0);
stencil[i][j] /= (1 + nonzeros);
var _loop_1 = function (j) {
//if (matrix[i][j] !== 0) { // FIXME was >
// 3x3 window around the center.
console.log("i = " + i + ", j = " + j);
var win = [matrix[i - 1][j - 1], matrix[i][j - 1], matrix[i + 1][j - 1],
matrix[i - 1][j], matrix[i][j], matrix[i + 1][j],
matrix[i - 1][j + 1], matrix[i][j + 1], matrix[i + 1][j + 1]];
console.log(JSON.stringify(win));
var sum = win.reduce(function (total, a) { return total + a; });
var nonzeros = win.reduce(function (total, a) { if (Number(a) > 0) {
return total + 1;
}
else {
return total;
} });
if (nonzeros > 0) {
var mean_1 = sum / nonzeros;
var variance = win.reduce(function (total, a) { return total + (a - mean_1) * (a - mean_1); });
var counts_1 = {};
win.forEach(function (el) { return counts_1[el] = 1 + (counts_1[el] || 0); });
delete counts_1[0];
stencil[i][j] = Object.keys(counts_1).length; // variance; // mean;
stencil[i][j] = mean_1;
}
// Avoid math issues by rounding so we only use the first two significant digit past the decimal point.
stencil[i][j] = Math.round(stencil[i][j] * 100) / 100;
};
for (var j = 1; j < rows - 1; j++) {
_loop_1(j);
}
}
// console.log("Stencil = " + JSON.stringify(stencil));
return stencil;
};
Colorize.compute_stencil_probabilities = function (cols, rows, matrix) {
Colorize.compute_stencil_probabilities = function (cols, rows, stencil) {
var probs = new Array(cols);
for (var i = 0; i < cols; i++) {
probs[i] = new Array(rows).fill(0);
}
// Generate the counts.
var totalNonzeroes = 0;
var counts = {};
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
if (matrix[i][j] != 0) {
// console.log("************* found " + matrix[i][j] + " = " + counts[matrix[i][j]] + "!");
probs[i][j] += 1;
// if (stencil[i][j] != 0) {
// console.log("************* found " + stencil[i][j] + " = " + counts[stencil[i][j]] + "!");
counts[stencil[i][j]] = (counts[stencil[i][j]] + 1) || 1;
// probs[i][j] += stencil[i][j];
if (stencil[i][j] != 0) {
totalNonzeroes += 1;
}
// } else {
// counts[stencil[i][j]] = 0;
// }
}
}
console.log("counts = " + JSON.stringify(counts));
// console.log("**********************total non-zeroes = " + totalNonzeroes);
// Now iterate over the counts to compute probabilities.
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
probs[i][j] /= totalNonzeroes;
probs[i][j] = counts[stencil[i][j]] / totalNonzeroes;
}
}
// console.log("probs = " + JSON.stringify(probs));
var totalEntropy = 0;
var total = 0;
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
total += probs[i][j];
if (probs[i][j] > 0) {
totalEntropy += this.entropy(probs[i][j]);
if (stencil[i][j] > 0) {
total += counts[stencil[i][j]];
}
}
}
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
if (counts[stencil[i][j]] > 0) {
totalEntropy += this.entropy(counts[stencil[i][j]] / total);
}
}
}
var normalizedEntropy = totalEntropy / Math.log2(totalNonzeroes);
// Now discount the probabilities by weighing them by the normalized total entropy.
if (false) {
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
probs[i][j] *= normalizedEntropy;
// totalEntropy += this.entropy(probs[i][j]);
}
}
}
// let totalEntropy = probs.reduce((total, num) => Number(total) + Number(num), 0); // this.entropy(num); });
// console.log("new probs = " + JSON.stringify(probs));
console.log("total probability = " + total);
console.log("total entropy = " + totalEntropy);
console.log("normalized entropy = " + normalizedEntropy);
return probs;
};
Colorize.generate_suspicious_cells = function (cols, rows, origin_col, origin_row, matrix, probs, threshold) {
if (threshold === void 0) { threshold = 0.01; }
console.log("threshold = " + threshold);
var cells = [];
var sumValues = 0;
var countValues = 0;
for (var i = 0; i < cols; i++) {
for (var j = 0; j < rows; j++) {
var adjustedX = j + origin_col + 1;
var adjustedY = i + origin_row + 1;
console.log("examining " + i + " " + j + " = " + matrix[i][j] + " (" + adjustedX + ", " + adjustedY + ")");
if (probs[i][j] > 0) {
console.log("found one at " + i + " " + j + " = " + probs[i][j]);
sumValues += matrix[i][j];
countValues += 1;
if (probs[i][j] <= threshold) {
// console.log("Pushing " + i + ", " + j + " = " + probs[i][j] + ", threshold = " + threshold);
// cells.push([j+1, i+1, matrix[i][j]]); // 3rd = actual value
var adjustedX = j + origin_col + 1;
var adjustedY = i + origin_row + 1;
if (matrix[i][j] === 0) {
// Keep zeroes intact.
cells.push([adjustedX, adjustedY, "0"]); // 3rd = bogus hash for constants
}
else {
// console.log("value at [" + (adjustedX) + "][" + (adjustedY) + "] = " + matrix[i][j] + " -- " + probs[i][j]);
cells.push([adjustedX, adjustedY, Colorize.distinguishedZeroHash]); // 3rd = bogus hash for constants
if (matrix[i][j] != 0) {
// Never push an empty cell.
cells.push([adjustedX, adjustedY, probs[i][j]]);
}
}
}
@@ -304,6 +337,7 @@ var Colorize = /** @class */ (function () {
var avgValues = sumValues / countValues;
console.log("avg values = " + avgValues);
cells.sort(function (a, b) { return Math.abs(b[2] - avgValues) - Math.abs(a[2] - avgValues); });
// console.log("cells = " + JSON.stringify(cells));
return cells;
};
// Shannon entropy.
@@ -597,6 +631,7 @@ var Colorize = /** @class */ (function () {
// return this.Multiplier * (Math.sqrt(v0 + v1) + v2);
};
Colorize.reportingThreshold = 35; // percent of bar
Colorize.suspiciousCellsReportingThreshold = 80; // percent of bar
// Color-blind friendly color palette.
Colorize.palette = ["#ecaaae", "#74aff3", "#d8e9b2", "#deb1e0", "#9ec991", "#adbce9", "#e9c59a", "#71cdeb", "#bfbb8a", "#94d9df", "#91c7a8", "#b4efd3", "#80b6aa", "#9bd1c6"]; // removed "#73dad1",
// True iff this class been initialized.

0 comments on commit 9c1831a

Please sign in to comment.
You can’t perform that action at this time.