Skip to content

Commit

Permalink
better derive of categoricals
Browse files Browse the repository at this point in the history
  • Loading branch information
sgratzl committed Nov 23, 2018
1 parent 7d28ee0 commit dffcb48
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 10 deletions.
8 changes: 5 additions & 3 deletions src/model/internal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {LazyBoxPlotData} from '../internal';
import {IOrderedGroup} from './Group';
import {IDataRow, IGroup, IGroupParent} from './interfaces';
import INumberColumn, {numberCompare} from './INumberColumn';
import {schemeCategory10, schemeSet3} from 'd3-scale-chromatic';


/** @internal */
Expand Down Expand Up @@ -74,9 +75,10 @@ export function unifyParents<T extends IOrderedGroup>(groups: T[]) {
}

// based on https://github.com/d3/d3-scale-chromatic#d3-scale-chromatic
// NOTE(review): this is a diff view without +/- markers; presumably the two hard-coded
// palettes and the first `colors` declaration are the removed lines, and the second
// `colors` declaration (built from the imported schemes) is their replacement - confirm.
// 10 hard-coded color values
const schemeCategory10 = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'];
// 12 hard-coded color values
const set3 = ['#8dd3c7', '#ffffb3', '#bebada', '#fb8072', '#80b1d3', '#fdb462', '#b3de69', '#fccde5', '#d9d9d9', '#bc80bd', '#ccebc5', '#ffed6f'];
const colors = schemeCategory10.concat(set3);
// combined palette: the category10 colors followed by the set3 colors
const colors = schemeCategory10.concat(schemeSet3);

/**
 * number of entries in the combined `colors` palette
 * @internal
 */
export const MAX_COLORS = colors.length;

/** @internal */
export function colorPool() {
Expand Down
31 changes: 24 additions & 7 deletions src/provider/utils.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import {extent} from 'd3-array';
import {isNumberColumn, isSupportType, isMapAbleColumn} from '../model';
import Column, {IColumnDesc} from '../model/Column';
import {colorPool} from '../model/internal';
import {colorPool, MAX_COLORS} from '../model/internal';
import Ranking from '../model/Ranking';


/**
 * options used when deriving column descriptions from data
 */
export interface IDeriveOptions {
/**
* decides whether a string column is treated as a categorical column.
* As a number: the maximal ratio of unique values, i.e., the column is categorical
* if `unique < total * categoricalThreshold`.
* As a function: see the notes on the individual declarations below.
*/
// NOTE(review): this is a diff view without +/- markers; presumably the first declaration
// is the removed one and the second its replacement - confirm.
// previous form: the function receives the number of data entries and returns the ratio to use
categoricalThreshold: number | ((size: number) => number);
// new form: the function receives the number of unique values and the total number of
// data entries and returns whether the column should be treated as categorical
categoricalThreshold: number | ((unique: number, total: number) => boolean);

columns: string[];
}
Expand All @@ -29,6 +29,15 @@ export function cleanCategories(categories: Set<string>) {
return Array.from(categories).map(String).sort();
}

/**
 * checks whether the given entries differ in their length.
 *
 * Missing entries (`null` / `undefined`) are ignored. The first non-missing entry
 * serves as the reference length; every other non-missing entry is compared against it,
 * where a non-array entry counts as having the length `-1`.
 *
 * @param data the entries to compare
 * @returns `true` if at least one non-missing entry has a different length than the
 * reference entry, `false` otherwise (including empty or all-missing input)
 */
function hasDifferentSizes(data: any[][]): boolean {
  // use the first non-missing entry as reference instead of blindly reading data[0],
  // which would throw a TypeError if the data starts with a missing entry
  const reference = data.find((d) => d != null);
  if (reference == null) {
    // nothing to compare: empty input or missing entries only
    return false;
  }
  const base = reference.length;

  return data.some((d) => d != null && base !== (Array.isArray(d) ? d.length : -1));
}

function deriveType(label: string, value: any, column: number | string, data: any[], options: IDeriveOptions): IColumnDesc {
const base: any = {
type: 'string',
Expand All @@ -48,11 +57,15 @@ function deriveType(label: string, value: any, column: number | string, data: an
base.type = 'boolean';
return base;
}
const threshold = typeof options.categoricalThreshold === 'function' ? options.categoricalThreshold(data.length) : options.categoricalThreshold;
const treatAsCategorical = typeof options.categoricalThreshold === 'function' ? options.categoricalThreshold : (u: number, t: number) => u < t * (<number>options.categoricalThreshold);

if (typeof value === 'string') {
//maybe a date string
// TODO

//maybe a categorical
const categories = new Set(data.map((d) => d[column]));
if (categories.size < data.length * threshold) { // 70% unique guess categorical
if (treatAsCategorical(categories.size, data.length)) {
base.type = 'categorical';
base.categories = cleanCategories(categories);
}
Expand All @@ -76,10 +89,14 @@ function deriveType(label: string, value: any, column: number | string, data: an
return base;
}
if (typeof value === 'string') {
//maybe a date string
// TODO

//maybe a categorical
const categories = new Set((<string[]>[]).concat(...data.map((d) => d[column])));
if (categories.size < data.length * threshold) { // 70% unique guess categorical
base.type = 'categoricals';
if (treatAsCategorical(categories.size, data.length)) {
//
base.type = hasDifferentSizes(data) ? 'set' : 'categoricals';
base.categories = cleanCategories(categories);
}
return base;
Expand All @@ -92,7 +109,7 @@ function deriveType(label: string, value: any, column: number | string, data: an

export function deriveColumnDescriptions(data: any[], options: Partial<IDeriveOptions> = {}) {
const config = Object.assign({
categoricalThreshold: 0.7,
categoricalThreshold: (u: number, n: number) => u <= MAX_COLORS && u < n * 0.7, //70% unique and less equal to 22 categories
columns: []
}, options);
const r: IColumnDesc[] = [];
Expand Down

0 comments on commit dffcb48

Please sign in to comment.