From e8b349c5843cec288c271aa5594e227ab3904c46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dandelion=20Man=C3=A9?=
Date: Sun, 16 Feb 2020 12:30:32 -0700
Subject: [PATCH] wip on credGraph and new pagerank

---
Note: the file contents below were reformatted and annotated after this
patch was exported; the commit id above and the blob ids on the `index`
lines refer to the original export.

 src/core/algorithm/pagerank.js | 134 ++++++++++++++++++++++++++++++++++
 src/core/credGraph.js          | 131 +++++++++++++++++++++++++++++++++
 2 files changed, 265 insertions(+)
 create mode 100644 src/core/algorithm/pagerank.js
 create mode 100644 src/core/credGraph.js

diff --git a/src/core/algorithm/pagerank.js b/src/core/algorithm/pagerank.js
new file mode 100644
index 000000000..424cc4487
--- /dev/null
+++ b/src/core/algorithm/pagerank.js
@@ -0,0 +1,134 @@
+// @flow
+
+import deepFreeze from "deep-freeze";
+import {sum} from "d3-array";
+import * as NullUtil from "../../util/null";
+import {
+  Graph,
+  type NodeAddressT,
+  type Edge,
+  type Node,
+  NodeAddress,
+} from "../graph";
+import * as WeightedGraph from "../weightedGraph";
+import {type WeightedGraph as WeightedGraphT} from "../weightedGraph";
+import {type Interval, partitionGraph} from "../interval";
+import {
+  nodeWeightEvaluator,
+  edgeWeightEvaluator,
+  type NodeWeightEvaluator,
+  type EdgeWeightEvaluator,
+} from "./weightEvaluator";
+import {weightedDistribution} from "./nodeDistribution";
+import {type Distribution} from "./distribution";
+import {
+  createOrderedSparseMarkovChain,
+  createConnections,
+  distributionToNodeDistribution,
+} from "./graphToMarkovChain";
+import {
+  findStationaryDistribution,
+  type PagerankParams,
+  type SparseMarkovChain,
+  type PagerankOptions as CorePagerankOptions,
+} from "./markovChain";
+
+/**
+ * Options for `pagerank`.
+ *
+ * `maxIterations` and `convergenceThreshold` are forwarded unchanged to
+ * `findStationaryDistribution`; `alpha` is passed to it as the `alpha`
+ * field of its `PagerankParams`.
+ */
+export type PagerankOptions = {|
+  +maxIterations: number,
+  +convergenceThreshold: number,
+  +alpha: number,
+|};
+
+const DEFAULT_MAX_ITERATIONS = 255;
+const DEFAULT_CONVERGENCE_THRESHOLD = 1e-7;
+const DEFAULT_ALPHA = 0.05;
+
+/**
+ * Build a complete `PagerankOptions`, using the module defaults for
+ * every field that `overrides` does not supply.
+ */
+export function defaultOptions(
+  overrides?: $Shape<PagerankOptions>
+): PagerankOptions {
+  const defaults = {
+    maxIterations: DEFAULT_MAX_ITERATIONS,
+    convergenceThreshold: DEFAULT_CONVERGENCE_THRESHOLD,
+    alpha: DEFAULT_ALPHA,
+  };
+  return {...defaults, ...overrides};
+}
+
+/**
+ * Run PageRank over a weighted graph and return a score for each node.
+ *
+ * The seed (also used as the initial distribution) is produced by
+ * `weightedDistribution` from the evaluated node weights. Once the
+ * stationary distribution has been found, every score is multiplied by
+ * one common factor so that the scores of the nodes matching any of
+ * `scoringPrefixes` add up to the sum of all node weights.
+ *
+ * Throws if the matching nodes have a combined score of zero (which
+ * includes the case where no node matches), since the scores could not
+ * be rescaled.
+ */
+export async function pagerank(
+  wg: WeightedGraphT,
+  scoringPrefixes: $ReadOnlyArray<NodeAddressT>,
+  options: PagerankOptions
+): Promise<Map<NodeAddressT, number>> {
+  const nodeEvaluator = nodeWeightEvaluator(wg.weights);
+  const edgeEvaluator = edgeWeightEvaluator(wg.weights);
+  // `createConnections` takes an evaluator over whole edges, whereas
+  // `edgeEvaluator` is keyed by edge address; adapt one to the other.
+  const oldStyleEvaluator = (e: Edge) => edgeEvaluator(e.address);
+  const connections = createConnections(wg.graph, oldStyleEvaluator, 0);
+  const nodeOrder = wg.graph._getOrder().nodeOrder;
+  const osmc = createOrderedSparseMarkovChain(connections);
+  const nodeWeights = new Map();
+  for (const addr of nodeOrder) {
+    nodeWeights.set(addr, nodeEvaluator(addr));
+  }
+  const seed = weightedDistribution(nodeOrder, nodeWeights);
+  const params: PagerankParams = {
+    chain: osmc.chain,
+    alpha: options.alpha,
+    pi0: seed,
+    seed,
+  };
+  const coreOptions: CorePagerankOptions = {
+    verbose: true,
+    convergenceThreshold: options.convergenceThreshold,
+    maxIterations: options.maxIterations,
+    yieldAfterMs: 30,
+  };
+  const distributionResult = await findStationaryDistribution(
+    params,
+    coreOptions
+  );
+  const pi = distributionToNodeDistribution(nodeOrder, distributionResult.pi);
+  // Total score currently held by the nodes selected by `scoringPrefixes`.
+  let matchingScore = 0;
+  for (const [addr, score] of pi) {
+    if (scoringPrefixes.some((p) => NodeAddress.hasPrefix(addr, p))) {
+      matchingScore += score;
+    }
+  }
+  if (matchingScore === 0) {
+    throw new Error("no matching score");
+  }
+  const totalCred = sum(nodeWeights.values());
+  // Common rescaling factor applied to every node's score.
+  const f = totalCred / matchingScore;
+  const result = new Map();
+  for (const [addr, score] of pi) {
+    result.set(addr, score * f);
+  }
+  return result;
+}
diff --git a/src/core/credGraph.js b/src/core/credGraph.js
new file mode 100644
index 000000000..0e1373182
--- /dev/null
+++ b/src/core/credGraph.js
@@ -0,0 +1,131 @@
+// @flow
+
+import * as NullUtil from "../util/null";
+import {
+  type NodeAddressT,
+  Graph,
+  type Edge,
+  type Node,
+  type EdgesOptions,
+} from "./graph";
+import * as WeightedGraph from "./weightedGraph";
+import {type NodeWeight, type EdgeWeight} from "./weights";
+import {
+  nodeWeightEvaluator,
+  edgeWeightEvaluator,
+} from "./algorithm/weightEvaluator";
+import {type WeightedGraph as WeightedGraphT} from "./weightedGraph";
+import {
+  type PagerankOptions,
+  pagerank,
+  defaultOptions,
+} from "./algorithm/pagerank";
+
+/** A graph node together with its evaluated weight and its score. */
+export type CredNode = {|
+  +node: Node,
+  +weight: NodeWeight,
+  +cred: number,
+|};
+
+/**
+ * A graph edge together with its evaluated weight.
+ *
+ * NOTE(review): `CredGraph.edges` currently fills in only `edge` and
+ * `weight`; `normalizedWeight` and `credFlow` are not computed yet.
+ */
+export type CredEdge = {|
+  +edge: Edge,
+  +weight: EdgeWeight,
+  +normalizedWeight: EdgeWeight,
+  +credFlow: EdgeWeight,
+|};
+
+/**
+ * A node/edge pair with an associated cred value.
+ *
+ * NOTE(review): nothing in this file produces this type yet.
+ */
+export type CredNeighbor = {|
+  +node: CredNode,
+  +edge: CredEdge,
+  +cred: number,
+|};
+
+/**
+ * Bundles a weighted graph with a set of PageRank options and a map of
+ * per-node scores. `CredGraph.compute` runs PageRank and builds an
+ * instance from its result.
+ */
+export class CredGraph {
+  _wg: WeightedGraphT;
+  _options: PagerankOptions;
+  _scores: Map<NodeAddressT, number>;
+
+  constructor(
+    wg: WeightedGraphT,
+    options: PagerankOptions,
+    scores: Map<NodeAddressT, number>
+  ) {
+    this._wg = wg;
+    this._options = options;
+    this._scores = scores;
+  }
+
+  /** The `graph` field of the underlying weighted graph. */
+  graph(): Graph {
+    return this._wg.graph;
+  }
+
+  /** The weighted graph this CredGraph was constructed from. */
+  weightedGraph(): WeightedGraphT {
+    return this._wg;
+  }
+
+  /**
+   * Iterate over the graph's nodes, forwarding `options` to
+   * `Graph.nodes`, and pair each node with its weight and score.
+   *
+   * NOTE(review): the score lookup goes through `NullUtil.get`, which is
+   * presumably meant to reject nodes that have no score; confirm.
+   */
+  *nodes(options?: {|+prefix: NodeAddressT|}): Iterator<CredNode> {
+    const nwe = nodeWeightEvaluator(this.weightedGraph().weights);
+    for (const node of this.graph().nodes(options)) {
+      const cred = NullUtil.get(this._scores.get(node.address));
+      const weight = nwe(node.address);
+      yield {node, cred, weight};
+    }
+  }
+
+  /**
+   * Iterate over the graph's edges, forwarding `options` to
+   * `Graph.edges`, and pair each edge with its weight.
+   *
+   * NOTE(review): the yielded objects do not yet carry the
+   * `normalizedWeight` and `credFlow` fields that `CredEdge` declares.
+   */
+  *edges(options: EdgesOptions): Iterator<CredEdge> {
+    const ewe = edgeWeightEvaluator(this.weightedGraph().weights);
+    for (const edge of this.graph().edges(options)) {
+      const weight = ewe(edge.address);
+      yield {weight, edge};
+    }
+  }
+
+  /**
+   * Run PageRank on `wg` and wrap the result in a `CredGraph`. Any field
+   * missing from `options` takes its value from `defaultOptions`.
+   */
+  static async compute(
+    wg: WeightedGraphT,
+    scoringPrefixes: $ReadOnlyArray<NodeAddressT>,
+    options: $Shape<PagerankOptions>
+  ): Promise<CredGraph> {
+    // Keep the partial input and the completed options in separate
+    // bindings so that each one has a single, precise type.
+    const fullOptions = defaultOptions(options);
+    const scores = await pagerank(wg, scoringPrefixes, fullOptions);
+    return new CredGraph(wg, fullOptions, scores);
+  }
+}