/
timelineCred.js
281 lines (260 loc) · 8.74 KB
/
timelineCred.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
// @flow
import {sum} from "d3-array";
import sortBy from "lodash.sortby";
import * as NullUtil from "../../util/null";
import * as MapUtil from "../../util/map";
import {toCompat, fromCompat, type Compatible} from "../../util/compat";
import {type Interval} from "./interval";
import {timelinePagerank} from "./timelinePagerank";
import {distributionToCred} from "./distributionToCred";
import {type PluginDeclaration, combineTypes} from "../pluginDeclaration";
import {
Graph,
type GraphJSON,
type NodeAddressT,
NodeAddress,
type Node,
} from "../../core/graph";
import {
type TimelineCredParameters,
paramsToJSON,
paramsFromJSON,
type TimelineCredParametersJSON,
type PartialTimelineCredParameters,
partialParams,
defaultParams,
} from "./params";
export type {Interval} from "./interval";
/**
* A Graph Node wrapped with cred information.
*
* Values of this type are produced by `TimelineCred.credNode`, which pairs a
* node's per-interval cred scores with the sum of those scores.
*/
export type CredNode = {|
// The Graph Node in question
+node: Node,
// The total aggregated cred (the sum of the entries of `cred`).
+total: number,
// The timeline sequence of cred (one score per interval).
+cred: $ReadOnlyArray<number>,
|};
/**
* Represents the timeline cred of a graph. This class wraps all the data
* needed to analyze and interpret cred (i.e. it has the Graph and the cred
* scores), and provides convenient view methods for accessing the cred.
*
* The TimelineCred also has the params and config. The intention is that this
* is a "one stop shop" for serializing SourceCred results.
*/
export class TimelineCred {
_graph: Graph;
_intervals: $ReadOnlyArray<Interval>;
_addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>;
_params: TimelineCredParameters;
_plugins: $ReadOnlyArray<PluginDeclaration>;
constructor(
graph: Graph,
intervals: $ReadOnlyArray<Interval>,
addressToCred: Map<NodeAddressT, $ReadOnlyArray<number>>,
params: TimelineCredParameters,
plugins: $ReadOnlyArray<PluginDeclaration>
) {
this._graph = graph;
this._intervals = intervals;
this._addressToCred = addressToCred;
this._params = params;
this._plugins = plugins;
}
graph(): Graph {
return this._graph;
}
params(): TimelineCredParameters {
return this._params;
}
plugins(): $ReadOnlyArray<PluginDeclaration> {
return this._plugins;
}
/**
* Creates a new TimelineCred based on the new Parameters.
* Holds the graph and config constant.
*
* This returns a new TimelineCred; it does not modify the existing one.
*/
async reanalyze(
newParams: PartialTimelineCredParameters
): Promise<TimelineCred> {
return await TimelineCred.compute({
graph: this._graph,
params: newParams,
plugins: this._plugins,
});
}
/**
* Return all the intervals in the timeline.
*/
intervals(): $ReadOnlyArray<Interval> {
return this._intervals;
}
/**
* Get the CredNode for a given NodeAddress.
*
* Returns undefined if the node is not in the filtered results.
*
* Note that it's possible that the node is present in the Graph, but not the
* filtered results; if so, it will return undefined.
*/
credNode(a: NodeAddressT): ?CredNode {
const cred = this._addressToCred.get(a);
if (cred === undefined) {
return undefined;
}
const total = sum(cred);
const node = NullUtil.get(this._graph.node(a));
return {cred, total, node};
}
/**
* Returns nodes sorted by their total cred (descending).
*
* If prefixes is provided, then only nodes matching at least one of the provided
* address prefixes will be included.
*/
credSortedNodes(
prefixes?: $ReadOnlyArray<NodeAddressT>
): $ReadOnlyArray<CredNode> {
let addresses = Array.from(this._addressToCred.keys());
if (prefixes != null) {
const match = (a) => prefixes.some((p) => NodeAddress.hasPrefix(a, p));
addresses = addresses.filter(match);
}
const credNodes = addresses.map((a) => this.credNode(a));
return sortBy(credNodes, (x: CredNode) => -x.total);
}
/**
* Returns all user-typed nodes, sorted by their total cred (descending).
*
* A node is considered a user-type node if its address has a prefix match
* with a type specified as a user type by one of the plugin declarations.
*/
userNodes(): $ReadOnlyArray<CredNode> {
const userTypes = [].concat(...this.plugins().map((p) => p.userTypes));
return this.credSortedNodes(userTypes.map((x) => x.prefix));
}
/**
* Create a new, filtered TimelineCred, by removing low-scored nodes.
*
* Cred Graphs may have a huge number of small contributions, like comments,
* in which end users are not particularly interested. However, the size of
* the TimelineCred offered to the frontend matters quite a bit. Therefore,
* we can use this method to discard almost all nodes in the graph.
*
* Specifically, `reduceSize` takes in an array of inclusion prefixes: for
* each inclusion prefix, we will take the top `k` nodes matching that prefix
* (by total score across all intervals).
*
* It also takes `fullInclusion` prefixes: for these prefixes, every matching
* node will be included. This allows us to ensure that e.g. every user will
* be included in the `cli scores` output, even if they are not in the top
* `k` users.
*/
reduceSize(opts: {|
+typePrefixes: $ReadOnlyArray<NodeAddressT>,
+nodesPerType: number,
+fullInclusionPrefixes: $ReadOnlyArray<NodeAddressT>,
|}): TimelineCred {
const {typePrefixes, nodesPerType, fullInclusionPrefixes} = opts;
const selectedNodes: Set<NodeAddressT> = new Set();
for (const prefix of typePrefixes) {
const matchingNodes = this.credSortedNodes([prefix]).slice(
0,
nodesPerType
);
for (const {node} of matchingNodes) {
selectedNodes.add(node.address);
}
}
// For the fullInclusionPrefixes, we won't slice -- we just take every match.
const matchingNodes = this.credSortedNodes(fullInclusionPrefixes);
for (const {node} of matchingNodes) {
selectedNodes.add(node.address);
}
const filteredAddressToCred = new Map();
for (const address of selectedNodes) {
const cred = NullUtil.get(this._addressToCred.get(address));
filteredAddressToCred.set(address, cred);
}
return new TimelineCred(
this._graph,
this._intervals,
filteredAddressToCred,
this._params,
this._plugins
);
}
toJSON(): TimelineCredJSON {
const rawJSON = {
graphJSON: this._graph.toJSON(),
intervalsJSON: this._intervals,
credJSON: MapUtil.toObject(this._addressToCred),
paramsJSON: paramsToJSON(this._params),
pluginsJSON: this._plugins,
};
return toCompat(COMPAT_INFO, rawJSON);
}
static fromJSON(j: TimelineCredJSON): TimelineCred {
const json = fromCompat(COMPAT_INFO, j);
const {graphJSON, intervalsJSON, credJSON, paramsJSON, pluginsJSON} = json;
const cred = MapUtil.fromObject(credJSON);
const graph = Graph.fromJSON(graphJSON);
const params = paramsFromJSON(paramsJSON);
return new TimelineCred(graph, intervalsJSON, cred, params, pluginsJSON);
}
static async compute(opts: {|
graph: Graph,
params?: PartialTimelineCredParameters,
plugins: $ReadOnlyArray<PluginDeclaration>,
|}): Promise<TimelineCred> {
const {graph, params, plugins} = opts;
const fullParams = params == null ? defaultParams() : partialParams(params);
const nodeOrder = Array.from(graph.nodes()).map((x) => x.address);
const types = combineTypes(plugins);
const userTypes = [].concat(...plugins.map((x) => x.userTypes));
const scorePrefixes = userTypes.map((x) => x.prefix);
const distribution = await timelinePagerank(
graph,
types,
fullParams.weights,
fullParams.intervalDecay,
fullParams.alpha
);
const cred = distributionToCred(
distribution,
nodeOrder,
userTypes.map((x) => x.prefix)
);
const addressToCred = new Map();
for (let i = 0; i < nodeOrder.length; i++) {
const addr = nodeOrder[i];
const addrCred = cred.map(({cred}) => cred[i]);
addressToCred.set(addr, addrCred);
}
const intervals = cred.map((x) => x.interval);
const preliminaryCred = new TimelineCred(
graph,
intervals,
addressToCred,
fullParams,
plugins
);
return preliminaryCred.reduceSize({
typePrefixes: types.nodeTypes.map((x) => x.prefix),
nodesPerType: 100,
fullInclusionPrefixes: scorePrefixes,
});
}
}
// Compat header passed to `toCompat` / `fromCompat` when a TimelineCred is
// serialized by `toJSON` and deserialized by `fromJSON`.
// NOTE(review): presumably the version should be bumped whenever the shape of
// TimelineCredJSON changes -- confirm against util/compat.
const COMPAT_INFO = {type: "sourcecred/timelineCred", version: "0.5.0"};
/**
* The serialized form of a TimelineCred, as produced by `TimelineCred.toJSON`
* and consumed by `TimelineCred.fromJSON`.
*
* The type is opaque, so code outside this module should treat values as
* black boxes and go through `toJSON` / `fromJSON` instead of reading fields.
*/
export opaque type TimelineCredJSON = Compatible<{|
// The graph, serialized via `Graph.toJSON`.
+graphJSON: GraphJSON,
// The parameters, serialized via `paramsToJSON`.
+paramsJSON: TimelineCredParametersJSON,
// The plugin declarations, stored as-is.
+pluginsJSON: $ReadOnlyArray<PluginDeclaration>,
// The address-to-cred map converted to a plain object via `MapUtil.toObject`.
+credJSON: {[string]: $ReadOnlyArray<number>},
// The intervals, stored as-is.
+intervalsJSON: $ReadOnlyArray<Interval>,
|}>;