-
Notifications
You must be signed in to change notification settings - Fork 15
/
index.tsx
293 lines (265 loc) · 10.6 KB
/
index.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import React from 'react'
import { MetaNode } from '@/spec/metanode'
import { GeneTerm } from '@/components/core/term'
import { GeneSet } from '@/components/core/set'
import { GraphPlot } from '@/components/viz/graph'
import { metabolomicsworkbench_icon } from '@/icons'
import { z } from 'zod'
import { additional_info_icon, gene_icon } from '@/icons'
import * as array from '@/utils/array'
import { GetGeneSetIDConv } from '@/components/MW/ConvertedGeneID'
// How the schema validation works: https://codex.so/zod-validation-en
// StringDB PPI edge schema information: FROM: https://string-db.org/cgi/help.pl?subpage=api%23getting-all-the-string-interaction-partners-of-the-protein-set
/*
Output fields (TSV and JSON formats):
Field Description
stringId_A STRING identifier (protein A)
stringId_B STRING identifier (protein B)
preferredName_A common protein name (protein A)
preferredName_B common protein name (protein B)
ncbiTaxonId NCBI taxon identifier
score combined score
nscore gene neighborhood score
fscore gene fusion score
pscore phylogenetic profile score
ascore coexpression score
escore experimental score
dscore database score
tscore textmining score
*/
/**
 * Codec for one raw protein-protein interaction edge as returned by the
 * StringDB API. Only the two preferred (common) protein names and the NCBI
 * taxon identifier are required; the STRING identifiers and all scores are
 * optional.
 */
export const StringDBedgeC = z.object({
  // STRING identifiers of protein A / protein B
  stringId_A: z.optional(z.string()),
  stringId_B: z.optional(z.string()),
  // common protein names of protein A / protein B
  preferredName_A: z.string(),
  preferredName_B: z.string(),
  // NCBI taxon identifier
  ncbiTaxonId: z.number(),
  // combined score
  score: z.optional(z.number()),
  // per-channel scores: gene neighborhood, gene fusion, phylogenetic profile,
  // coexpression, experimental, database, textmining
  nscore: z.optional(z.number()),
  fscore: z.optional(z.number()),
  pscore: z.optional(z.number()),
  ascore: z.optional(z.number()),
  escore: z.optional(z.number()),
  dscore: z.optional(z.number()),
  tscore: z.optional(z.number()),
})
/**
 * Codec for a simplified edge: the two gene symbols plus an optional score.
 */
export const MyedgeC = z.object({
  SYMBOL_A: z.string(),
  SYMBOL_B: z.string(),
  score: z.optional(z.number()),
})

/** Static type of one raw StringDB PPI edge, derived from its codec. */
export type StringDBedge = z.infer<typeof StringDBedgeC>
/** Static type of one simplified edge, derived from its codec. */
export type Myedge = z.infer<typeof MyedgeC>
// important ref: https://rsinohara.github.io/json-to-zod-react/
/** Codec for a list of raw StringDB PPI edges. */
export const StringDBedgeArrayC = StringDBedgeC.array()
/** Codec for a list of simplified edges. */
export const MyedgeArrayC = MyedgeC.array()

/** Static type of a list of raw StringDB PPI edges. */
export type StringDBedgeArray = z.infer<typeof StringDBedgeArrayC>
/** Static type of a list of simplified edges. */
export type MyedgeArray = z.infer<typeof MyedgeArrayC>
/**
 * Codec for a StringDB PPI network: the list of raw StringDB edges together
 * with the species it was fetched for.
 *
 * - species_txid: taxonomy identifier as a string (FetchStringDBPPI sets "9606")
 * - species_id: species code as a string (FetchStringDBPPI sets "hsa")
 * - edges: edges in the raw StringDB format (see StringDBedgeArrayC)
 */
export const StringDB_PPI_NetworkDataC = z.object({
species_txid: z.string(),
species_id: z.string(),
edges: StringDBedgeArrayC
})
/** Static type of a StringDB PPI network, derived from its codec. */
export type StringDB_PPI_NetworkData = z.infer<typeof StringDB_PPI_NetworkDataC>
/**
 * Reduce a list of raw StringDB PPI edges to the simplified three-column form.
 *
 * @param data edges in the standard StringDB PPI format
 * @returns one entry per input edge, in the same order, holding
 *   preferredName_A as SYMBOL_A, preferredName_B as SYMBOL_B, and score
 */
export function SimplifyStringDBedgeArray(data:StringDBedgeArray) {
  const simplified : MyedgeArray = data.map(({ preferredName_A, preferredName_B, score }) => ({
    SYMBOL_A: preferredName_A,
    SYMBOL_B: preferredName_B,
    score,
  }));
  return simplified;
}
/**
 * Reduce a single raw StringDB PPI edge to the simplified three-column form.
 *
 * @param d edge in the standard StringDB PPI format
 * @returns an object holding preferredName_A as SYMBOL_A, preferredName_B as
 *   SYMBOL_B, and score
 */
export function SimplifyStringDBedge(d:StringDBedge) {
  const { preferredName_A, preferredName_B, score } = d;
  const simplified: Myedge = {
    SYMBOL_A: preferredName_A,
    SYMBOL_B: preferredName_B,
    score,
  };
  return simplified;
}
/**
 * Collect the distinct node names appearing in a list of raw StringDB edges.
 *
 * @param data edges in the standard StringDB PPI format
 * @returns both endpoints (preferredName_A, preferredName_B) of every edge,
 *   de-duplicated with array.unique
 */
export function GetAllNodes_from_StringDBedgeArray(data:StringDBedgeArray) {
  const endpoints = data.flatMap(({ preferredName_A, preferredName_B }) => [preferredName_A, preferredName_B]);
  return array.unique(endpoints);
}
/**
 * Collect the distinct node names appearing in a list of simplified edges.
 *
 * @param data edges in the simplified (SYMBOL_A, SYMBOL_B, score) format
 * @returns both endpoints (SYMBOL_A, SYMBOL_B) of every edge, de-duplicated
 *   with array.unique
 */
export function GetAllNodes_from_MyedgeArray(data:MyedgeArray) {
  const endpoints = data.flatMap(({ SYMBOL_A, SYMBOL_B }) => [SYMBOL_A, SYMBOL_B]);
  return array.unique(endpoints);
}
/**
 * Convert a list of raw StringDB PPI edges into the `{ nodes, edges }` object
 * used for network plotting. The target codec is documented at
 * https://github.com/MaayanLab/Playbook-Workflow-Builder/blob/network-viz/components/viz/graph/index.tsx
 * as:
 *
 *   nodes: Array<{ id: string, label?: string, type: string }>
 *   edges: Array<{ source: string, target: string }>
 *
 * This function is async because GetGeneSetIDConv is async.
 *
 * @param data edges in the standard StringDB PPI format
 * @param species_id species code forwarded to GetGeneSetIDConv (default "hsa")
 * @param geneid_type gene ID type forwarded to GetGeneSetIDConv (default "SYMBOL_OR_ALIAS")
 * @returns an object with one node per entry returned by GetGeneSetIDConv and
 *   one edge per input edge
 */
export async function Format_StringDBedgeArray_for_GraphPlot(data:StringDBedgeArray, species_id:string = "hsa", geneid_type:string = "SYMBOL_OR_ALIAS") {
  const allnodes = GetAllNodes_from_StringDBedgeArray(data);
  // GetGeneSetIDConv is an async function, so await is needed
  const allnodes_more = await GetGeneSetIDConv(allnodes, species_id, geneid_type);

  // The first collected node is drawn with its own type ("gene1").
  // NOTE(review): presumably this is the query gene (preferredName_A of the first edge) - confirm.
  const first_node = allnodes[0];

  // NOTE(review): nodes are built from the converted entries, not from allnodes directly;
  // the ordering of allnodes_more relative to allnodes has not been cross-checked.
  const nodes_array = allnodes_more.map(d => ({
    id: d.SYMBOL,
    label: d.SYMBOL,
    link: `https://www.ncbi.nlm.nih.gov/gene/${d.ENTREZID}`,
    hovertext: d.GENENAME,
    type: (d.SYMBOL === first_node) ? "gene1" : "gene",
  }));

  const edges_array = data.map(d => ({
    source: d.preferredName_A,
    target: d.preferredName_B,
  }));

  return { nodes: nodes_array, edges: edges_array };
}
// A unique name for your data type is used here
export const StringDB_PPI_Network = MetaNode('StringDB_PPI_Network')
  // Human readable descriptors about this node should go here
  .meta({
    label: 'StringDB PPI network',
    description: 'StringDB PPI network',
    icon: [gene_icon, additional_info_icon],
  })
  // this should have a codec which can encode or decode the data type represented by this node
  // using zod, a compile-time and runtime type-safe codec can be constructed
  .codec(StringDB_PPI_NetworkDataC)
  // react component rendering your data goes here:
  // shows the species, the taxonomy ID and a three-column table of the edges
  .view(data => {
    const edges = data.edges
    return (
      <div className="prose max-w-none">
        <h3>Species: {data.species_id}</h3>
        <h3>Taxonomy ID: {data.species_txid}</h3>
        <table>
          {/* rows must live inside thead/tbody: a tr directly under table is invalid DOM nesting */}
          <thead>
            <tr>
              <th>SYMBOL A</th>
              <th>SYMBOL B</th>
              <th>Score</th>
            </tr>
          </thead>
          <tbody>
            {edges.map((edge, index) => (
              <tr key={index}>
                <td>{edge.preferredName_A}</td>
                <td>{edge.preferredName_B}</td>
                <td>{edge.score}</td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    )
  })
  .build()
// A unique name for your resolver is used here
export const FetchStringDBPPI = MetaNode('FetchStringDBPPI')
  // Human readable descriptors about this node should go here
  .meta({
    label: 'Fetch StringDB PPI',
    description: 'Given a gene or gene set (SYMBOL), extract PPI using StringDB APIs',
    icon: [metabolomicsworkbench_icon],
  })
  // This should be a mapping from argument name to argument type
  // the types are previously defined Meta Node Data Types
  .inputs({ gene: GeneTerm })
  // This should be a single Meta Node Data Type
  .output(StringDB_PPI_Network)
  // The resolve function uses the inputs and returns output
  // both in the shape prescribed by the data type codecs
  .resolve(async (props) => {
    // https://string-db.org/help/api/
    // Based on the API at: https://string-db.org/cgi/help.pl?subpage=api%23getting-all-the-string-interaction-partners-of-the-protein-set
    // See also: get, post, fetch, request: https://medium.com/meta-box/how-to-send-get-and-post-requests-with-javascript-fetch-api-d0685b7ee6ed
    const species_txid = "9606"
    const species_id = "hsa"
    const string_api_url = "https://version-11-5.string-db.org/api"
    const output_format = "json"
    const method = "interaction_partners"
    const request_url_base = [string_api_url, output_format, method].join("/")
    const my_genes = [props.inputs.gene]
    // Try with the gene RPE as its PPI with 900 cut off is not that large and all those genes
    // don't have too many metabolites associated with them.

    // URLSearchParams percent-encodes every value, so the identifiers are joined
    // with a raw carriage return (serialized as %0D) instead of a hand-written "%0d".
    const params = new URLSearchParams({
      identifiers: my_genes.join("\r"), // your protein
      species: species_txid, // species NCBI identifier
      limit: "5000",
      required_score: "900",
      caller_identity: "sc-cfdewebdev.sdsc.edu", // your app name
    })

    const response = await fetch(`${request_url_base}?${params.toString()}`)
    if (!response.ok) {
      throw new Error(`StringDB request failed with status ${response.status} ${response.statusText}`)
    }
    const edges = await response.json()
    // The output codec expects an array of edges; fail here with a clear message
    // rather than passing a non-array payload downstream.
    if (!Array.isArray(edges)) {
      throw new Error('StringDB returned an unexpected response (expected an array of edges)')
    }
    return { species_id, species_txid, edges }
  })
  .story(props =>
    `For the given gene ID (SYMBOL), StringDB PPI was extracted using their API [\\ref{STRING api, https://string-db.org/cgi/help.pl?subpage=api%23getting-all-the-string-interaction-partners-of-the-protein-set}].`
  )
  .build()
// Resolver name (must be unique)
export const StringDBPPI_to_GeneSet = MetaNode('StringDBPPI_to_GeneSet')
  // Human-readable label, description and icon for this node
  .meta({
    label: 'Given StringDB PPI, generate the list of nodes (GeneSet)',
    description: 'Given StringDB PPI, generate the list of nodes (GeneSet)',
    icon: [metabolomicsworkbench_icon],
  })
  // Input: a StringDB PPI network, available to the resolver as `data`
  .inputs({ data: StringDB_PPI_Network })
  // Output: a gene set
  .output(GeneSet)
  // The gene set consists of the unique node names found in the network's
  // edges; its description is left empty.
  .resolve(async ({ inputs }) => ({
    description: "",
    set: GetAllNodes_from_StringDBedgeArray(inputs.data.edges),
  }))
  .story(() =>
    `For the Given StringDB PPI, the list of nodes (GeneSet) is generated.`
  )
  .build()
// A unique name for your resolver is used here
export const StringDBPPI_to_GraphPlot = MetaNode('StringDBPPI_to_GraphPlot')
  // Human readable descriptors about this node should go here
  .meta({
    label: 'Reformat StringDB PPI for plotting',
    description: 'Given StringDB PPI, reformat for plotting',
    icon: [metabolomicsworkbench_icon],
  })
  // This should be a mapping from argument name to argument type
  // the types are previously defined Meta Node Data Types
  .inputs({ data: StringDB_PPI_Network })
  // This should be a single Meta Node Data Type
  .output(GraphPlot)
  // The resolve function uses the inputs and returns output
  // both in the shape prescribed by the data type codecs
  .resolve(async (props) => {
    // Only the edges and the species code are needed for the conversion;
    // the gene ID type is left at the formatter's default.
    const { species_id, edges } = props.inputs.data
    return await Format_StringDBedgeArray_for_GraphPlot(edges, species_id)
  })
  // The story describes the reformatting step performed by this resolver.
  .story(props =>
    `For the given StringDB PPI, the network was reformatted into nodes and edges for plotting as a graph.`
  )
  .build()