Skip to content

Commit

Permalink
Merge pull request #17 from nbonfils/node-labels
Browse files Browse the repository at this point in the history
Add labels to nodes and normalize their size
  • Loading branch information
nbonfils committed Dec 7, 2023
2 parents e42e5f9 + 441c279 commit e396548
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,10 @@ <h4 x-text="field"></h4>
</template>
</div>
<button class="btn primary"
@click="
@click="
loading('visualization', 'Generating Graph...');
filteredData = await filterData(data, filters);
setTimeout(() => {
filteredData = filterData(data, filters);
graph = generateGraph(filteredData);
done();
}, 10);">
Expand Down
34 changes: 34 additions & 0 deletions src/lib/fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,37 @@ export const fetchWorks = async (query, fromYear, toYear, maxWorks) => {

return {count, works};
};

/**
 * Fetch the display labels for a list of OpenAlex work ids.
 *
 * Requests are batched (50 ids per request, via the `filter=openalex:a|b|c`
 * syntax) and run in parallel. Only the first `maxRefs` ids are resolved.
 * Failed batches are logged and skipped rather than failing the whole call.
 *
 * @param {string[]} openalexIds - OpenAlex work ids to label.
 * @returns {Promise<Array<{id: string, display_name: string}>>} One entry per
 *   successfully resolved work; order follows the API responses.
 */
export const fetchRefsLabels = async (openalexIds) => {
  if (openalexIds.length === 0) return [];

  const refsPerPage = 50;
  const maxRefs = 500; // We'll only fetch the labels for the first maxRefs refs
  const ids = [...openalexIds.slice(0, maxRefs)]; // Copy the array to prevent its destruction
  const numReq = Math.ceil(ids.length / refsPerPage);

  // Each map callback synchronously splices its chunk off `ids` before the
  // first await, so the chunks are disjoint and in order.
  const refs = await Promise.all([...Array(numReq).keys()].map(async () => {
    let data = {};
    const idsStr = ids.splice(0, refsPerPage).join("|");

    try {
      const response = await fetch(
        "https://api.openalex.org/works?" + new URLSearchParams({
          filter: `openalex:${idsStr}`,
          select: "id,display_name",
          mailto: `****@****.com`,
          // Fix: was `perPage`, an undefined identifier (ReferenceError);
          // the local constant is `refsPerPage`.
          "per-page": refsPerPage,
          page: 1,
        }));
      if (!response.ok) {
        throw new Error("Network response was not OK");
      }
      data = await response.json();
    } catch (e) {
      console.error(`Error while fetching refs:\n\t${e}`);
    }
    // On failure `data.results` is undefined; normalize so flat() is safe.
    return data.results ?? [];
  }));

  // Drop any null/undefined entries from partially-failed batches.
  return refs.flat().filter((ref) => ref);
};
15 changes: 9 additions & 6 deletions src/lib/graph.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@ export const fieldColors = {
export const fields = Object.keys(fieldColors);
export const metadataFields = fields.filter(field => field !== 'refs');

const maxRefNodeSize = 900;
const maxMetadataNodeSize = 2500;

export const generateGraph = (data) => {
let graph = new UndirectedGraph({ allowSelfLoops: false });

// Step 1: Create the map background (refs)

console.time('add refs nodes');
Object.entries(data['refs']).forEach(([id, {count}]) => {
Object.entries(data['refs']).forEach(([id, {count, label}]) => {
graph.addNode(id, {
label: id,
size: Math.sqrt(count),
label,
size: Math.sqrt(maxRefNodeSize * count / data.maxCounts.refs),
color: fieldColors['refs'],
count,
dataType: 'refs',
Expand Down Expand Up @@ -88,10 +91,10 @@ export const generateGraph = (data) => {
}

for (const field of metadataFields) {
for (const [id, {count}] of Object.entries(data[field])) {
for (const [id, {count, label}] of Object.entries(data[field])) {
graph.addNode(id, {
label: id,
size: Math.sqrt(count),
label,
size: Math.sqrt(maxMetadataNodeSize * count / data.maxCounts[field]),
color: fieldColors[field],
count,
dataType: field,
Expand Down
39 changes: 33 additions & 6 deletions src/lib/processing.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { fields, metadataFields } from './graph.js';
import { fetchRefsLabels } from './fetch.js';

const incOrCreate = (obj, key, subkey=false) => {
if (subkey) {
Expand All @@ -22,40 +23,50 @@ export const processWorks = (works) => {

work.referenced_works.forEach((ref) => {
incOrCreate(data.refs, ref, 'count');
data.refs[ref].label = ref;
});
data.sets.refs[work.id] = new Set(work.referenced_works);


// TODO think about using all the associated sources
if (work.primary_location?.source) {
incOrCreate(data.sources, work.primary_location.source.id, 'count');
data.sources[work.primary_location.source.id].label = work.primary_location.source.display_name;
data.sets.sources[work.id].add(work.primary_location.source.id);
}

const institutions = {}; // Save institutions objects to retrieve the labels later
// Use Sets to count each country and institution mentioned ONLY ONCE per work
work.authorships.forEach((authorship) => {
incOrCreate(data.authors, authorship.author.id, 'count');
data.authors[authorship.author.id].label = authorship.author.display_name;
data.sets.authors[work.id].add(authorship.author.id);

authorship.countries.forEach((country) => {
data.sets.countries[work.id].add(country);
});
authorship.institutions.forEach((institution) => {
data.sets.institutions[work.id].add(institution.id);
institutions[institution.id] = institution;
});
});
data.sets.countries[work.id].forEach((country) => {
incOrCreate(data.countries, country, 'count');
data.countries[country].label = country;
});
data.sets.institutions[work.id].forEach((institution) => { // This is only the id!
incOrCreate(data.institutions, institution, 'count');
data.sets.institutions[work.id].forEach((institutionId) => { // This is only the id!
incOrCreate(data.institutions, institutionId, 'count');
data.institutions[institutionId].label = institutions[institutionId].display_name;
});

work.concepts.forEach((concept) => {
incOrCreate(data.concepts, concept.id, 'count');
data.concepts[concept.id].label = concept.display_name;
data.sets.concepts[work.id].add(concept.id);
});

work.grants.forEach((grant) => {
incOrCreate(data.funders, grant.funder, 'count');
data.funders[grant.funder].label = grant.funder_display_name;
data.sets.funders[work.id].add(grant.funder);
});
});
Expand Down Expand Up @@ -99,23 +110,39 @@ function intersection(setA, setB) {
return _intersection;
}

export const filterData = (data, filters) => {
export const filterData = async (data, filters) => {
const filteredData = {};
filteredData.sets = {};
filteredData.maxCounts = {};

console.time('filter');

// Filter the refs first to get refsSet and use it to filter later
const threshold = filters.refs.lowerBounds[filters.refs.value];
filteredData.refs = Object.fromEntries(Object.entries(data.refs).filter(([, {count}]) => count >= threshold));
const filteredRefs = Object.entries(data.refs)
.filter(([, {count}]) => count >= threshold)
.sort(([, {count: count1}], [, {count: count2}]) => count2 - count1); // Sort in reverse order, we want the top ones first
filteredData.refs = Object.fromEntries(filteredRefs);
const refsSet = new Set(Object.keys(filteredData.refs));
filteredData.sets.refs = Object.fromEntries(Object.entries(data.sets.refs).map(([id, fieldSet]) => [id, intersection(refsSet, fieldSet)]).filter(([, fieldSet]) => fieldSet.size > 0));

filteredData.maxCounts.refs = filteredRefs.reduce((acc, [, {count}]) => Math.max(acc, count), 0);

// Get the refs labels
console.time('label refs');
const refsLabels = await fetchRefsLabels(filteredRefs.map(([id,]) => id));
for (const {id, display_name} of refsLabels) {
filteredData.refs[id].label = display_name;
}
console.timeEnd('label refs');

metadataFields.forEach((field) => {
const threshold = filters[field].lowerBounds[filters[field].value];
filteredData[field] = Object.fromEntries(Object.entries(data[field]).filter(([, {count}]) => count >= threshold));
const wholeSet = new Set(Object.keys(filteredData[field]));
filteredData.sets[field] = Object.fromEntries(Object.entries(data.sets[field]).map(([id, fieldSet]) => [id, intersection(wholeSet, fieldSet)]).filter(([id, fieldSet]) => fieldSet.size > 0 && refsSet.has(id)));
filteredData.sets[field] = Object.fromEntries(Object.entries(data.sets[field]).map(([id, fieldSet]) => [id, intersection(wholeSet, fieldSet)]).filter(([id, fieldSet]) => fieldSet.size > 0 && filteredData.sets.refs[id]));

filteredData.maxCounts[field] = Object.values(filteredData[field]).reduce((acc, {count}) => Math.max(acc, count), 0);
});

console.timeEnd('filter');
Expand Down

0 comments on commit e396548

Please sign in to comment.