# Aggregating data with MDF

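Searches using `Forge.search()` are limited to 10,000 results. However, there are two methods to circumvent this restriction: `Forge.aggregate_source()` and `Forge.aggregate()`. As used below, `Forge.aggregate_source()` takes a source name and returns all of that source's entries, while `Forge.aggregate()` takes a query string and returns all matching results.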

In [1]:
import json
from mdf_forge.forge import Forge

In [2]:
# Create the Forge client; the aggregation calls below all go through `mdf`.
# NOTE(review): constructing Forge may trigger authentication — confirm before running unattended.
mdf = Forge()

## Aggregate a single source: NIST XPS DB

In [3]:
# Pull every entry of the "nist_xps_db" source in a single call, then report
# how many entries came back.
all_entries = mdf.aggregate_source("nist_xps_db")
entry_count = len(all_entries)
print(entry_count)

29189


In [4]:
# Tally how often each "Quality of Data" label occurs among the entries whose
# resource type is "record".
qualities = {}
for record in all_entries:
    # Only entries with resource_type "record" are counted; skip everything else.
    if record["mdf"]["resource_type"] != "record":
        continue
    # The raw payload is stored as a JSON string and must be decoded first.
    label = json.loads(record["mdf"]["raw"])["Quality of Data"]
    qualities[label] = qualities.get(label, 0) + 1
print(qualities)

{'Adequate': 11630, '': 15940, 'Good': 1615, 'good': 4}


## Aggregate query across multiple datasets

In [5]:
# Collect every result for the query string "mdf.elements:Ga" (the field
# `mdf.elements` paired with the value `Ga`), then report the total.
all_results = mdf.aggregate("mdf.elements:Ga")
result_count = len(all_results)
print(result_count)

100%|██████████| 25299/25299 [01:03<00:00, 342.95it/s]

25299





In [6]:
# Count how many times each element symbol appears across the element lists
# of the entries whose resource type is "record".
elements = {}
for record in all_results:
    # Only entries with resource_type "record" are counted; skip everything else.
    if record["mdf"]["resource_type"] != "record":
        continue
    for symbol in record["mdf"]["elements"]:
        elements[symbol] = elements.get(symbol, 0) + 1
# Serialize with sorted keys and 4-space indentation so the tally is easy to scan.
formatted = json.dumps(elements, sort_keys=True, indent=4, separators=(',', ': '))
print(formatted)

{
    "Ac": 625,
    "Ag": 548,
    "Al": 555,
    "Ar": 2,
    "As": 1252,
    "Au": 588,
    "B": 536,
    "Ba": 671,
    "Be": 496,
    "Bi": 547,
    "Br": 53,
    "C": 122,
    "Ca": 617,
    "Cd": 563,
    "Ce": 597,
    "Cl": 89,
    "Co": 874,
    "Cr": 679,
    "Cs": 501,
    "Cu": 660,
    "Dy": 576,
    "Er": 637,
    "Eu": 560,
    "F": 113,
    "Fe": 647,
    "Ga": 25299,
    "Gd": 575,
    "Ge": 646,
    "H": 205,
    "Hf": 630,
    "Hg": 524,
    "Ho": 567,
    "I": 56,
    "In": 579,
    "Ir": 541,
    "K": 584,
    "La": 803,
    "Li": 854,
    "Lu": 512,
    "Mg": 932,
    "Mn": 609,
    "Mo": 569,
    "N": 172,
    "Na": 721,
    "Nb": 536,
    "Nd": 573,
    "Ni": 722,
    "Np": 501,
    "O": 2634,
    "Os": 663,
    "P": 175,
    "Pa": 565,
    "Pb": 517,
    "Pd": 601,
    "Pm": 618,
    "Pr": 691,
    "Pt": 698,
    "Pu": 526,
    "Rb": 545,
    "Re": 494,
    "Rh": 551,
    "Ru": 531,
    "S": 191,
    "Sb": 566,
    "Sc": 680,
    "Se": 174,
    "Si": 778,
    