# Utility notebook to find the runs with the most fields logged in a given project

<a target="_blank" href="https://colab.research.google.com/github/neptune-ai/examples/blob/main/utils/management_tools/Get_runs_with_most_fields.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/>
</a><a target="_blank" href="https://github.com/neptune-ai/examples/blob/main/utils/management_tools/Get_runs_with_most_fields.ipynb">
  <img alt="Open in GitHub" src="https://img.shields.io/badge/Open_in_GitHub-blue?logo=github&labelColor=black">
</a>

In [None]:
# Install or upgrade (-U) the Neptune client and tqdm, with pip output silenced (-qq)
%pip install -qq -U neptune tqdm

In [None]:
import os
import neptune
import pandas as pd
from tqdm.auto import tqdm
from typing import Optional
import logging

## Set Neptune API token

In [None]:
# Prompt for a token only when the environment does not already provide one;
# getpass keeps the typed value from being echoed into the notebook output.
if os.environ.get("NEPTUNE_API_TOKEN") is None:
    from getpass import getpass

    os.environ["NEPTUNE_API_TOKEN"] = getpass(
        "Enter the Neptune API token you wish to use: "
    )

## Enter project to scan

To find the full project name:

1. [Log in to Neptune](https://app.neptune.ai/).
1. Open the project settings and select **Details & privacy**.

For more help, see [Setting Neptune credentials](https://docs.neptune.ai/setup/setting_credentials) in the Neptune docs.

In [None]:
# Store the project name for the later cells, which open the project and runs
# without passing a name explicitly. Surrounding whitespace is stripped so that
# a copy-pasted name with a stray space or newline is still a valid identifier.
os.environ["NEPTUNE_PROJECT"] = input(
    "Enter the project to scan in the format workspace/project: "
).strip()

## Get all the runs from the project

In [None]:
# Raise the "neptune" logger threshold to CRITICAL so that lower-severity
# messages do not clutter the notebook output while runs are opened.
neptune_logger = logging.getLogger("neptune")
neptune_logger.setLevel(logging.CRITICAL)

In [None]:
# Start from an empty frame so `runs_df` is defined even if the fetch below fails
runs_df = pd.DataFrame()

# Open the project read-only and pull the runs table with an empty column
# selection and the progress bar disabled, then convert it to a DataFrame.
with neptune.init_project(mode="read-only") as proj:
    runs_table = proj.fetch_runs_table(columns=[], progress_bar=False)
    runs_df = runs_table.to_pandas()

## Fetch namespaces from all the runs

In [None]:
def flatten_namespaces(
    dictionary: dict, prefix: Optional[list] = None, result: Optional[list] = None
) -> list:
    """Flatten a nested dict into a list of "/"-joined paths to its leaves.

    Args:
        dictionary: Nested mapping to walk. Any value that is itself a dict is
            descended into; every other value counts as a leaf.
        prefix: Keys leading to ``dictionary`` from the root. Defaults to an
            empty path.
        result: List that leaf paths are appended to. A new list is created
            when omitted.

    Returns:
        The ``result`` list, holding one path per leaf in traversal order.
        Empty nested dicts contribute nothing.
    """
    path = [] if prefix is None else prefix
    collected = [] if result is None else result

    for key, value in dictionary.items():
        if isinstance(value, dict):
            # Descend, extending the path with this key; leaves land in `collected`
            flatten_namespaces(value, path + [key], collected)
            continue

        joined = "/".join(path)
        # Top-level leaves have no prefix and are recorded by their bare key
        collected.append(f"{joined}/{key}" if joined else key)

    return collected

In [None]:
# Number of fields (leaf entries in the run's structure) for each run,
# in the same order as the rows of `runs_df`.
namespaces = []

# A table without a "sys/id" column (possibly what a project with no runs
# produces; not confirmed here) would otherwise raise KeyError, so fall back
# to an empty list of run IDs.
run_ids = runs_df["sys/id"].tolist() if "sys/id" in runs_df.columns else []

# The loop variable is `run_id` rather than `id` so the builtin id() is not
# shadowed for the remainder of the notebook session.
for run_id in tqdm(run_ids):
    # Each run is opened read-only and closed again by the context manager
    with neptune.init_run(with_id=run_id, mode="read-only") as run:
        namespaces.append(len(flatten_namespaces(run.get_structure())))

runs_df["namespaces"] = namespaces

## Runs with most fields logged

In [None]:
# Display the runs ordered from the most to the fewest logged fields
runs_df.sort_values(by=["namespaces"], ascending=[False])