# Scraping Data Rescue Project
github: https://github.com/datarescueproject/portal/

LAST UPDATE: 25/01/2026

In [None]:
# Imports
import re
import requests
import yaml
import pandas as pd
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright
from urllib.parse import urlparse
from urllib.parse import unquote
import time


pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)



Getting info from MD files

In [2]:
# Coordinates of the GitHub repository that hosts the dataset Markdown files.
owner = "datarescueproject"
repo = "portal"
# Built from owner/repo so the URL cannot drift from the values above.
repo_url = f"https://github.com/{owner}/{repo}/"
# Only files under this path prefix are treated as dataset records.
folder = "_datasets/"
branch = 'main'

In [3]:
# Resolve branch -> commit -> root tree through the GitHub Git Data API, then
# list every entry of the repository in a single recursive tree request.
api_root = f"https://api.github.com/repos/{owner}/{repo}/git"

ref_response = requests.get(f"{api_root}/refs/heads/{branch}", timeout=30)
# Fail loudly on HTTP errors (e.g. rate limiting) instead of hitting a
# confusing KeyError on the error payload below.
ref_response.raise_for_status()
ref = ref_response.json()
commit_sha = ref['object']['sha']

commit_response = requests.get(f"{api_root}/commits/{commit_sha}", timeout=30)
commit_response.raise_for_status()
tree_sha = commit_response.json()["tree"]["sha"]

tree_response = requests.get(
    f"{api_root}/trees/{tree_sha}",
    params={"recursive": "1"},
    timeout=30,
)
tree_response.raise_for_status()
tree_payload = tree_response.json()
# GitHub sets "truncated" when the recursive listing exceeded its size limit;
# in that case the file list below would be silently incomplete.
if tree_payload.get("truncated"):
    print("WARNING: GitHub returned a truncated tree; the file list is incomplete.")
tree = tree_payload["tree"]

In [4]:
# Collect the paths of all Markdown blobs that live under `folder`,
# in sorted order, and report how many were found.
md_files = sorted(
    entry["path"]
    for entry in tree
    if entry["type"] == "blob"
    and entry["path"].startswith(folder)
    and entry["path"].lower().endswith(".md")
)
print(len(md_files))

1978


In [6]:
# Matches a leading YAML front-matter block delimited by '---' lines and
# captures its body.
frontmatter_regex = re.compile(
    r'^\s*---\s*\n(.*?)\n---\s*\n?',
    re.DOTALL
)

# Front-matter keys copied as-is into the project table (order = column order).
project_fields = [
    'schema', 'title', 'organization', 'agency', 'websites', 'data_source',
    'description', 'last_modified', 'metadata_available', 'metadata_url',
]
# Keys copied as-is from each entry of the `resources` list.
resource_fields = [
    'id', 'url', 'format', 'status', 'size', 'download_date', 'maintainer', 'notes',
]


def _parse_frontmatter(markdown, source=''):
    """Return the YAML front matter of ``markdown`` as a dict.

    Returns an empty dict when there is no front-matter block, when the YAML
    cannot be parsed (a message naming ``source`` is printed), or when the
    parsed value is not a mapping.
    """
    match = frontmatter_regex.match(markdown)
    if not match:
        return {}
    try:
        parsed = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        # One malformed file should not abort the whole scrape.
        print(f"Malformed front matter in {source}: {exc}")
        return {}
    return parsed if isinstance(parsed, dict) else {}


projects = []
resources = []

raw_base = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}"

# A single Session reuses the connection across the per-file requests.
with requests.Session() as session:
    for path in md_files:
        response = session.get(f"{raw_base}/{path}", timeout=30)
        # Without this check an error page (404, rate limit, ...) would be
        # parsed as a file with no front matter and stored as an empty row.
        response.raise_for_status()
        frontmatter = _parse_frontmatter(response.text, source=path)

        project_row = {'file': path}
        project_row.update((key, frontmatter.get(key)) for key in project_fields)

        # `category` may be a list, a scalar, or missing; store it as one string.
        categories = frontmatter.get('category', [])
        if isinstance(categories, list):
            project_row['category'] = '; '.join(map(str, categories))
        elif categories:
            project_row['category'] = str(categories)
        else:
            project_row['category'] = ''

        resource_list = frontmatter.get('resources', [])
        if not isinstance(resource_list, list):
            resource_list = []

        project_row['resource_count'] = len(resource_list)
        projects.append(project_row)

        for res in resource_list:
            if not isinstance(res, dict):
                continue

            resource_row = {
                'file': path,
                'project_title': frontmatter.get('title'),
            }
            resource_row.update((key, res.get(key)) for key in resource_fields)
            resources.append(resource_row)

# One row per Markdown file / one row per listed resource.
projects_df = pd.DataFrame(projects)
resources_df = pd.DataFrame(resources)

projects_df.head()

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count
0,_datasets/10j-injunctions.md,data_rescue_project,10(j) Injunctions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions,,2025-04-20,True,,Labor & Employment,1
1,_datasets/2002-2024-mbda-grantees.md,data_rescue_project,2002-2024 MBDA Grantees,Minority Business Development Agency,Department of Commerce,mbda.gov,https://www.mbda.gov/research/data/mbda-grantees,,2025-04-02,False,,Business & Economy,1
2,_datasets/2006-iur-public-database.md,data_rescue_project,2006 IUR Public Database,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,,2025-09-18,True,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,Climate & Environment,1
3,_datasets/2014-minority-veteran-report.md,data_rescue_project,2014 Minority Veteran Report,Office of Information and Technology - IT Operations and Services (ITOPS),Department of Veterans Affairs,data.va.gov,https://www.data.va.gov/stories/s/gavm-n6bm,,2025-03-17,False,,Science & Research; Military & Veterans Affairs,1
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,data_rescue_project,2016 AmeriCorps MES AmeriCorps Member Exit Survey,AmeriCorps,AmeriCorps,data.americorps.gov,https://data.americorps.gov/National-Service/2016-AmeriCorps-MES-AmeriCorps-Member-Exit-Survey/wqhv-fm5d/about_data,,2025-03-10,False,,Humanitarian & Disaster Relief; Military & Veterans Affairs,1


In [7]:
# Persist both tables as CSV files in the working directory.
for frame, csv_name in (
    (projects_df, 'datarescuetracker.csv'),
    (resources_df, 'resources.csv'),
):
    frame.to_csv(csv_name)

In [8]:
# Display the project table (one row per Markdown file).
projects_df

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count
0,_datasets/10j-injunctions.md,data_rescue_project,10(j) Injunctions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions,,2025-04-20,True,,Labor & Employment,1
1,_datasets/2002-2024-mbda-grantees.md,data_rescue_project,2002-2024 MBDA Grantees,Minority Business Development Agency,Department of Commerce,mbda.gov,https://www.mbda.gov/research/data/mbda-grantees,,2025-04-02,False,,Business & Economy,1
2,_datasets/2006-iur-public-database.md,data_rescue_project,2006 IUR Public Database,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,,2025-09-18,True,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,Climate & Environment,1
3,_datasets/2014-minority-veteran-report.md,data_rescue_project,2014 Minority Veteran Report,Office of Information and Technology - IT Operations and Services (ITOPS),Department of Veterans Affairs,data.va.gov,https://www.data.va.gov/stories/s/gavm-n6bm,,2025-03-17,False,,Science & Research; Military & Veterans Affairs,1
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,data_rescue_project,2016 AmeriCorps MES AmeriCorps Member Exit Survey,AmeriCorps,AmeriCorps,data.americorps.gov,https://data.americorps.gov/National-Service/2016-AmeriCorps-MES-AmeriCorps-Member-Exit-Survey/wqhv-fm5d/about_data,,2025-03-10,False,,Humanitarian & Disaster Relief; Military & Veterans Affairs,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1973,_datasets/yemen-usaid-education-data.md,data_rescue_project,Yemen USAID Education Data,M/CIO Development Information Solution (DIS),U.S. Agency for International Development,data.usaid.gov,data.usaid.gov,,2025-11-03,False,,Education; Humanitarian & Disaster Relief,1
1974,_datasets/your-right-to-federal-records.md,data_rescue_project,Your Right to Federal Records,Federal Mediation and Conciliation Service,Federal Mediation and Conciliation Service,fmcs.gov,https://www.fmcs.gov/resources/documents-and-data/,,2025-03-31,False,,Labor & Employment,1
1975,_datasets/youth-risk-behavior-surveillance-system-yrbss-2023-2025.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS) 2023-2025,Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/yrbs/about/index.html,,2025-11-05,False,,Health & Healthcare,1
1976,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2


In [11]:
# (rows, columns) of the project table.
projects_df.shape

(1978, 13)

In [9]:
# Display the resource table (one row per entry of a file's `resources` list).
resources_df

Unnamed: 0,file,project_title,id,url,format,status,size,download_date,maintainer,notes
0,_datasets/10j-injunctions.md,10(j) Injunctions,759,https://doi.org/10.3886/E226824V1,"CSV, TXT",Finished,0.000,2025-04-07,"DRP, DL","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010."
1,_datasets/2002-2024-mbda-grantees.md,2002-2024 MBDA Grantees,696,https://www.datalumos.org/datalumos/project/223443/version/V1/view,CSV,Finished,0.000,2025-03-18,"DRP, DL",
2,_datasets/2006-iur-public-database.md,2006 IUR Public Database,1280,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/F3A62W,"mhtml, ZIP, PDF",Finished,0.005,2025-02-26,"HD, CAFE-RCC","There was a 2006 version and a 2002-1986 version. Both are archived, hence 2 dataverse URLs. ~ag. Seperate Metadata https://www.epa.gov/chemical-data-reporting/summary-cdr-reporting-requirements-year, https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database"
3,_datasets/2014-minority-veteran-report.md,2014 Minority Veteran Report,412,https://www.datalumos.org/datalumos/project/222881/version/V1/view,CSV,Finished,0.000,2025-03-11,"DRP, DL",
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,2016 AmeriCorps MES AmeriCorps Member Exit Survey,396,https://www.datalumos.org/datalumos/project/222043/version/V1/view,"CSV, XLSX, PDF",Finished,0.000,2025-03-08,"DRP, DL",
...,...,...,...,...,...,...,...,...,...,...
2021,_datasets/your-right-to-federal-records.md,Your Right to Federal Records,643,https://www.datalumos.org/datalumos/project/223063/version/V1/view,PDF,Finished,0.000,2025-03-15,"DRP, DL",Regulations and Policies Tab
2022,_datasets/youth-risk-behavior-surveillance-system-yrbss-2023-2025.md,Youth Risk Behavior Surveillance System (YRBSS) 2023-2025,1502,https://www.datalumos.org/datalumos/project/220761/view,,Finished,,,DL,
2023,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,Youth Risk Behavior Surveillance System (YRBSS),326,https://www.dropbox.com/scl/fo/2t0ehrteq62jqrj0slm3q/AHed9WK-9ydPJT1398w42zo?rlkey=pqb2fisu1rgjjc2badfyfq0nk&dl=0,"DTA, XLSX, do, MDB, dat, SPS, SAS",Finished,0.000,2025-01-31,ICPSR,
2024,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,Youth Risk Behavior Surveillance System (YRBSS),1040,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/31/health-policy/yrbss/data.zip,ZIP,Finished,0.877,2025-02-05,UI,"The Youth Risk Behavior Surveillance System focuses on health risk behaviors that are often established during childhood and early adolescence, including behaviors associated with tobacco use, alcohol and other drug use, unintentional injuries, sexual behaviors related to unintended pregnancy and sexually transmitted infections, unhealthy diet, and inadequate physical activity."


In [12]:
# Attach project-level fields to every resource row. `file` must be unique on
# the project side; validate= makes pandas raise instead of silently
# multiplying rows if that ever stops being true.
# NOTE: an inner join drops projects that list no resources.
df = pd.merge(projects_df, resources_df, on='file', how='inner', validate='one_to_many')

In [13]:
# Display the merged project/resource table.
df

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes
0,_datasets/10j-injunctions.md,data_rescue_project,10(j) Injunctions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions,,2025-04-20,True,,Labor & Employment,1,10(j) Injunctions,759,https://doi.org/10.3886/E226824V1,"CSV, TXT",Finished,0.000,2025-04-07,"DRP, DL","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010."
1,_datasets/2002-2024-mbda-grantees.md,data_rescue_project,2002-2024 MBDA Grantees,Minority Business Development Agency,Department of Commerce,mbda.gov,https://www.mbda.gov/research/data/mbda-grantees,,2025-04-02,False,,Business & Economy,1,2002-2024 MBDA Grantees,696,https://www.datalumos.org/datalumos/project/223443/version/V1/view,CSV,Finished,0.000,2025-03-18,"DRP, DL",
2,_datasets/2006-iur-public-database.md,data_rescue_project,2006 IUR Public Database,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,,2025-09-18,True,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,Climate & Environment,1,2006 IUR Public Database,1280,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/F3A62W,"mhtml, ZIP, PDF",Finished,0.005,2025-02-26,"HD, CAFE-RCC","There was a 2006 version and a 2002-1986 version. Both are archived, hence 2 dataverse URLs. ~ag. Seperate Metadata https://www.epa.gov/chemical-data-reporting/summary-cdr-reporting-requirements-year, https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database"
3,_datasets/2014-minority-veteran-report.md,data_rescue_project,2014 Minority Veteran Report,Office of Information and Technology - IT Operations and Services (ITOPS),Department of Veterans Affairs,data.va.gov,https://www.data.va.gov/stories/s/gavm-n6bm,,2025-03-17,False,,Science & Research; Military & Veterans Affairs,1,2014 Minority Veteran Report,412,https://www.datalumos.org/datalumos/project/222881/version/V1/view,CSV,Finished,0.000,2025-03-11,"DRP, DL",
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,data_rescue_project,2016 AmeriCorps MES AmeriCorps Member Exit Survey,AmeriCorps,AmeriCorps,data.americorps.gov,https://data.americorps.gov/National-Service/2016-AmeriCorps-MES-AmeriCorps-Member-Exit-Survey/wqhv-fm5d/about_data,,2025-03-10,False,,Humanitarian & Disaster Relief; Military & Veterans Affairs,1,2016 AmeriCorps MES AmeriCorps Member Exit Survey,396,https://www.datalumos.org/datalumos/project/222043/version/V1/view,"CSV, XLSX, PDF",Finished,0.000,2025-03-08,"DRP, DL",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021,_datasets/your-right-to-federal-records.md,data_rescue_project,Your Right to Federal Records,Federal Mediation and Conciliation Service,Federal Mediation and Conciliation Service,fmcs.gov,https://www.fmcs.gov/resources/documents-and-data/,,2025-03-31,False,,Labor & Employment,1,Your Right to Federal Records,643,https://www.datalumos.org/datalumos/project/223063/version/V1/view,PDF,Finished,0.000,2025-03-15,"DRP, DL",Regulations and Policies Tab
2022,_datasets/youth-risk-behavior-surveillance-system-yrbss-2023-2025.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS) 2023-2025,Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/yrbs/about/index.html,,2025-11-05,False,,Health & Healthcare,1,Youth Risk Behavior Surveillance System (YRBSS) 2023-2025,1502,https://www.datalumos.org/datalumos/project/220761/view,,Finished,,,DL,
2023,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2,Youth Risk Behavior Surveillance System (YRBSS),326,https://www.dropbox.com/scl/fo/2t0ehrteq62jqrj0slm3q/AHed9WK-9ydPJT1398w42zo?rlkey=pqb2fisu1rgjjc2badfyfq0nk&dl=0,"DTA, XLSX, do, MDB, dat, SPS, SAS",Finished,0.000,2025-01-31,ICPSR,
2024,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2,Youth Risk Behavior Surveillance System (YRBSS),1040,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/31/health-policy/yrbss/data.zip,ZIP,Finished,0.877,2025-02-05,UI,"The Youth Risk Behavior Surveillance System focuses on health risk behaviors that are often established during childhood and early adolescence, including behaviors associated with tobacco use, alcohol and other drug use, unintentional injuries, sexual behaviors related to unintended pregnancy and sexually transmitted infections, unhealthy diet, and inadequate physical activity."


extracting domains

In [14]:
def get_domain(url):
    """Return the network location (host, plus port if present) of ``url``.

    Parameters
    ----------
    url : object
        A cell value from a URL column. Anything that is not a ``str``
        (``None``, ``NaN``, ...) is treated as missing.

    Returns
    -------
    str or None
        ``None`` for non-string input or for strings ``urlparse`` rejects;
        the parsed netloc for URLs that carry a scheme (``''`` when the URL
        has no host part, e.g. ``magnet:`` links); for scheme-less values such
        as ``"www.example.org/path"`` the leading host, or ``''`` when the
        text does not look like a host.
    """
    if not isinstance(url, str):
        return None
    url = url.strip()
    try:
        parsed = urlparse(url)
        if parsed.netloc or parsed.scheme:
            return parsed.netloc
        # Bare "host/path" strings are parsed as a path only; re-parse them
        # as a scheme-relative URL so the host lands in netloc.
        candidate = urlparse("//" + url).netloc
    except ValueError:
        # urlparse raises on malformed authorities (e.g. unbalanced "[");
        # one bad cell must not abort a whole Series.apply.
        return None
    # Only accept the fallback when it plausibly is a host name.
    if "." in candidate and " " not in candidate:
        return candidate
    return ""

In [15]:
# Derive the hosting domain of every resource URL and preview the result.
df["domain"] = df["url"].map(get_domain)
df.head()

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain
0,_datasets/10j-injunctions.md,data_rescue_project,10(j) Injunctions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions,,2025-04-20,True,,Labor & Employment,1,10(j) Injunctions,759,https://doi.org/10.3886/E226824V1,"CSV, TXT",Finished,0.0,2025-04-07,"DRP, DL","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010.",doi.org
1,_datasets/2002-2024-mbda-grantees.md,data_rescue_project,2002-2024 MBDA Grantees,Minority Business Development Agency,Department of Commerce,mbda.gov,https://www.mbda.gov/research/data/mbda-grantees,,2025-04-02,False,,Business & Economy,1,2002-2024 MBDA Grantees,696,https://www.datalumos.org/datalumos/project/223443/version/V1/view,CSV,Finished,0.0,2025-03-18,"DRP, DL",,www.datalumos.org
2,_datasets/2006-iur-public-database.md,data_rescue_project,2006 IUR Public Database,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,,2025-09-18,True,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,Climate & Environment,1,2006 IUR Public Database,1280,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/F3A62W,"mhtml, ZIP, PDF",Finished,0.005,2025-02-26,"HD, CAFE-RCC","There was a 2006 version and a 2002-1986 version. Both are archived, hence 2 dataverse URLs. ~ag. Seperate Metadata https://www.epa.gov/chemical-data-reporting/summary-cdr-reporting-requirements-year, https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database",dataverse.harvard.edu
3,_datasets/2014-minority-veteran-report.md,data_rescue_project,2014 Minority Veteran Report,Office of Information and Technology - IT Operations and Services (ITOPS),Department of Veterans Affairs,data.va.gov,https://www.data.va.gov/stories/s/gavm-n6bm,,2025-03-17,False,,Science & Research; Military & Veterans Affairs,1,2014 Minority Veteran Report,412,https://www.datalumos.org/datalumos/project/222881/version/V1/view,CSV,Finished,0.0,2025-03-11,"DRP, DL",,www.datalumos.org
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,data_rescue_project,2016 AmeriCorps MES AmeriCorps Member Exit Survey,AmeriCorps,AmeriCorps,data.americorps.gov,https://data.americorps.gov/National-Service/2016-AmeriCorps-MES-AmeriCorps-Member-Exit-Survey/wqhv-fm5d/about_data,,2025-03-10,False,,Humanitarian & Disaster Relief; Military & Veterans Affairs,1,2016 AmeriCorps MES AmeriCorps Member Exit Survey,396,https://www.datalumos.org/datalumos/project/222043/version/V1/view,"CSV, XLSX, PDF",Finished,0.0,2025-03-08,"DRP, DL",,www.datalumos.org


In [16]:
# Count how many resources point at each domain.
df['domain'].value_counts()

domain
www.datalumos.org                                1589
www.dropbox.com                                    85
sciop.net                                          72
doi.org                                            49
dataverse.harvard.edu                              46
zenodo.org                                         34
archive.org                                        28
github.com                                         22
urban-data-catalog.s3.us-east-1.amazonaws.com      10
purl.stanford.edu                                  10
nlrbresearch.com                                    4
web.archive.org                                     3
arcgis.com                                          3
www.openicpsr.org                                   3
www.arcgis.com                                      3
biglocalnews.org                                    3
doi.pangaea.de                                      2
www.hydroshare.org                                  1
s3.amazonaws.com     

In [17]:
# Rows with no domain; get_domain returns None when the url is not a string.
df[df['domain'].isna()]

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain
134,_datasets/bipartisan-infrastructure-law-and-inflation-reduction-act-awards-explorer.md,data_rescue_project,Bipartisan Infrastructure Law and Inflation Reduction Act Awards Explorer,National Oceanic and Atmospheric Administration,Department of Commerce,noaa.gov,https://www.noaa.gov/bil-ira-awards-explorer,,2025-03-02,False,,Climate & Environment,1,Bipartisan Infrastructure Law and Inflation Reduction Act Awards Explorer,211,,CSV,Finished,0.0,,"EDGI, ESRI",Local - EDGI; only downloaded the csv; not trying to recreate the mapper,
135,_datasets/bls-downloads.md,data_rescue_project,BLS Downloads,Bureau of Labor Statistics,Department of Labor,download.bls.gov,https://download.bls.gov,,2025-02-10,False,,Labor & Employment; Business & Economy,1,BLS Downloads,1,,,In Progress,47.0,2025-02-01,DRP,,
143,_datasets/building-code-adoption-tracking.md,data_rescue_project,Building Code Adoption Tracking,Federal Emergency Management Agency,Department of Homeland Security,fema.gov,https://stantec.maps.arcgis.com/apps/MapSeries/index.html?appid=a053ac48343c4217ab4184bc8759c350,,2025-03-03,False,,Climate & Environment; Humanitarian & Disaster Relief,1,Building Code Adoption Tracking,205,,"Interactive Map, PDF",In Progress,0.0,,ESRI,"Local - ESRI; Identify which building codes an area has adopted by hazard risk, PDF fact sheets by region",
151,_datasets/cdc-ftp-server.md,data_rescue_project,CDC FTP Server,Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,ftp.cdc.gov,https://ftp.cdc.gov/,,2025-05-23,False,,Health & Healthcare,1,CDC FTP Server,2,,,In Progress,213.0,2025-02-01,DRP,,
160,_datasets/census-bureau-ftp-server.md,data_rescue_project,Census Bureau FTP Server,Census Bureau,Department of Commerce,www2.census.gov,https://www2.census.gov,,2025-05-23,False,,Housing & Community Development; Health & Healthcare; Labor & Employment; Business & Economy; Social Services; Education,4,Census Bureau FTP Server,3,,"PDF, ZIP, XLS, HTML, TXT",Uploading,180.0,2025-02-01,DRP,Partial download.,
161,_datasets/census-bureau-ftp-server.md,data_rescue_project,Census Bureau FTP Server,Census Bureau,Department of Commerce,www2.census.gov,https://www2.census.gov,,2025-05-23,False,,Housing & Community Development; Health & Healthcare; Labor & Employment; Business & Economy; Social Services; Education,4,Census Bureau FTP Server,1050,,"PDF, ZIP, XLS, HTML, TXT",Uploading,6200.0,2025-02-17,"DRP, ANON","Created as a torrent by Reddit user enchanting_endeavour and can be downloaded using the following magnet link magnet?xt=urnbtihda7f54c14ca6ab795ddb9f87b953c3dd8f22fbcd&dn=ftp2_census_gov_2025_02_17_torrents&tr=http%3A%2F%2Fwww.torrentsnipe.info%3A2701%2Fannounce&tr=udp%3A%2F%2Fdiscord.heihachi.pw%3A6969%2Fannounce. Original description ""In order to make the data manageable, the whole dataset is broken up into 41 pieces. This file is a torrent of torrents which has a torrent file for each of those pieces. Note that due to an error on my part, piece 31 is just an empty data structure and has no other data in it. The top level directory, ftp2.census.gov, in each of the pieces should merge cleanly without conflicts with the others. This dataset includes data for many of the census.gov subdomains, though I have not be able to verify which domains specifically are included and which are not.""",
162,_datasets/census-bureau-ftp-server.md,data_rescue_project,Census Bureau FTP Server,Census Bureau,Department of Commerce,www2.census.gov,https://www2.census.gov,,2025-05-23,False,,Housing & Community Development; Health & Healthcare; Labor & Employment; Business & Economy; Social Services; Education,4,Census Bureau FTP Server,1051,,"PDF, ZIP, XLS, HTML, TXT",Uploading,1200.0,2025-02-13,"DRP, CREP",,
163,_datasets/census-bureau-ftp-server.md,data_rescue_project,Census Bureau FTP Server,Census Bureau,Department of Commerce,www2.census.gov,https://www2.census.gov,,2025-05-23,False,,Housing & Community Development; Health & Healthcare; Labor & Employment; Business & Economy; Social Services; Education,4,Census Bureau FTP Server,1052,,"PDF, ZIP, XLS, HTML, TXT",Uploading,1900.0,2025-02-20,DRP,,
189,_datasets/climate-indicators.md,data_rescue_project,Climate Indicators,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/climate-indicators,,2025-03-02,False,,Climate & Environment,1,Climate Indicators,186,,,Finished,0.0,,ESRI,Local - ESRI; visual available on ArcGIS Online; Data is backed up but not app code,
250,_datasets/consumer-financial-protection-bureau-website.md,data_rescue_project,Consumer Financial Protection Bureau Website,Consumer Financial Protection Bureau,Consumer Financial Protection Bureau,consumerfinance.gov,https://files.consumerfinance.gov/,,2025-02-20,False,,Business & Economy,1,Consumer Financial Protection Bureau Website,8,,,In Progress,0.0,2025-02-03,DRP,Can't find the link in internet archives and no download location,


Scraping DataLumos for metadata (didn't work)

In [None]:
!pip -q install playwright beautifulsoup4 lxml
!playwright install chromium

In [None]:
doi_re = re.compile(r"(?:doi\.org/|doi:\s*)(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.I)

mask = resources_df["domain"].isin(["www.datalumos.org", "datalumos.org"])
cols = ["principal_investigators","summary","project_title_page","geographic_coverage","time_periods","doi"]
resources_df.loc[mask, cols] = None

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=True)
    page = await browser.new_page()

    for i, row in resources_df[mask].iterrows():
        url = row["url"]
        print(url)
        try:
            await page.goto(url, wait_until="networkidle", timeout=60000)
            html = await page.content()
            soup = BeautifulSoup(html, "lxml")

            # scan all <strong> labels and capture the text in the same parent
            for strong in soup.find_all("strong"):
                label = strong.get_text(" ", strip=True)
                parent_text = strong.parent.get_text(" ", strip=True)
                value = re.sub(rf"^{re.escape(label)}\s*:?\s*", "", parent_text).strip()

                low = label.lower()
                if "principal investigator" in low:
                    resources_df.at[i, "principal_investigators"] = value
                elif low.startswith("summary"):
                    resources_df.at[i, "summary"] = value
                elif low.startswith("project title"):
                    resources_df.at[i, "project_title_page"] = value
                elif low.startswith("geographic coverage"):
                    resources_df.at[i, "geographic_coverage"] = value
                elif low.startswith("time period"):
                    resources_df.at[i, "time_periods"] = value

            text = soup.get_text(" ", strip=True)
            found = doi_re.findall(text)
            if found:
                resources_df.at[i, "doi"] = found[0]

        except Exception as e:
            print("Failed:", url, "|", e)

    await browser.close()

resources_df.loc[mask, ["url"] + cols].head()

In [None]:
from playwright.async_api import async_playwright

test_url = resources_df.loc[mask, "url"].dropna().iloc[0]
print("Testing:", test_url)

async with async_playwright() as p:
    browser = await p.chromium.launch(headless=True)
    page = await browser.new_page()
    await page.goto(test_url, timeout=60000)
    print(await page.title())
    await browser.close()

Filling in the DOI column for resources whose URL is already a DOI link

In [18]:
# Boolean mask of rows whose resource URL is hosted on doi.org.
dois_provided = df['domain'].eq('doi.org')

In [80]:
# DOI shape: "10.<4-9 digits>/<suffix>"; matched against lower-cased URLs.
doi_pattern = r"(10\.\d{4,9}/[-._;()/:a-z0-9]+)"

# Lower-case the doi.org URLs first, then pull the DOI out of each one.
doi_urls_lower = df.loc[dois_provided, "url"].str.lower()
df.loc[dois_provided, "doi"] = doi_urls_lower.str.extract(doi_pattern, expand=False)

In [81]:
# Fraction of rows in df that have no DOI value.
df['doi'].isna().mean()

0.15301085883514315

Trying to derive DOIs for DataLumos projects from the project number and version in the URL

In [21]:
# Pull the project number and version tag out of ".../project/<id>/version/<Vn>/..." URLs.
m = df["url"].astype(str).str.extract(r"/project/(\d+)/version/(V\d+)/", expand=True)
df["project_id"], df["version"] = m[0], m[1]

# Only DataLumos rows where both parts were found get a guess.
on_datalumos = df["domain"].isin(["www.datalumos.org", "datalumos.org"])
has_id_and_version = df["project_id"].notna() & df["version"].notna()
mask_dl = on_datalumos & has_id_and_version

# guess DOI in the common ICPSR style: 10.3886/E{project_id}{version}
df["doi_guess"] = None
datalumos_rows = df.loc[mask_dl]
df.loc[mask_dl, "doi_guess"] = "10.3886/E" + datalumos_rows["project_id"] + datalumos_rows["version"]

df.loc[mask_dl, ["url","project_id","version","doi_guess"]]

Unnamed: 0,url,project_id,version,doi_guess
1,https://www.datalumos.org/datalumos/project/223443/version/V1/view,223443,V1,10.3886/E223443V1
3,https://www.datalumos.org/datalumos/project/222881/version/V1/view,222881,V1,10.3886/E222881V1
4,https://www.datalumos.org/datalumos/project/222043/version/V1/view,222043,V1,10.3886/E222043V1
5,https://www.datalumos.org/datalumos/project/222044/version/V2/view,222044,V2,10.3886/E222044V2
6,https://www.datalumos.org/datalumos/project/222044/version/V2/view,222044,V2,10.3886/E222044V2
...,...,...,...,...
2010,https://www.datalumos.org/datalumos/project/223962/version/V1/view,223962,V1,10.3886/E223962V1
2013,https://www.datalumos.org/datalumos/project/222882/version/V1/view,222882,V1,10.3886/E222882V1
2014,https://www.datalumos.org/datalumos/project/227293/version/V1/view,227293,V1,10.3886/E227293V1
2015,https://www.datalumos.org/datalumos/project/227006/version/V1/view,227006,V1,10.3886/E227006V1


In [22]:
# Number of rows sharing each guessed DOI (missing guesses are not counted).
df['doi_guess'].value_counts()

doi_guess
10.3886/E222581V1    51
10.3886/E223141V1    48
10.3886/E229201V1    37
10.3886/E223001V1    30
10.3886/E224621V1    25
                     ..
10.3886/E240512V1     1
10.3886/E240514V1     1
10.3886/E240516V1     1
10.3886/E240543V1     1
10.3886/E227293V1     1
Name: count, Length: 969, dtype: int64

testing if it works

In [23]:
# Sanity check: look one guessed DataLumos DOI up in the DataCite REST API and
# print the raw response body to see whether the guess resolves to a record.
test = "10.3886/E223443V1"

url = f"https://api.datacite.org/dois/{test}"

headers = {
    "Accept": "application/vnd.api+json"
}

# Bound the wait: without a timeout, requests can block indefinitely and hang
# the kernel if the API stalls.
response = requests.get(url, headers=headers, timeout=30)

print(response.text)

{"data":{"id":"10.3886/e223443v1","type":"dois","attributes":{"doi":"10.3886/e223443v1","prefix":"10.3886","suffix":"e223443v1","identifiers":[],"alternateIdentifiers":[],"creators":[{"name":"United States Department Of Commerce. Minority Business Development Agency","nameType":"Organizational","affiliation":[],"nameIdentifiers":[]}],"titles":[{"lang":"en","title":"2022-2024 MBDA Grantees"}],"publisher":"ICPSR - Interuniversity Consortium for Political and Social Research","container":{},"publicationYear":2025,"subjects":[{"lang":"en","subject":"grants"},{"lang":"en","subject":"minority businesses"}],"contributors":[],"dates":[{"date":"2022-01-01/2024-12-31","dateType":"Collected"},{"date":"2025","dateType":"Issued"}],"language":"en","types":{"ris":"DATA","bibtex":"misc","citeproc":"dataset","schemaOrg":"Dataset","resourceTypeGeneral":"Dataset"},"relatedIdentifiers":[{"relationType":"IsVersionOf","relatedIdentifier":"10.3886/e223443","relatedIdentifierType":"DOI"}],"relatedItems":[],"s

The guessed DOIs resolve correctly, so fill the empty cells of the `doi` column with the guesses

In [24]:
# Keep any DOI that is already present; use the guess only where `doi` is missing.
df["doi"] = df["doi"].combine_first(df["doi_guess"])

get dois of data outside datalumos

In [82]:
# Rows that still have no DOI.
missing_doi = df.loc[df["doi"].isna()]

In [83]:
# Inspect the rows that still lack a DOI.
missing_doi

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess
44,_datasets/administrative-law-judge-decisions.md,data_rescue_project,Administrative Law Judge Decisions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/cases-decisions/decisions/administrative-law-judge-decisions,,2025-05-14,False,,Labor & Employment,1,Administrative Law Judge Decisions,1008,https://nlrbresearch.com/NLRB/NLRB_DB?_search=type%3A+%22ALJ%22,PDF,Finished,0.000,,NLRB-R,Captured as part of NLRB Research a free database a researcher made.,nlrbresearch.com,,,,
61,_datasets/affirmatively-furthering-fair-housing-affh-data.md,data_rescue_project,Affirmatively Furthering Fair Housing (AFFH) Data,Department of Housing and Urban Development,Department of Housing and Urban Development,hud.gov,https://www.hud.gov/AFFH,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-housing-and-urban-development-affirmatively-furthering-fair-housing-hud-affh,Housing & Community Development,1,Affirmatively Furthering Fair Housing (AFFH) Data,1037,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/04/03/housing-and-communities/hud-affh/data.zip,ZIP,Finished,0.876,2024-12-18,UI,"This dataset contains all data, documentation, and file resources linked to on the main US Department of Housing and Urban Development’s Affirmatively Furthering Fair Housing (AFFH) page and powering the AFFH tool.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
65,_datasets/aging-independence-and-disability-agid-program-portal-data.md,data_rescue_project,"AGing, Independence, and Disability (AGID) Program Portal Data",Administration for Community Living,Department of Health and Human Services,agid.acl.gov,https://agid.acl.gov/release.html,,2025-05-19,True,https://datacatalog.urban.org/dataset/aging-independence-and-disability-agid-program-portal-data,Social Services; Health & Healthcare,1,"AGing, Independence, and Disability (AGID) Program Portal Data",1041,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/agid/data.zip,ZIP,Finished,6.000,2025-03-28,UI,"This resource contains all data from AGing, Independence, and Disability’s Program Data Portal from the US Department of Health and Human Services’ Administration for Community Living.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
67,_datasets/aids-drug-assistance-programs-adaps.md,data_rescue_project,AIDS Drug Assistance Programs (ADAPS),Health Resources and Services Administration,Department of Health and Human Services,hrsa.gov,https://www.hrsa.gov/grants/find-funding/HRSA-23-056,,2025-03-26,False,,Health & Healthcare,1,AIDS Drug Assistance Programs (ADAPS),242,https://www.dropbox.com/scl/fo/60drbfxp3p1hb1l4gr3cj/AHrpM10MrLH-0MvQHCcXFI4?rlkey=fps9vit5x2cg6367jhdykkl9a&dl=0,CSV,Finished,0.000,2025-01-31,ICPSR,,www.dropbox.com,,,,
68,_datasets/air-data-pre-generated-files-dec-2024.md,data_rescue_project,"Air Data Pre-Generated Files, Dec. 2024",Environmental Protection Agency,Environmental Protection Agency,aqs.epa.gov,https://aqs.epa.gov/aqsweb/airdata/download_files.html,,2025-05-18,False,,Climate & Environment,1,"Air Data Pre-Generated Files, Dec. 2024",1029,https://sciop.net/uploads/75bd916972ae78cbe59534dd88da55d11c4719f2,CSV,Finished,20.000,2025-05-15,"DRP, SRC",Alternate torrent location https://academictorrents.com/details/75bd916972ae78cbe59534dd88da55d11c4719f2,sciop.net,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017,_datasets/world-ocean-atlas-2023-figures.md,data_rescue_project,World Ocean Atlas 2023 Figures,National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/woa/WOA23F/,,2025-04-19,True,https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DOCUMENTATION/WOA23_Product_Documentation.pdf,Climate & Environment,1,World Ocean Atlas 2023 Figures,756,https://archive.org/details/noaa-ncei-woa23-figures-2025-04-19,PNG,Finished,105.000,2025-04-18,"DRP, IA",Includes actively seeded torrent file (the one without _archive). Alternate torrent location https://academictorrents.com/details/9bcdcb5efbcec15e37d918784618329e487599ac,archive.org,,,,
2018,_datasets/world-ocean-atlas-woa-2023.md,data_rescue_project,World Ocean Atlas (WOA) 2023,National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/ncei/woa/,,2025-04-14,True,https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc0270533,Climate & Environment,1,World Ocean Atlas (WOA) 2023,725,https://sciop.net/uploads/581bb9462e958df6011025b26fa1d609a2dafd73,nc,Uploading,157.000,2025-04-09,"DRP, SRC",New corrected/updated torrent. Alternate torrent location https://academictorrents.com/details/581bb9462e958df6011025b26fa1d609a2dafd73,sciop.net,,,,
2019,_datasets/world-ocean-database-wod.md,data_rescue_project,World Ocean Database (WOD),National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/ncei/wod/,,2025-04-14,True,https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodcNCEI-WOD,Climate & Environment,1,World Ocean Database (WOD),726,https://sciop.net/uploads/c0dce33ade7d0f828a542d5bed069b8909b3ee87,nc,Finished,159.000,2025-04-09,"DRP, SRC",New corrected/updated torrent. Alternate torrent location https://academictorrents.com/details/c0dce33ade7d0f828a542d5bed069b8909b3ee87,sciop.net,,,,
2023,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2,Youth Risk Behavior Surveillance System (YRBSS),326,https://www.dropbox.com/scl/fo/2t0ehrteq62jqrj0slm3q/AHed9WK-9ydPJT1398w42zo?rlkey=pqb2fisu1rgjjc2badfyfq0nk&dl=0,"DTA, XLSX, do, MDB, dat, SPS, SAS",Finished,0.000,2025-01-31,ICPSR,,www.dropbox.com,,,,


In [84]:
# Which hosts account for the rows that still lack a DOI.
missing_doi['domain'].value_counts()

domain
www.dropbox.com                                  85
sciop.net                                        72
archive.org                                      28
github.com                                       22
purl.stanford.edu                                10
urban-data-catalog.s3.us-east-1.amazonaws.com    10
nlrbresearch.com                                  4
web.archive.org                                   3
arcgis.com                                        3
www.openicpsr.org                                 3
www.arcgis.com                                    3
biglocalnews.org                                  3
www.datalumos.org                                 2
doi.pangaea.de                                    2
s3.amazonaws.com                                  1
tables.codeberg.page                              1
www.thelgbtqarchive.org                           1
www.hydroshare.org                                1
box.hu-berlin.de                                  1
erica

In [85]:
# Distinct hosts among the rows without a DOI.
missing_doi['domain'].unique()

array(['nlrbresearch.com',
       'urban-data-catalog.s3.us-east-1.amazonaws.com', 'www.dropbox.com',
       'sciop.net', 'livingatlas.arcgis.com', 'web.archive.org',
       'archive.org', None, 'github.com', 'www.datalumos.org',
       'purl.stanford.edu', 'source.coop', 'hub.arcgis.com',
       'doi.pangaea.de', 'www.zelma.ai', 'erica.datarescueproject.org',
       'edgi-govdata-archiving.github.io', 'public.tableau.com',
       'biglocalnews.org', 'gofile.me', 'www.documentcloud.org',
       's3.amazonaws.com', 'box.hu-berlin.de', 'www.arcgis.com',
       'www.openicpsr.org', 'arcgis.com', 'www.thelgbtqarchive.org',
       'tables.codeberg.page', 'www.hydroshare.org', 'data.openei.org'],
      dtype=object)

In [90]:
# DOI-less rows hosted in the Urban Institute data-catalog S3 bucket.
missing_doi.loc[missing_doi["domain"].eq("urban-data-catalog.s3.us-east-1.amazonaws.com")]

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess
61,_datasets/affirmatively-furthering-fair-housing-affh-data.md,data_rescue_project,Affirmatively Furthering Fair Housing (AFFH) Data,Department of Housing and Urban Development,Department of Housing and Urban Development,hud.gov,https://www.hud.gov/AFFH,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-housing-and-urban-development-affirmatively-furthering-fair-housing-hud-affh,Housing & Community Development,1,Affirmatively Furthering Fair Housing (AFFH) Data,1037,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/04/03/housing-and-communities/hud-affh/data.zip,ZIP,Finished,0.876,2024-12-18,UI,"This dataset contains all data, documentation, and file resources linked to on the main US Department of Housing and Urban Development’s Affirmatively Furthering Fair Housing (AFFH) page and powering the AFFH tool.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
65,_datasets/aging-independence-and-disability-agid-program-portal-data.md,data_rescue_project,"AGing, Independence, and Disability (AGID) Program Portal Data",Administration for Community Living,Department of Health and Human Services,agid.acl.gov,https://agid.acl.gov/release.html,,2025-05-19,True,https://datacatalog.urban.org/dataset/aging-independence-and-disability-agid-program-portal-data,Social Services; Health & Healthcare,1,"AGing, Independence, and Disability (AGID) Program Portal Data",1041,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/agid/data.zip,ZIP,Finished,6.0,2025-03-28,UI,"This resource contains all data from AGing, Independence, and Disability’s Program Data Portal from the US Department of Health and Human Services’ Administration for Community Living.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
109,_datasets/associate-attorney-general-select-publications.md,data_rescue_project,Associate Attorney General Select Publications,Justice Management Division,Department of Justice,justice.gov,https://www.justice.gov/asg/select-publications,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-justice-associate-attorney-general-select-publications,Justice & Public Safety,1,Associate Attorney General Select Publications,1038,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/justice-and-safety/doj-associate-attorney-general-publications/data.zip,ZIP,Finished,0.015,2025-01-15,UI,This resource contains select publications from the US Department of Justice’s associate attorney general.,urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
114,_datasets/attorney-general-publications.md,data_rescue_project,Attorney General Publications,Justice Management Division,Department of Justice,justice.gov,https://www.justice.gov/ag/select-publications,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-justice-attorney-general-publications,Justice & Public Safety,1,Attorney General Publications,1043,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/justice-and-safety/doj-attorney-general-publications/data.zip,ZIP,Finished,0.034,2024-12-27,UI,This resource contains select publications from the attorney general during the Biden administration.,urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
186,_datasets/climate-and-economic-justice-screening-tool-cejst.md,data_rescue_project,Climate and Economic Justice Screening Tool (CEJST),Environmental Protection Agency,Environmental Protection Agency,ejscreen.epa.gov,https://www.epa.gov/ejscreen,,2025-09-02,True,https://datacatalog.urban.org/dataset/climate-and-economic-justice-screening-tool-data,Climate & Environment,2,Climate and Economic Justice Screening Tool (CEJST),1044,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/04/03/housing-and-communities/climate-and-economic-justice-screening-tool/data.zip,ZIP,Finished,0.012,2025-01-09,UI,"These are census tract–level data on marginalization caused by underinvestment and pollution burden. Data include information on energy, health, housing, pollution, transportation, water and waste, and workforce development.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
299,_datasets/deputy-attorney-general-publications.md,data_rescue_project,Deputy Attorney General Publications,Justice Management Division,Department of Justice,justice.gov,https://www.justice.gov/dag/select-publications,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-justice-deputy-attorney-general-publications,Justice & Public Safety,1,Deputy Attorney General Publications,1042,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/justice-and-safety/doj-deputy-ag-publications/data.zip,ZIP,Finished,0.036,2025-01-18,UI,This resource contains select publications from the deputy attorney general during the Biden administration.,urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
394,_datasets/equity-commission-materials.md,data_rescue_project,Equity Commission Materials,U.S. Department of Agriculture,U.S. Department of Agriculture,usda.gov,https://www.usda.gov/about-usda/general-information/priorities/equity-usda/equity-commission,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-agriculture-equity-commission-materials,Agriculture,1,Equity Commission Materials,1045,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/usda-equity-commission/data.zip,ZIP,Finished,0.06,2024-12-26,UI,"This resource contains meeting materials from US Department of Agriculture (USDA) Equity Commission and relevant Federal Register notices. The USDA Equity Commission is an external group of individuals with expertise in areas including policy design, civil rights, legal experience, public relations, organizing farm workers, and advancing rural economic prosperity.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
1668,_datasets/race-ethnicity-and-gender-program-statistics-regstats.md,data_rescue_project,"Race, Ethnicity, and Gender Program Statistics (Regstats)",U.S. Department of Agriculture,U.S. Department of Agriculture,regstats.usda.gov,https://www.regstats.usda.gov/,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-agriculture-race-ethnicity-and-gender-program-statistics-regstats,Agriculture,1,"Race, Ethnicity, and Gender Program Statistics (Regstats)",1039,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/usda-regstats/data.zip,ZIP,Finished,0.018,2024-12-31,UI,"This resource contains county-level participant data for various Farm Service Agency programs by race, ethnicity, and gender, as well as aggregate program information for all states, years, and geographies available.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
1887,_datasets/treasury-advisory-committee-on-racial-equity-tacre-materials.md,data_rescue_project,Treasury Advisory Committee on Racial Equity (TACRE) Materials,Departmental Offices,Department of the Treasury,home.treasury.gov,https://home.treasury.gov/about/offices/equity-hub/TACRE/tacre-meeting-materials,,2025-05-19,True,https://datacatalog.urban.org/dataset/treasury-advisory-committee-racial-equity-tacre-materials,Finance & Budget,1,Treasury Advisory Committee on Racial Equity (TACRE) Materials,1046,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/treasury-advisory-committee-on-racial-equity/data.zip,ZIP,Finished,0.025,2024-12-26,UI,This resource contains meeting materials and relevant Federal Register notices from the Treasury Advisory Committee on Racial Equity website.,urban-data-catalog.s3.us-east-1.amazonaws.com,,,,
2024,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2,Youth Risk Behavior Surveillance System (YRBSS),1040,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/31/health-policy/yrbss/data.zip,ZIP,Finished,0.877,2025-02-05,UI,"The Youth Risk Behavior Surveillance System focuses on health risk behaviors that are often established during childhood and early adolescence, including behaviors associated with tobacco use, alcohol and other drug use, unintentional injuries, sexual behaviors related to unintended pregnancy and sexually transmitted infections, unhealthy diet, and inadequate physical activity.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,


dataverse dois

In [68]:
# DOI-less rows hosted on Harvard Dataverse.
missing_doi.loc[missing_doi["domain"].eq("dataverse.harvard.edu")]

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess


In [56]:
# Harvard Dataverse URLs carry the DOI in the `persistentId` query parameter,
# either plain ("doi10.7910/DVN/...") or percent-encoded
# ("doi%3A10.7910%2FDVN%2F..."), so the value is URL-decoded before cleaning.
is_harvard = (df['domain'].eq('dataverse.harvard.edu') & df['url'].notna())

df.loc[is_harvard, 'doi_guess'] = (
    df.loc[is_harvard, 'url']
      .astype(str)
      .str.extract(r'persistentId=([^&#]+)', expand=False)
      .apply(lambda x: unquote(x) if pd.notna(x) else pd.NA)
      .str.lower()
      .str.strip()
      # Strip only the leading "doi" / "doi:" scheme. A plain replace of "doi"
      # would also delete those letters from inside a suffix (e.g. ".../adoixk").
      .str.replace(r'^doi:?', '', regex=True)
)

df.loc[is_harvard, ['url', 'doi_guess']].head()

Unnamed: 0,url,doi_guess
2,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/F3A62W,10.7910/dvn/f3a62w
107,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/MWNGDP,10.7910/dvn/mwngdp
110,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/U7WVNO,10.7910/dvn/u7wvno
129,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910%2FDVN%2FFRAGKR,10.7910/dvn/fragkr
133,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/0L9K3E,10.7910/dvn/0l9k3e


In [57]:
# Sanity check: look one guessed Dataverse DOI up in the DataCite REST API and
# print the raw response body to see whether the guess resolves to a record.
test = "10.7910/dvn/f3a62w"

url = f"https://api.datacite.org/dois/{test}"

headers = {
    "Accept": "application/vnd.api+json"
}

# Bound the wait: without a timeout, requests can block indefinitely and hang
# the kernel if the API stalls.
response = requests.get(url, headers=headers, timeout=30)

print(response.text)

{"data":{"id":"10.7910/dvn/f3a62w","type":"dois","attributes":{"doi":"10.7910/dvn/f3a62w","prefix":"10.7910","suffix":"dvn/f3a62w","identifiers":[],"alternateIdentifiers":[],"creators":[{"name":"EPA","nameType":"Organizational","affiliation":["U.S. EPA"],"nameIdentifiers":[]}],"titles":[{"title":"Extracted Data From: Downloadable 2006 IUR Public Database"}],"publisher":"Harvard Dataverse","container":{},"publicationYear":2025,"subjects":[{"subject":"Chemistry"},{"subject":"Earth and Environmental Sciences"},{"subject":"Environmental Health","schemeUri":"https://tools.niehs.nih.gov/cchhglossary/"},{"subject":"Exposure"}],"contributors":[{"name":"CAFE","nameType":"Personal","affiliation":[],"contributorType":"ContactPerson","nameIdentifiers":[]}],"dates":[{"date":"2025-02-18","dateType":"Submitted"},{"date":"2025-02-26","dateType":"Available"},{"date":"2025-02-26","dateType":"Updated"},{"date":"2006-01-01/2006-12-31","dateType":"Other","dateInformation":"Time period covered by the data"}

In [58]:
# Fill only the rows whose `doi` is still missing; existing DOIs are kept.
df["doi"] = df["doi"].fillna(df["doi_guess"])


zenodo dois

In [70]:
# First few DOI-less rows hosted on Zenodo.
missing_doi.loc[missing_doi["domain"].eq("zenodo.org")].head()

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess
92,_datasets/annual-energy-outlook.md,data_rescue_project,Annual Energy Outlook,US Department of Energy - Office of the CIO,Department of Energy,eia.gov,https://www.eia.gov/outlooks/aeo/data/browser/,,2025-02-12,False,,Energy,1,Annual Energy Outlook,73,https://zenodo.org/records/10838488,"XLSX, ZIP",Finished,6.7,2025-03-19,"PEDP, CaCo",,zenodo.org,,,,
96,_datasets/annual-technology-baseline-atb-for-electricity-and-transportation.md,data_rescue_project,Annual Technology Baseline (ATB) for Electricity and Transportation,National Renewable Energy Laboratory,Department of Energy,nrel.gov,https://atb.nrel.gov/,,2025-03-23,False,,Climate & Environment; Energy,1,Annual Technology Baseline (ATB) for Electricity and Transportation,234,https://zenodo.org/records/14784563,"Parquet, XLSX, JSON",Finished,3.7,2024-08-01,"PEDP, CaCo",,zenodo.org,,,,
182,_datasets/clean-air-markets-program-data-campd-daily-emissions-for-2020-by-state.md,data_rescue_project,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2020 by State,Environmental Protection Agency,Environmental Protection Agency,campd.epa.gov,https://campd.epa.gov/data,,2025-10-01,False,,Climate & Environment,1,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2020 by State,1300,https://zenodo.org/records/15047648,CSV,Finished,5.9,2025-03-18,"PEDP, EDGI",,zenodo.org,,,,
183,_datasets/clean-air-markets-program-data-campd-daily-emissions-for-2024-by-quarter.md,data_rescue_project,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2024 by Quarter,Environmental Protection Agency,Environmental Protection Agency,campd.epa.gov,https://campd.epa.gov/data,,2025-10-01,False,,Climate & Environment,1,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2024 by Quarter,1299,https://zenodo.org/records/15047486,CSV,Finished,6.5,2025-03-18,"PEDP, EDGI",,zenodo.org,,,,
184,_datasets/clean-air-markets-program-data-campd-daily-emissions-for-2024-by-state.md,data_rescue_project,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2024 by State,Environmental Protection Agency,Environmental Protection Agency,campd.epa.gov,https://campd.epa.gov/data,,2025-10-01,False,,Climate & Environment,1,Clean Air Markets Program Data (CAMPD) Daily Emissions for 2024 by State,1302,https://zenodo.org/records/15047671,CSV,Finished,3.0,2025-03-18,"PEDP, EDGI",,zenodo.org,,,,


In [71]:
# Zenodo links look like https://zenodo.org/records/<id>; the guessed DOI is
# built as 10.5281/zenodo.<id>. Rows whose URL has no record id get pd.NA.
is_zenodo = (df['domain'].eq('zenodo.org') & df['url'].notna())

zenodo_link_text = df.loc[is_zenodo, 'url'].astype(str)
zenodo_record_ids = zenodo_link_text.str.extract(r'/records?/(\d+)', expand=False)
zenodo_doi_guesses = zenodo_record_ids.apply(
    lambda x: f'10.5281/zenodo.{x}' if pd.notna(x) else pd.NA
)
df.loc[is_zenodo, 'doi_guess'] = zenodo_doi_guesses.str.lower()

df.loc[is_zenodo, ['url', 'doi_guess']].head()

Unnamed: 0,url,doi_guess
92,https://zenodo.org/records/10838488,10.5281/zenodo.10838488
96,https://zenodo.org/records/14784563,10.5281/zenodo.14784563
182,https://zenodo.org/records/15047648,10.5281/zenodo.15047648
183,https://zenodo.org/records/15047486,10.5281/zenodo.15047486
184,https://zenodo.org/records/15047671,10.5281/zenodo.15047671


In [72]:
# Sanity check: look one guessed Zenodo DOI up in the DataCite REST API and
# print the raw response body to see whether the guess resolves to a record.
test = '10.5281/zenodo.10838488'
url = f'https://api.datacite.org/dois/{test}'
headers = {'Accept': 'application/vnd.api+json'}
# Bound the wait: without a timeout, requests can block indefinitely and hang
# the kernel if the API stalls.
response = requests.get(url, headers=headers, timeout=30)

print(response.text)

{"data":{"id":"10.5281/zenodo.10838488","type":"dois","attributes":{"doi":"10.5281/zenodo.10838488","prefix":"10.5281","suffix":"zenodo.10838488","identifiers":[{"identifier":"oai:zenodo.org:10838488","identifierType":"oai"}],"alternateIdentifiers":[{"alternateIdentifierType":"oai","alternateIdentifier":"oai:zenodo.org:10838488"}],"creators":[{"name":"Catalyst Cooperative","nameType":"Personal","familyName":"Catalyst Cooperative","affiliation":["Catalyst Cooperative"],"nameIdentifiers":[]}],"titles":[{"title":"PUDL Raw EIA Annual Energy Outlook (AEO)"}],"publisher":"Zenodo","container":{},"publicationYear":2024,"subjects":[{"subject":"MW"},{"subject":"MWh"},{"subject":"annual energy outlook"},{"subject":"distribution"},{"subject":"eia"},{"subject":"eia aeo"},{"subject":"electric"},{"subject":"electricity"},{"subject":"energy"},{"subject":"energy consumption"},{"subject":"energy information administration"},{"subject":"energy supply"},{"subject":"federal"},{"subject":"fuel projections"}

In [73]:
# Fill only the rows whose `doi` is still missing; existing DOIs are kept.
df["doi"] = df["doi"].fillna(df["doi_guess"])


getting metadata from datacite

In [91]:
# Distinct DOIs, trimmed and lower-cased so case/whitespace variants collapse.
present_doi_text = df["doi"].dropna().astype(str)
dois = present_doi_text.str.strip().str.lower().unique()
dois

array(['10.3886/e226824v1', '10.3886/e223443v1', '10.7910/dvn/f3a62w',
       ..., '10.3886/e238627v1', '10.3886/e220761v1', '10.3886/e237626v1'],
      dtype=object)

In [92]:
# Normalise every non-missing DOI (string, trimmed, lower-case), then
# de-duplicate and report how many distinct DOIs there are.
normalised_doi_text = df["doi"].dropna().astype(str).str.strip().str.lower()
unique_dois = normalised_doi_text.unique()

len(unique_dois)

1400

Also guess DOIs for DataLumos URLs that have no version segment (assume V1)

In [29]:
# grab project id
df["project_id"] = df["url"].astype(str).str.extract(r"/project/(\d+)", expand=False)

# grab version if present, otherwise default to V1 for datalumos project links
df["version"] = df["url"].astype(str).str.extract(r"/version/(V\d+)", expand=False)

mask_datalumos = df["domain"].isin(["www.datalumos.org", "datalumos.org"]) & df["project_id"].notna()
df.loc[mask_datalumos & df["version"].isna(), "version"] = "V1"

# build DOI guess for datalumos rows where we have project_id + version
mask_guess = mask_datalumos & df["version"].notna()
df.loc[mask_guess, "doi_guess"] = (
    "10.3886/E" + df.loc[mask_guess, "project_id"] + df.loc[mask_guess, "version"]
)

df["doi"] = df["doi"].fillna(df["doi_guess"])

df.loc[mask_datalumos, ["url", "project_id", "version", "doi_guess", "doi"]]

Unnamed: 0,url,project_id,version,doi_guess,doi
1,https://www.datalumos.org/datalumos/project/223443/version/V1/view,223443,V1,10.3886/E223443V1,10.3886/E223443V1
3,https://www.datalumos.org/datalumos/project/222881/version/V1/view,222881,V1,10.3886/E222881V1,10.3886/E222881V1
4,https://www.datalumos.org/datalumos/project/222043/version/V1/view,222043,V1,10.3886/E222043V1,10.3886/E222043V1
5,https://www.datalumos.org/datalumos/project/222044/version/V2/view,222044,V2,10.3886/E222044V2,10.3886/E222044V2
6,https://www.datalumos.org/datalumos/project/222044/version/V2/view,222044,V2,10.3886/E222044V2,10.3886/E222044V2
...,...,...,...,...,...
2015,https://www.datalumos.org/datalumos/project/227006/version/V1/view,227006,V1,10.3886/E227006V1,10.3886/E227006V1
2020,https://www.datalumos.org/datalumos/project/238627/view,238627,V1,10.3886/E238627V1,10.3886/E238627V1
2021,https://www.datalumos.org/datalumos/project/223063/version/V1/view,223063,V1,10.3886/E223063V1,10.3886/E223063V1
2022,https://www.datalumos.org/datalumos/project/220761/view,220761,V1,10.3886/E220761V1,10.3886/E220761V1


Let's look at some rows that share the same DOI

In [93]:
# Rows whose DOI occurs more than once. Two details matter here:
#  - Series.duplicated() treats missing values as equal to each other, so rows
#    without any DOI would all be reported as duplicates unless excluded;
#  - the column mixes lower-cased DOIs (doi.org / Dataverse / Zenodo) with
#    upper-case DataLumos guesses, so compare on a trimmed, lower-cased key.
doi_key = df["doi"].astype("string").str.strip().str.lower()
dup_rows = df[doi_key.notna() & doi_key.duplicated(keep=False)]
dup_rows[["doi", "url", "domain", 'title']].head(20)

Unnamed: 0,doi,url,domain,title
4,10.3886/E222043V1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,www.datalumos.org,2016 AmeriCorps MES AmeriCorps Member Exit Survey
5,10.3886/E222044V2,https://www.datalumos.org/datalumos/project/222044/version/V2/view,www.datalumos.org,2017-2023 CEV Findings National Rates of All Measures by Demographics from the Current Population Survey Civic Engagement and Volunteering Supplement
6,10.3886/E222044V2,https://www.datalumos.org/datalumos/project/222044/version/V2/view,www.datalumos.org,2017-2023 CEV Findings National Rates of All Measures from the Current Population Survey Civic Engagement and Volunteering Supplement
7,10.3886/E222044V2,https://www.datalumos.org/datalumos/project/222044/version/V2/view,www.datalumos.org,2017-2023 CEV Findings State-Level Rates of All Measures from the Current Population Survey Civic Engagement and Volunteering Supplement
8,10.3886/E222043V1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,www.datalumos.org,2017 AmeriCorps MES AmeriCorps Member Exit Survey
10,10.3886/E222044V2,https://www.datalumos.org/datalumos/project/222044/version/V2/view,www.datalumos.org,2017 CEV Data Current Population Survey Civic Engagement and Volunteering Supplement
11,10.3886/E222043V1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,www.datalumos.org,2018 AmeriCorps MES AmeriCorps Member Exit Survey
15,10.3886/E222043V1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,www.datalumos.org,2019 AmeriCorps MES AmeriCorps Member Exit Survey
16,10.3886/E224361V1,https://www.datalumos.org/datalumos/project/224361/version/V1/view,www.datalumos.org,2019 CDFI Program Awardee Performance Data Snapshot
17,10.3886/E222044V2,https://www.datalumos.org/datalumos/project/222044/version/V2/view,www.datalumos.org,2019 CEV Data Current Population Survey Civic Engagement and Volunteering Supplement


In [94]:
# Number of rows per DOI (case-sensitive; missing DOIs are not counted).
df['doi'].value_counts()

doi
10.3886/E222581V1    51
10.3886/E223141V1    48
10.3886/E229201V1    37
10.3886/E223001V1    30
10.3886/E224621V1    25
                     ..
10.3886/E240277V1     1
10.3886/E239260V1     1
10.3886/E239259V1     1
10.3886/E239108V1     1
10.3886/E237626V1     1
Name: count, Length: 1401, dtype: int64

In [95]:
# All rows that point at the most frequently shared DOI.
df.loc[df["doi"].eq("10.3886/E222581V1")]

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess
1142,_datasets/medical-expenditure-panel-survey-meps-topics.md,data_rescue_project,Medical Expenditure Panel Survey (MEPS) Topics,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp,,2025-03-18,False,,Health & Healthcare,1,Medical Expenditure Panel Survey (MEPS) Topics,447,https://www.datalumos.org/datalumos/project/222581/version/V1/view,"TSV, ZIP",Finished,0.243,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1156,_datasets/meps-topics-access-to-health-care.md,data_rescue_project,MEPS Topics Access to Health Care,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=1Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Access to Health Care,448,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1157,_datasets/meps-topics-childrens-health.md,data_rescue_project,MEPS Topics Children's Health,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=2Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Children's Health,449,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1158,_datasets/meps-topics-childrens-insurance-coverage.md,data_rescue_project,MEPS Topics Children's Insurance Coverage,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=3Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Children's Insurance Coverage,450,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1159,_datasets/meps-topics-dental-visitsuseevents-and-expenditures.md,data_rescue_project,MEPS Topics Dental Visits/Use/Events and Expenditures,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=47Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Dental Visits/Use/Events and Expenditures,451,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1160,_datasets/meps-topics-disability.md,data_rescue_project,MEPS Topics Disability,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=20Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Disability,452,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1161,_datasets/meps-topics-doctor-visitsuseevents-and-expenditures.md,data_rescue_project,MEPS Topics Doctor Visits/Use/Events and Expenditures,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=21Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Doctor Visits/Use/Events and Expenditures,453,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1162,_datasets/meps-topics-doctors-separately-billing.md,data_rescue_project,"MEPS Topics Doctors, Separately Billing",Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=22Z-1,,2025-03-18,False,,Health & Healthcare,1,"MEPS Topics Doctors, Separately Billing",454,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1163,_datasets/meps-topics-elderly-health-care.md,data_rescue_project,MEPS Topics Elderly Health Care,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=8Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Elderly Health Care,455,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1
1164,_datasets/meps-topics-emergency-room-visitsuseevents-and-expenditures.md,data_rescue_project,MEPS Topics Emergency Room Visits/Use/Events and Expenditures,Agency for Healthcare Research and Quality,Department of Health and Human Services,meps.ahrq.gov,https://meps.ahrq.gov/mepsweb/data_stats/MEPS_topics.jsp?topicid=23Z-1,,2025-03-18,False,,Health & Healthcare,1,MEPS Topics Emergency Room Visits/Use/Events and Expenditures,456,https://www.datalumos.org/datalumos/project/222581/version/V1/view,PDF,Finished,0.0,2025-03-03,"DRP, DL",,www.datalumos.org,10.3886/E222581V1,222581,V1,10.3886/E222581V1


Harvesting DataCite metadata for each unique DOI

In [96]:
# Contact address advertised to the API in the User-Agent header.
MAILTO = "maja.murawka@student.uva.nl"
session = requests.Session()
session.headers.update({
    "Accept": "application/vnd.api+json",
    "User-Agent": f"metadata-harvest/1.0 (mailto:{MAILTO})"
})

# Scalar attributes copied verbatim into "dc_<name>" columns (order = column order).
DC_SCALAR_ATTRS = [
    "prefix", "suffix", "publisher", "publicationYear", "language",
    "version", "url", "contentUrl", "schemaVersion", "source",
    "isActive", "state", "reason", "metadataVersion",
    "created", "registered", "published", "updated",
    "viewCount", "downloadCount", "referenceCount", "citationCount",
    "partCount", "partOfCount", "versionCount", "versionOfCount",
]

# Keys of the "types" attribute that get their own "dc_types_<name>" column.
DC_TYPE_KEYS = ["ris", "bibtex", "citeproc", "schemaOrg", "resourceTypeGeneral"]

# Entries of the "relationships" object flattened into "dc_rel_<name>" columns.
DC_RELATIONSHIPS = [
    "client", "provider", "media", "versionOf", "versions",
    "parts", "partOf", "citations", "references",
]


def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _join_values(items, key):
    """Join the truthy ``item[key]`` values of the dicts in ``items`` with "; ".

    Non-dict items and missing/empty values are skipped, so a null entry can
    no longer make the join raise. Returns None when nothing is left.
    """
    values = [str(item[key]) for item in items if isinstance(item, dict) and item.get(key)]
    return "; ".join(values) or None


def _first_value(items, type_key, wanted, value_key):
    """Return ``item[value_key]`` of the first dict whose ``type_key`` equals ``wanted``.

    The comparison is case-insensitive on the item's side; ``wanted`` must be
    lowercase. Returns None when no item matches.
    """
    for item in items:
        if isinstance(item, dict) and str(item.get(type_key) or "").lower() == wanted:
            return item.get(value_key)
    return None


def _relationship_ids(relationships, name):
    """Return the id(s) linked under ``relationships[name]["data"]``.

    A single linked object yields its id; a list yields the ids joined with
    "; " (None when the list has no ids); anything else yields None.
    """
    linked = _as_dict(relationships.get(name)).get("data")
    if isinstance(linked, dict):
        return linked.get("id")
    if isinstance(linked, list):
        return _join_values(linked, "id")
    return None


def _get_with_retry(session, url, timeout=30, max_retries=3, base_delay=2.0, max_delay=60.0):
    """GET ``url``, retrying while the server answers HTTP 429.

    Waits for the number of seconds given in a numeric ``Retry-After`` header
    when present, otherwise backs off exponentially from ``base_delay``; every
    wait is capped at ``max_delay``. After ``max_retries`` retries the last
    response is returned as-is, so the caller still sees the 429.
    """
    attempt = 0
    while True:
        response = session.get(url, timeout=timeout)
        if response.status_code != 429 or attempt >= max_retries:
            return response
        retry_after = (response.headers.get("Retry-After") or "").strip()
        delay = float(retry_after) if retry_after.isdigit() else base_delay * (2 ** attempt)
        time.sleep(min(delay, max_delay))
        attempt += 1


def flatten_datacite_record(doi, payload):
    """Flatten one DataCite response body into a flat dict of "dc_*" columns.

    ``payload`` is the decoded JSON of a ``/dois/{doi}`` response; its
    ``data.attributes`` and ``data.relationships`` objects are read. The full
    payload is kept under "dc_raw". Key insertion order is deliberate: it
    determines the column order of the resulting DataFrame.
    """
    record = _as_dict(_as_dict(payload).get("data"))
    attrs = _as_dict(record.get("attributes"))
    relationships = _as_dict(record.get("relationships"))

    row = {
        "doi": doi,
        "dc_ok": True,
        "dc_status": 200,
        "dc_raw": payload,
    }

    for key in DC_SCALAR_ATTRS:
        row[f"dc_{key}"] = attrs.get(key)

    types = _as_dict(attrs.get("types"))
    for key in DC_TYPE_KEYS:
        row[f"dc_types_{key}"] = types.get(key)

    row["dc_identifiers"] = attrs.get("identifiers")
    row["dc_alternateIdentifiers"] = attrs.get("alternateIdentifiers")

    titles = attrs.get("titles") or []
    row["dc_title"] = _as_dict(titles[0]).get("title") if titles else None
    row["dc_titles_all"] = titles

    creators = attrs.get("creators") or []
    row["dc_creators_all"] = creators
    row["dc_creators_names"] = _join_values(creators, "name")
    row["dc_creators_types"] = _join_values(creators, "nameType")

    contributors = attrs.get("contributors") or []
    row["dc_contributors_all"] = contributors
    row["dc_contributors_names"] = _join_values(contributors, "name")

    subjects = attrs.get("subjects") or []
    row["dc_subjects_all"] = subjects
    row["dc_subjects"] = _join_values(subjects, "subject")

    dates = attrs.get("dates") or []
    row["dc_dates_all"] = dates
    # Convenience columns: first "Collected" and first "Issued" date, if any.
    row["dc_date_collected"] = _first_value(dates, "dateType", "collected", "date")
    row["dc_date_issued"] = _first_value(dates, "dateType", "issued", "date")

    descriptions = attrs.get("descriptions") or []
    row["dc_descriptions_all"] = descriptions
    row["dc_abstract"] = _first_value(descriptions, "descriptionType", "abstract", "description")

    geo_locations = attrs.get("geoLocations") or []
    row["dc_geoLocations_all"] = geo_locations
    row["dc_geo_places"] = _join_values(geo_locations, "geoLocationPlace")

    row["dc_rightsList"] = attrs.get("rightsList")
    row["dc_fundingReferences"] = attrs.get("fundingReferences")

    related = attrs.get("relatedIdentifiers") or []
    row["dc_relatedIdentifiers_all"] = related
    # relationType is matched case-sensitively, exactly "IsVersionOf".
    version_of = [
        entry for entry in related
        if isinstance(entry, dict) and (entry.get("relationType") or "") == "IsVersionOf"
    ]
    row["dc_isVersionOf"] = _join_values(version_of, "relatedIdentifier")

    # Structured links (e.g. provider/client/versionOf ids).
    for name in DC_RELATIONSHIPS:
        row[f"dc_rel_{name}"] = _relationship_ids(relationships, name)

    return row


def fetch_datacite_row(session, doi):
    """Fetch one DOI from the DataCite API and return its result row.

    Always returns a dict and never raises: a non-200 answer or any exception
    becomes a row with ``dc_ok=False`` plus ``dc_status`` / ``dc_error``, so a
    single bad DOI cannot abort the whole harvest.
    """
    url = f"https://api.datacite.org/dois/{doi}"
    try:
        response = _get_with_retry(session, url)
        if response.status_code != 200:
            return {
                "doi": doi,
                "dc_ok": False,
                "dc_status": response.status_code,
                "dc_error": response.text[:200],
            }
        return flatten_datacite_record(doi, response.json())
    except Exception as exc:
        # Deliberately broad (best-effort harvest); the exception type is kept
        # because str(exc) alone is often uninformative (e.g. for KeyError).
        return {
            "doi": doi,
            "dc_ok": False,
            "dc_status": None,
            "dc_error": f"{type(exc).__name__}: {exc}",
        }


# Plain loop (not a comprehension) so `rows` keeps the partial results if the
# harvest is interrupted part-way through.
rows = []
for doi in unique_dois:
    rows.append(fetch_datacite_row(session, doi))

datacite_df = pd.DataFrame(rows)
datacite_df.head()

Unnamed: 0,doi,dc_ok,dc_status,dc_raw,dc_prefix,dc_suffix,dc_publisher,dc_publicationYear,dc_language,dc_version,dc_url,dc_contentUrl,dc_schemaVersion,dc_source,dc_isActive,dc_state,dc_reason,dc_metadataVersion,dc_created,dc_registered,dc_published,dc_updated,dc_viewCount,dc_downloadCount,dc_referenceCount,dc_citationCount,dc_partCount,dc_partOfCount,dc_versionCount,dc_versionOfCount,dc_types_ris,dc_types_bibtex,dc_types_citeproc,dc_types_schemaOrg,dc_types_resourceTypeGeneral,dc_identifiers,dc_alternateIdentifiers,dc_title,dc_titles_all,dc_creators_all,dc_creators_names,dc_creators_types,dc_contributors_all,dc_contributors_names,dc_subjects_all,dc_subjects,dc_dates_all,dc_date_collected,dc_date_issued,dc_descriptions_all,dc_abstract,dc_geoLocations_all,dc_geo_places,dc_rightsList,dc_fundingReferences,dc_relatedIdentifiers_all,dc_isVersionOf,dc_rel_client,dc_rel_provider,dc_rel_media,dc_rel_versionOf,dc_rel_versions,dc_rel_parts,dc_rel_partOf,dc_rel_citations,dc_rel_references,dc_error
0,10.3886/e226824v1,True,200,"{'data': {'id': '10.3886/e226824v1', 'type': 'dois', 'attributes': {'doi': '10.3886/e226824v1', 'prefix': '10.3886', 'suffix': 'e226824v1', 'identifiers': [], 'alternateIdentifiers': [], 'creators': [{'name': 'National Labor Relations Board', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}], 'titles': [{'lang': 'en', 'title': '10(j) Injunctions'}], 'publisher': 'ICPSR - Interuniversity Consortium for Political and Social Research', 'container': {}, 'publicationYear': 2025, 'subjects': [{'lang': 'en', 'subject': 'labor'}, {'lang': 'en', 'subject': 'labor unions'}, {'lang': 'en', 'subject': 'labor disputes'}, {'lang': 'en', 'subject': 'injunctions'}, {'lang': 'en', 'subject': 'collective bargaining'}, {'lang': 'en', 'subject': 'labor relations'}, {'lang': 'en', 'subject': 'unionization'}], 'contributors': [], 'dates': [{'date': '2010-01-01/2025-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}], 'language': 'en', 'types': {'ris': 'DATA', 'bibtex': 'misc', 'citeproc': 'dataset', 'schemaOrg': 'Dataset', 'resourceType': 'administrative records data', 'resourceTypeGeneral': 'Dataset'}, 'relatedIdentifiers': [{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e226824', 'relatedIdentifierType': 'DOI'}], 'relatedItems': [], 'sizes': [], 'formats': [], 'version': 'v1', 'rightsList': [], 'descriptions': [{'lang': 'en', 'description': 'Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. 
Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.<br><br>There are 15 categories of labor disputes in which Section 10(j) injunctions may be appropriate, listed at [https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions/section-10j-categories]. Under NLRB processes, potential cases are identified by Regional Offices and reviewed by the General Counsel, who must seek authorization from the Board before proceeding to court. <br><br>The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010. <br>', 'descriptionType': 'Abstract'}], 'geoLocations': [{'geoLocationPlace': 'United States'}], 'fundingReferences': [], 'xml': 'PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9InllcyI/Pgo8cmVzb3VyY2UgeG1sbnM9Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IiB4bWxuczp4c2k9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hLWluc3RhbmNlIiB4c2k6c2NoZW1hTG9jYXRpb249Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IGh0dHBzOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC4zODg2L0UyMjY4MjRWMTwvaWRlbnRpZmllcj4KICA8Y3JlYXRvcnM+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJPcmdhbml6YXRpb25hbCI+TmF0aW9uYWwgTGFib3IgUmVsYXRpb25zIEJvYXJkPC9jcmVhdG9yTmFtZT4KICAgIDwvY3JlYXRvcj4KICA8L2NyZWF0b3JzPgogIDx0aXRsZXM+CiAgICA8dGl0bGUgeG1sOmxhbmc9ImVuIj4xMChqKSBJbmp1bmN0aW9uczwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5JQ1BTUiAtIEludGVydW5pdmVyc2l0eSBDb25zb3J0aXVtIGZvciBQb2xpdGljYWwgYW5kIFNvY2lhbCBSZXNlYXJjaDwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAyNTwvcHVibGljYXRpb25ZZWFyPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iRGF0YXNldCI+YWRtaW5pc3RyYXRpdmUgcmVjb3JkcyBkYXRhPC9yZXNvdXJjZVR5cGU+CiAgPHN1YmplY3RzPgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIj5sYWJvcjwvc3ViamVjdD4KICAgIDxzdWJqZWN
0IHhtbDpsYW5nPSJlbiI+bGFib3IgdW5pb25zPC9zdWJqZWN0PgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIj5sYWJvciBkaXNwdXRlczwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiI+aW5qdW5jdGlvbnM8L3N1YmplY3Q+CiAgICA8c3ViamVjdCB4bWw6bGFuZz0iZW4iPmNvbGxlY3RpdmUgYmFyZ2FpbmluZzwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHhtbDpsYW5nPSJlbiI+bGFib3IgcmVsYXRpb25zPC9zdWJqZWN0PgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIj51bmlvbml6YXRpb248L3N1YmplY3Q+CiAgPC9zdWJqZWN0cz4KICA8ZGF0ZXM+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQ29sbGVjdGVkIj4yMDEwLTAxLTAxLzIwMjUtMTItMzE8L2RhdGU+CiAgPC9kYXRlcz4KICA8bGFuZ3VhZ2U+ZW5nPC9sYW5ndWFnZT4KICA8cmVsYXRlZElkZW50aWZpZXJzPgogICAgPHJlbGF0ZWRJZGVudGlmaWVyIHJlbGF0ZWRJZGVudGlmaWVyVHlwZT0iRE9JIiByZWxhdGlvblR5cGU9IklzVmVyc2lvbk9mIj4xMC4zODg2L0UyMjY4MjQ8L3JlbGF0ZWRJZGVudGlmaWVyPgogIDwvcmVsYXRlZElkZW50aWZpZXJzPgogIDx2ZXJzaW9uPnYxPC92ZXJzaW9uPgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCIgeG1sOmxhbmc9ImVuIj5TZWN0aW9uIDEwKGopIG9mIHRoZSBOYXRpb25hbCBMYWJvciBSZWxhdGlvbnMgQWN0IGF1dGhvcml6ZXMgdGhlIE5hdGlvbmFsIExhYm9yIFJlbGF0aW9ucyBCb2FyZCB0byBzZWVrIHRlbXBvcmFyeSBpbmp1bmN0aW9ucyBhZ2FpbnN0IGVtcGxveWVycyBhbmQgdW5pb25zIGluIGZlZGVyYWwgZGlzdHJpY3QgY291cnRzIHRvIHN0b3AgdW5mYWlyIGxhYm9yIHByYWN0aWNlcyB3aGlsZSB0aGUgY2FzZSBpcyBiZWluZyBsaXRpZ2F0ZWQgYmVmb3JlIGFkbWluaXN0cmF0aXZlIGxhdyBqdWRnZXMgYW5kIHRoZSBCb2FyZC4gVGhlc2UgdGVtcG9yYXJ5IGluanVuY3Rpb25zIGFyZSBuZWVkZWQgdG8gcHJvdGVjdCB0aGUgcHJvY2VzcyBvZiBjb2xsZWN0aXZlIGJhcmdhaW5pbmcgYW5kIGVtcGxveWVlIHJpZ2h0cyB1bmRlciB0aGUgQWN0LCBhbmQgdG8gZW5zdXJlIHRoYXQgQm9hcmQgZGVjaXNpb25zIHdpbGwgYmUgbWVhbmluZ2Z1bC4gVGhlIHNlY3Rpb24gd2FzIGFkZGVkIGFzIHBhcnQgb2YgYSBzZXQgb2YgcmVmb3JtcyB0byB0aGUgQWN0IGluIDE5NDcuIE92ZXIgdGhlIHllYXJzLCBhbGwgTkxSQiBHZW5lcmFsIENvdW5zZWxzIGhhdmUgbWFkZSB1c2Ugb2YgdGhpcyBlZmZlY3RpdmUgZW5mb3JjZW1lbnQgdG9vbCwgYXMgc2hvd24gaW4gdGhpcyBjaGFydC4mbHQ7YnImZ3Q7Jmx0O2JyJmd0O1RoZXJlIGFyZSAxNSBjYXRlZ29yaWVzIG9mIGxhYm9yIGRpc3B1dGVzIGluIHdoaWNoIFNlY3Rpb24gMTAoaikgaW5qdW5jdGlvbnMgbWF5IGJlIGFwcHJvcHJpYXRlLCBsaXN0ZWQgYXQgWyZsdDthIHRhcmdldD0iX2JsYW5rIiByZWw9Im5vZm9
sbG93IiBocmVmPSJodHRwczovL3d3dy5ubHJiLmdvdi93aGF0LXdlLWRvL2ludmVzdGlnYXRlLWNoYXJnZXMvMTBqLWluanVuY3Rpb25zL3NlY3Rpb24tMTBqLWNhdGVnb3JpZXNdIiZndDtodHRwczovL3d3dy5ubHJiLmdvdi93aGF0LXdlLWRvL2ludmVzdGlnYXRlLWNoYXJnZXMvMTBqLWluanVuY3Rpb25zL3NlY3Rpb24tMTBqLWNhdGVnb3JpZXMmbHQ7L2EmZ3Q7XS4gVW5kZXIgTkxSQiBwcm9jZXNzZXMsIHBvdGVudGlhbCBjYXNlcyBhcmUgaWRlbnRpZmllZCBieSBSZWdpb25hbCBPZmZpY2VzIGFuZCByZXZpZXdlZCBieSB0aGUgR2VuZXJhbCBDb3Vuc2VsLCB3aG8gbXVzdCBzZWVrIGF1dGhvcml6YXRpb24gZnJvbSB0aGUgQm9hcmQgYmVmb3JlIHByb2NlZWRpbmcgdG8gY291cnQuICZsdDticiZndDsmbHQ7YnImZ3Q7VGhlIGNzdiBjb250YWlucyBBdXRob3JpemF0aW9uIERhdGVzLCBDYXNlIE51bWJlcnMsIENhc2UgTmFtZXMsIGFuZCBJbmp1bmN0aW9uIFN0YXR1cyBhcyBvZiB0aGUgZGF0ZSBjb2xsZWN0ZWQgKDIwMjUtMDQtMDcpLiBUaGlzIGxpc3QgaXMgYWxsIDEwKGopIGluanVuY3Rpb24gY2FzZXMgYXV0aG9yaXplZCBieSB0aGUgQm9hcmQgc2luY2UgU2VwdGVtYmVyIDEsIDIwMTAuICZsdDticiZndDs8L2Rlc2NyaXB0aW9uPgogIDwvZGVzY3JpcHRpb25zPgogIDxnZW9Mb2NhdGlvbnM+CiAgICA8Z2VvTG9jYXRpb24+CiAgICAgIDxnZW9Mb2NhdGlvblBsYWNlIHhtbG5zOnhzPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYSIgeHNpOnR5cGU9InhzOnN0cmluZyI+VW5pdGVkIFN0YXRlczwvZ2VvTG9jYXRpb25QbGFjZT4KICAgIDwvZ2VvTG9jYXRpb24+CiAgPC9nZW9Mb2NhdGlvbnM+CjwvcmVzb3VyY2U+', 'url': 'https://www.datalumos.org/datalumos/project/226824/version/V1/view', 'contentUrl': None, 'metadataVersion': 0, 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'source': 'api', 'isActive': True, 'state': 'findable', 'reason': None, 'viewCount': 0, 'viewsOverTime': [], 'downloadCount': 0, 'downloadsOverTime': [], 'referenceCount': 0, 'citationCount': 0, 'citationsOverTime': [], 'partCount': 0, 'partOfCount': 0, 'versionCount': 0, 'versionOfCount': 1, 'created': '2025-04-15T15:51:51.000Z', 'registered': '2025-04-15T15:51:52.000Z', 'published': '2025', 'updated': '2025-04-15T15:51:52.000Z'}, 'relationships': {'client': {'data': {'id': 'gesis.icpsr', 'type': 'clients'}}, 'provider': {'data': {'id': 'icpsr', 'type': 'providers'}}, 'media': {'data': {'id': '10.3886/e226824v1', 'type': 'media'}}, 'references': 
{'data': []}, 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 'versions': {'data': []}, 'versionOf': {'data': [{'id': '10.3886/e226824', 'type': 'dois'}]}}}}",10.3886,e226824v1,ICPSR - Interuniversity Consortium for Political and Social Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/226824/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-04-15T15:51:51.000Z,2025-04-15T15:51:52.000Z,2025,2025-04-15T15:51:52.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],10(j) Injunctions,"[{'lang': 'en', 'title': '10(j) Injunctions'}]","[{'name': 'National Labor Relations Board', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",National Labor Relations Board,Organizational,[],,"[{'lang': 'en', 'subject': 'labor'}, {'lang': 'en', 'subject': 'labor unions'}, {'lang': 'en', 'subject': 'labor disputes'}, {'lang': 'en', 'subject': 'injunctions'}, {'lang': 'en', 'subject': 'collective bargaining'}, {'lang': 'en', 'subject': 'labor relations'}, {'lang': 'en', 'subject': 'unionization'}]",labor; labor unions; labor disputes; injunctions; collective bargaining; labor relations; unionization,"[{'date': '2010-01-01/2025-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2010-01-01/2025-12-31,2025,"[{'lang': 'en', 'description': 'Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. 
Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.<br><br>There are 15 categories of labor disputes in which Section 10(j) injunctions may be appropriate, listed at [https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions/section-10j-categories]. Under NLRB processes, potential cases are identified by Regional Offices and reviewed by the General Counsel, who must seek authorization from the Board before proceeding to court. <br><br>The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010. <br>', 'descriptionType': 'Abstract'}]","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.<br><br>There are 15 categories of labor disputes in which Section 10(j) injunctions may be appropriate, listed at [https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions/section-10j-categories]. Under NLRB processes, potential cases are identified by Regional Offices and reviewed by the General Counsel, who must seek authorization from the Board before proceeding to court. <br><br>The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). 
This list is all 10(j) injunction cases authorized by the Board since September 1, 2010. <br>",[{'geoLocationPlace': 'United States'}],United States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e226824', 'relatedIdentifierType': 'DOI'}]",10.3886/e226824,gesis.icpsr,icpsr,10.3886/e226824v1,10.3886/e226824,,,,,,
1,10.3886/e223443v1,True,200,"{'data': {'id': '10.3886/e223443v1', 'type': 'dois', 'attributes': {'doi': '10.3886/e223443v1', 'prefix': '10.3886', 'suffix': 'e223443v1', 'identifiers': [], 'alternateIdentifiers': [], 'creators': [{'name': 'United States Department Of Commerce. Minority Business Development Agency', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}], 'titles': [{'lang': 'en', 'title': '2022-2024 MBDA Grantees'}], 'publisher': 'ICPSR - Interuniversity Consortium for Political and Social Research', 'container': {}, 'publicationYear': 2025, 'subjects': [{'lang': 'en', 'subject': 'grants'}, {'lang': 'en', 'subject': 'minority businesses'}], 'contributors': [], 'dates': [{'date': '2022-01-01/2024-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}], 'language': 'en', 'types': {'ris': 'DATA', 'bibtex': 'misc', 'citeproc': 'dataset', 'schemaOrg': 'Dataset', 'resourceTypeGeneral': 'Dataset'}, 'relatedIdentifiers': [{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e223443', 'relatedIdentifierType': 'DOI'}], 'relatedItems': [], 'sizes': [], 'formats': [], 'version': 'v1', 'rightsList': [], 'descriptions': [{'lang': 'en', 'description': 'This dataset contains public information on grantees of the Minority Business Development Agency (MBDA) programs, covering grants awarded since 2022. It includes data on all grant-funded centers across all MBDA programs, providing information on each grantee’s including location, contact details, service area, and its associated MBDA program.<br><br>Additional information includes the status of each grantee (whether they are currently funded and operating), grant award identifiers, and a brief description of the grantees services and specialities. 
Detailed information is provided in the data schema provided below.<br><br>MBDA’s mission is to promote the growth and global competitiveness of Minority Business Enterprises (MBE) in order to unlock the country’s full economic potential. MBDA programs provide support for MBEs through a variety of services aimed at improving access to capital, contracts, and markets. These programs help entrepreneurs overcome barriers to success and expand their businesses by offering tailored technical assistance, business consulting, and access to networks.<br><br>Note: the original website is titled 2002-2024 grantees but the description refers to grants awarded since 2022<br>', 'descriptionType': 'Abstract'}], 'geoLocations': [{'geoLocationPlace': 'U.S. Territories'}, {'geoLocationPlace': 'U.S. States'}], 'fundingReferences': [], 'xml': 'PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9InllcyI/Pgo8cmVzb3VyY2UgeG1sbnM9Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IiB4bWxuczp4c2k9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hLWluc3RhbmNlIiB4c2k6c2NoZW1hTG9jYXRpb249Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IGh0dHBzOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC4zODg2L0UyMjM0NDNWMTwvaWRlbnRpZmllcj4KICA8Y3JlYXRvcnM+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJPcmdhbml6YXRpb25hbCI+VW5pdGVkIFN0YXRlcyBEZXBhcnRtZW50IG9mIENvbW1lcmNlLiBNaW5vcml0eSBCdXNpbmVzcyBEZXZlbG9wbWVudCBBZ2VuY3k8L2NyZWF0b3JOYW1lPgogICAgPC9jcmVhdG9yPgogIDwvY3JlYXRvcnM+CiAgPHRpdGxlcz4KICAgIDx0aXRsZSB4bWw6bGFuZz0iZW4iPjIwMjItMjAyNCBNQkRBIEdyYW50ZWVzPC90aXRsZT4KICA8L3RpdGxlcz4KICA8cHVibGlzaGVyPklDUFNSIC0gSW50ZXJ1bml2ZXJzaXR5IENvbnNvcnRpdW0gZm9yIFBvbGl0aWNhbCBhbmQgU29jaWFsIFJlc2VhcmNoPC9wdWJsaXNoZXI+CiAgPHB1YmxpY2F0aW9uWWVhcj4yMDI1PC9wdWJsaWNhdGlvblllYXI+CiAgPHJlc291cmNlVHlwZSByZXNvdXJjZVR5cGVHZW5lcmFsPSJEYXRhc2V0Ii8+CiAgPHN1YmplY3RzPgogICAgPHN1YmplY3QgeG1sOmxhbmc9ImVuIj5ncmFudHM8L3N1YmplY3Q+CiAgICA8c3ViamVjdCB4bWw
6bGFuZz0iZW4iPm1pbm9yaXR5IGJ1c2luZXNzZXM8L3N1YmplY3Q+CiAgPC9zdWJqZWN0cz4KICA8ZGF0ZXM+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQ29sbGVjdGVkIj4yMDIyLTAxLTAxLzIwMjQtMTItMzE8L2RhdGU+CiAgPC9kYXRlcz4KICA8bGFuZ3VhZ2U+ZW5nPC9sYW5ndWFnZT4KICA8cmVsYXRlZElkZW50aWZpZXJzPgogICAgPHJlbGF0ZWRJZGVudGlmaWVyIHJlbGF0ZWRJZGVudGlmaWVyVHlwZT0iRE9JIiByZWxhdGlvblR5cGU9IklzVmVyc2lvbk9mIj4xMC4zODg2L0UyMjM0NDM8L3JlbGF0ZWRJZGVudGlmaWVyPgogIDwvcmVsYXRlZElkZW50aWZpZXJzPgogIDx2ZXJzaW9uPnYxPC92ZXJzaW9uPgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCIgeG1sOmxhbmc9ImVuIj5UaGlzIGRhdGFzZXQgY29udGFpbnMgcHVibGljIGluZm9ybWF0aW9uIG9uIGdyYW50ZWVzIG9mIHRoZSBNaW5vcml0eSBCdXNpbmVzcyBEZXZlbG9wbWVudCBBZ2VuY3kgKE1CREEpIHByb2dyYW1zLCBjb3ZlcmluZyBncmFudHMgYXdhcmRlZCBzaW5jZSAyMDIyLiBJdCBpbmNsdWRlcyBkYXRhIG9uIGFsbCBncmFudC1mdW5kZWQgY2VudGVycyBhY3Jvc3MgYWxsIE1CREEgcHJvZ3JhbXMsIHByb3ZpZGluZyBpbmZvcm1hdGlvbiBvbiBlYWNoIGdyYW50ZWXigJlzIGluY2x1ZGluZyBsb2NhdGlvbiwgY29udGFjdCBkZXRhaWxzLCBzZXJ2aWNlIGFyZWEsIGFuZCBpdHMgYXNzb2NpYXRlZCBNQkRBIHByb2dyYW0uJmx0O2JyJmd0OyZsdDticiZndDtBZGRpdGlvbmFsIGluZm9ybWF0aW9uIGluY2x1ZGVzIHRoZSBzdGF0dXMgb2YgZWFjaCBncmFudGVlICh3aGV0aGVyIHRoZXkgYXJlIGN1cnJlbnRseSBmdW5kZWQgYW5kIG9wZXJhdGluZyksIGdyYW50IGF3YXJkIGlkZW50aWZpZXJzLCBhbmQgYSBicmllZiBkZXNjcmlwdGlvbiBvZiB0aGUgZ3JhbnRlZXMgc2VydmljZXMgYW5kIHNwZWNpYWxpdGllcy4gRGV0YWlsZWQgaW5mb3JtYXRpb24gaXMgcHJvdmlkZWQgaW4gdGhlIGRhdGEgc2NoZW1hIHByb3ZpZGVkIGJlbG93LiZsdDticiZndDsmbHQ7c3BhbiZndDsmbHQ7YnImZ3Q7TUJEQeKAmXMgbWlzc2lvbiBpcyB0byBwcm9tb3RlIHRoZSBncm93dGggYW5kIGdsb2JhbCBjb21wZXRpdGl2ZW5lc3Mgb2YgTWlub3JpdHkgQnVzaW5lc3MgRW50ZXJwcmlzZXMgKE1CRSkgaW4gb3JkZXIgdG8gdW5sb2NrIHRoZSBjb3VudHJ54oCZcyBmdWxsIGVjb25vbWljIHBvdGVudGlhbC4gTUJEQSBwcm9ncmFtcyBwcm92aWRlIHN1cHBvcnQgZm9yIE1CRXMgdGhyb3VnaCBhIHZhcmlldHkgb2Ygc2VydmljZXMgYWltZWQgYXQgaW1wcm92aW5nIGFjY2VzcyB0byBjYXBpdGFsLCBjb250cmFjdHMsIGFuZCBtYXJrZXRzLiBUaGVzZSBwcm9ncmFtcyBoZWxwIGVudHJlcHJlbmV1cnMgb3ZlcmNvbWUgYmFycmllcnMgdG8gc3VjY2VzcyBhbmQgZXhwYW5kIHRoZWlyIGJ1c2luZXNzZXMgYnkgb2ZmZXJpbmcgdGFpbG9yZWQ
gdGVjaG5pY2FsIGFzc2lzdGFuY2UsIGJ1c2luZXNzIGNvbnN1bHRpbmcsIGFuZCBhY2Nlc3MgdG8gbmV0d29ya3MuJmx0O2JyJmd0OyZsdDticiZndDtOb3RlOiB0aGUgb3JpZ2luYWwgd2Vic2l0ZSBpcyB0aXRsZWQgMjAwMi0yMDI0IGdyYW50ZWVzIGJ1dCB0aGUgZGVzY3JpcHRpb24gcmVmZXJzIHRvIGdyYW50cyBhd2FyZGVkIHNpbmNlIDIwMjImbHQ7YnImZ3Q7Jmx0Oy9zcGFuJmd0OzwvZGVzY3JpcHRpb24+CiAgPC9kZXNjcmlwdGlvbnM+CiAgPGdlb0xvY2F0aW9ucz4KICAgIDxnZW9Mb2NhdGlvbj4KICAgICAgPGdlb0xvY2F0aW9uUGxhY2UgeG1sbnM6eHM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hIiB4c2k6dHlwZT0ieHM6c3RyaW5nIj5VLlMuIFRlcnJpdG9yaWVzPC9nZW9Mb2NhdGlvblBsYWNlPgogICAgPC9nZW9Mb2NhdGlvbj4KICAgIDxnZW9Mb2NhdGlvbj4KICAgICAgPGdlb0xvY2F0aW9uUGxhY2UgeG1sbnM6eHM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hIiB4c2k6dHlwZT0ieHM6c3RyaW5nIj5VLlMuIFN0YXRlczwvZ2VvTG9jYXRpb25QbGFjZT4KICAgIDwvZ2VvTG9jYXRpb24+CiAgPC9nZW9Mb2NhdGlvbnM+CjwvcmVzb3VyY2U+', 'url': 'https://www.datalumos.org/datalumos/project/223443/version/V1/view', 'contentUrl': None, 'metadataVersion': 0, 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'source': 'api', 'isActive': True, 'state': 'findable', 'reason': None, 'viewCount': 0, 'viewsOverTime': [], 'downloadCount': 0, 'downloadsOverTime': [], 'referenceCount': 0, 'citationCount': 0, 'citationsOverTime': [], 'partCount': 0, 'partOfCount': 0, 'versionCount': 0, 'versionOfCount': 1, 'created': '2025-03-19T15:17:50.000Z', 'registered': '2025-03-19T15:17:50.000Z', 'published': '2025', 'updated': '2025-03-19T15:17:51.000Z'}, 'relationships': {'client': {'data': {'id': 'gesis.icpsr', 'type': 'clients'}}, 'provider': {'data': {'id': 'icpsr', 'type': 'providers'}}, 'media': {'data': {'id': '10.3886/e223443v1', 'type': 'media'}}, 'references': {'data': []}, 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 'versions': {'data': []}, 'versionOf': {'data': [{'id': '10.3886/e223443', 'type': 'dois'}]}}}}",10.3886,e223443v1,ICPSR - Interuniversity Consortium for Political and Social 
Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/223443/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-19T15:17:50.000Z,2025-03-19T15:17:50.000Z,2025,2025-03-19T15:17:51.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],2022-2024 MBDA Grantees,"[{'lang': 'en', 'title': '2022-2024 MBDA Grantees'}]","[{'name': 'United States Department Of Commerce. Minority Business Development Agency', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",United States Department Of Commerce. Minority Business Development Agency,Organizational,[],,"[{'lang': 'en', 'subject': 'grants'}, {'lang': 'en', 'subject': 'minority businesses'}]",grants; minority businesses,"[{'date': '2022-01-01/2024-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2022-01-01/2024-12-31,2025,"[{'lang': 'en', 'description': 'This dataset contains public information on grantees of the Minority Business Development Agency (MBDA) programs, covering grants awarded since 2022. It includes data on all grant-funded centers across all MBDA programs, providing information on each grantee’s including location, contact details, service area, and its associated MBDA program.<br><br>Additional information includes the status of each grantee (whether they are currently funded and operating), grant award identifiers, and a brief description of the grantees services and specialities. Detailed information is provided in the data schema provided below.<br><br>MBDA’s mission is to promote the growth and global competitiveness of Minority Business Enterprises (MBE) in order to unlock the country’s full economic potential. MBDA programs provide support for MBEs through a variety of services aimed at improving access to capital, contracts, and markets. 
These programs help entrepreneurs overcome barriers to success and expand their businesses by offering tailored technical assistance, business consulting, and access to networks.<br><br>Note: the original website is titled 2002-2024 grantees but the description refers to grants awarded since 2022<br>', 'descriptionType': 'Abstract'}]","This dataset contains public information on grantees of the Minority Business Development Agency (MBDA) programs, covering grants awarded since 2022. It includes data on all grant-funded centers across all MBDA programs, providing information on each grantee’s including location, contact details, service area, and its associated MBDA program.<br><br>Additional information includes the status of each grantee (whether they are currently funded and operating), grant award identifiers, and a brief description of the grantees services and specialities. Detailed information is provided in the data schema provided below.<br><br>MBDA’s mission is to promote the growth and global competitiveness of Minority Business Enterprises (MBE) in order to unlock the country’s full economic potential. MBDA programs provide support for MBEs through a variety of services aimed at improving access to capital, contracts, and markets. These programs help entrepreneurs overcome barriers to success and expand their businesses by offering tailored technical assistance, business consulting, and access to networks.<br><br>Note: the original website is titled 2002-2024 grantees but the description refers to grants awarded since 2022<br>","[{'geoLocationPlace': 'U.S. Territories'}, {'geoLocationPlace': 'U.S. States'}]",U.S. Territories; U.S. States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e223443', 'relatedIdentifierType': 'DOI'}]",10.3886/e223443,gesis.icpsr,icpsr,10.3886/e223443v1,10.3886/e223443,,,,,,
2,10.7910/dvn/f3a62w,True,200,"{'data': {'id': '10.7910/dvn/f3a62w', 'type': 'dois', 'attributes': {'doi': '10.7910/dvn/f3a62w', 'prefix': '10.7910', 'suffix': 'dvn/f3a62w', 'identifiers': [], 'alternateIdentifiers': [], 'creators': [{'name': 'EPA', 'nameType': 'Organizational', 'affiliation': ['U.S. EPA'], 'nameIdentifiers': []}], 'titles': [{'title': 'Extracted Data From: Downloadable 2006 IUR Public Database'}], 'publisher': 'Harvard Dataverse', 'container': {}, 'publicationYear': 2025, 'subjects': [{'subject': 'Chemistry'}, {'subject': 'Earth and Environmental Sciences'}, {'subject': 'Environmental Health', 'schemeUri': 'https://tools.niehs.nih.gov/cchhglossary/'}, {'subject': 'Exposure'}], 'contributors': [{'name': 'CAFE', 'nameType': 'Personal', 'affiliation': [], 'contributorType': 'ContactPerson', 'nameIdentifiers': []}], 'dates': [{'date': '2025-02-18', 'dateType': 'Submitted'}, {'date': '2025-02-26', 'dateType': 'Available'}, {'date': '2025-02-26', 'dateType': 'Updated'}, {'date': '2006-01-01/2006-12-31', 'dateType': 'Other', 'dateInformation': 'Time period covered by the data'}, {'date': '2025', 'dateType': 'Issued'}], 'language': None, 'types': {'ris': 'DATA', 'bibtex': 'misc', 'citeproc': 'dataset', 'schemaOrg': 'Dataset', 'resourceType': 'Extracted Data', 'resourceTypeGeneral': 'Dataset'}, 'relatedIdentifiers': [], 'relatedItems': [], 'sizes': ['2211081', '952281', '1528334'], 'formats': ['application/pdf', 'multipart/related', 'application/zip'], 'version': '1.0', 'rightsList': [{'rightsUri': 'info:eu-repo/semantics/openAccess'}, {'rights': 'Creative Commons Attribution Non Commercial Share Alike 4.0 International', 'rightsUri': 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', 'schemeUri': 'https://spdx.org/licenses/', 'rightsIdentifier': 'cc-by-nc-sa-4.0', 'rightsIdentifierScheme': 'SPDX'}], 'descriptions': [{'description': ""This submission includes publicly available data extracted in its original form. 
Please reference the Related Publication listed here for source and citation information &lt;br /&gt;&lt;br /&gt;\n\nThe following file contains information reported to EPA under the 2006 Inventory Update Rule (IUR). Please note that no information claimed as TSCA Confidential Business Information by an IUR reporter is contained in this file.\n[https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database] \n&lt;br /&gt; &lt;br /&gt;\n\nIf you have questions about the underlying data stored here, please contact U.S. Environmental Protection Agency. TSCA-Hotline@epa.gov is said to answer questions on chemical data reporting requirements for the current version of the program that generated this historical data. It's unclear if they would have much information on this particular data set. But they might be able to redirect a query. If you have questions or recommendations related to this metadata entry and extracted data, please contact the CAFE Data Management team at: climatecafe@bu.edu.\n&lt;br /&gt;&lt;br /&gt;\n\nThis ACCDB [Access database] file is downloaded in compressed (ZIP) file format. After downloading the file to your preferred location, double-click on the file to extract the ACCDB file. This data requires the use of the database application program Microsoft Access [or, possibly, Microsoft SQL."", 'descriptionType': 'Abstract'}, {'description': 'This submission includes publicly available data extracted in its original form. 
Please reference the Related Publication listed here for source and citation information.\n\nDownloadable 2006 IUR Public Database data was downloaded by Anne Gunn\non 2025-02-18', 'descriptionType': 'Other'}, {'description': 'Microsoft Access, unknown', 'descriptionType': 'TechnicalInfo'}, {'description': 'The disclaimers below were copied as is from the EPA Disclaimers site (https://www.epa.gov/web-policies-and-procedures/epa-disclaimers): ""Disclaimer of Endorsement Mention of or referral to commercial products or services, and/or links to non-EPA sites does not imply official EPA endorsement of or responsibility for the opinions, ideas, data, or products presented at those locations, or guarantee the validity of the information provided. Mention of commercial products/services on non-EPA websites is provided solely as a pointer to information on topics related to environmental protection that may be useful to EPA staff and the public. Copyright Status The U.S. Government retains a nonexclusive, royalty-free license to publish or reproduce these documents, or allow others to do so, for U.S. Government purposes. These documents may be freely distributed and used for non-commercial, scientific and educational purposes. Commercial use of the documents available from the EPA websites may be protected under the U.S. and Foreign Copyright Laws. Individual documents on the EPA website may have different copyright conditions, and that will be noted in those documents. Disclaimer of Liability With respect to documents available from the EPA website, neither the United States Government nor any of their employees, makes any warranty, express or implied, including the warranties of merchantability and fitness for a particular purpose, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately owned rights. 
Exit Disclaimer The Exit icon denotes that you are leaving the EPA website and entering an external link or a third-party site. EPA\'s official web site is www.epa.gov. EPA has provided this link because it provides additional information that may be useful or interesting and is being provided in a manner consistent with the intended purpose of the EPA website. Please note that EPA uses third-party sites to provide EPA content already on www.epa.gov in a different format. EPA cannot attest to the accuracy of non-EPA information provided by these third-party sites or any other linked site. EPA is providing these links for your reference. In doing so, EPA does not endorse any non-government websites, companies or applications. Also, please be aware that the privacy protection provided on the EPA.gov domain (EPA Privacy and Security Notice) does not apply to these third-party sites. To learn more about EPA and social media, please refer to&amp; our social media page. Notice Information from the EPA website resides on numerous computer systems funded by the Agency. The use of the EPA websites may be monitored for computer security purposes. Any unauthorized access to the EPA website is prohibited and is subject to criminal and civil penalties under federal laws including, but not limited to, Public Law 99-474, the Computer Fraud and Abuse Act of 1986. Terms of Use for Geospatial Data These geospatial data and corresponding cartographic materials have been approved for use by the U.S. Environmental Protection Agency (EPA) as advised by the EPA Geospatial Advisory Committee (EGAC). This approved release is on the condition that neither the EPA nor the U.S. Government may be held liable for any damages resulting from its authorized or unauthorized use. These data and any corresponding products, services, or materials do not necessarily represent the EPA’s official position or viewpoint, expressed or implied. 
These content items are not intended for use in establishing liability or calculating cost recovery statutes of limitations. They cannot be relied upon to create any rights, substantive or procedural, enforceable by any party in litigation with the United States or third parties. Additionally, although these data have been processed successfully on EPA computer systems, no warranty expressed or implied can be made regarding the accuracy or utility of the data on any other system or for general or scientific purposes, nor shall the act of distribution constitute any such warranty. The Agency reserves the right to revise EPA-stewarded datasets pursuant to further analysis and review without public notice. Unless otherwise specified, geospatial data produced by the EPA is by default in the public domain and is not subject to domestic copyright protection under 17 U.S.C. § 105. Referenced data from non-EPA sources are neither inherently verified nor independently tested by the Agency in all circumstances. Permission to reproduce copyrighted items not produced by the EPA must be secured from the copyright owner. EPA strongly recommends careful attention be paid to metadata files associated with these data to better understand limitations, restrictions or intended use. The U.S. 
EPA shall not be held liable for improper or incorrect use of the data.""', 'descriptionType': 'Methods'}], 'geoLocations': [], 'fundingReferences': [], 'xml': 'PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHJlc291cmNlIHhtbG5zPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCIgeG1sbnM6eHNpPSJodHRwOi8vd3d3LnczLm9yZy8yMDAxL1hNTFNjaGVtYS1pbnN0YW5jZSIgeHNpOnNjaGVtYUxvY2F0aW9uPSJodHRwOi8vZGF0YWNpdGUub3JnL3NjaGVtYS9rZXJuZWwtNCBodHRwOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjUvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC43OTEwL0RWTi9GM0E2Mlc8L2lkZW50aWZpZXI+CiAgPGNyZWF0b3JzPgogICAgPGNyZWF0b3I+CiAgICAgIDxjcmVhdG9yTmFtZSBuYW1lVHlwZT0iT3JnYW5pemF0aW9uYWwiPkVQQTwvY3JlYXRvck5hbWU+CiAgICAgIDxhZmZpbGlhdGlvbj5VLlMuIEVQQTwvYWZmaWxpYXRpb24+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlPkV4dHJhY3RlZCBEYXRhIEZyb206IERvd25sb2FkYWJsZSAyMDA2IElVUiBQdWJsaWMgRGF0YWJhc2U8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+SGFydmFyZCBEYXRhdmVyc2U8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMjU8L3B1YmxpY2F0aW9uWWVhcj4KICA8c3ViamVjdHM+CiAgICA8c3ViamVjdD5DaGVtaXN0cnk8L3N1YmplY3Q+CiAgICA8c3ViamVjdD5FYXJ0aCBhbmQgRW52aXJvbm1lbnRhbCBTY2llbmNlczwvc3ViamVjdD4KICAgIDxzdWJqZWN0IHNjaGVtZVVSST0iaHR0cHM6Ly90b29scy5uaWVocy5uaWguZ292L2NjaGhnbG9zc2FyeS8iPkVudmlyb25tZW50YWwgSGVhbHRoPC9zdWJqZWN0PgogICAgPHN1YmplY3Q+RXhwb3N1cmU8L3N1YmplY3Q+CiAgPC9zdWJqZWN0cz4KICA8Y29udHJpYnV0b3JzPgogICAgPGNvbnRyaWJ1dG9yIGNvbnRyaWJ1dG9yVHlwZT0iQ29udGFjdFBlcnNvbiI+CiAgICAgIDxjb250cmlidXRvck5hbWUgbmFtZVR5cGU9IlBlcnNvbmFsIj5DQUZFPC9jb250cmlidXRvck5hbWU+CiAgICA8L2NvbnRyaWJ1dG9yPgogIDwvY29udHJpYnV0b3JzPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJTdWJtaXR0ZWQiPjIwMjUtMDItMTg8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iQXZhaWxhYmxlIj4yMDI1LTAyLTI2PC9kYXRlPgogICAgPGRhdGUgZGF0ZVR5cGU9IlVwZGF0ZWQiPjIwMjUtMDItMjY8L2RhdGU+CiAgICA8ZGF0ZSBkYXRlVHlwZT0iT3RoZXIiIGRhdGVJbmZvcm1hdGlvbj0iVGltZSBwZXJpb2QgY292ZXJlZCBieSB0aGUgZGF0YSI+MjAwNi0wMS0wMS8yMDA2LTEyLTMxPC9kYXRlPgogIDwvZGF0ZXM+CiAgPHJlc291cmNlVHlwZSB
yZXNvdXJjZVR5cGVHZW5lcmFsPSJEYXRhc2V0Ij5FeHRyYWN0ZWQgRGF0YTwvcmVzb3VyY2VUeXBlPgogIDxzaXplcz4KICAgIDxzaXplPjIyMTEwODE8L3NpemU+CiAgICA8c2l6ZT45NTIyODE8L3NpemU+CiAgICA8c2l6ZT4xNTI4MzM0PC9zaXplPgogIDwvc2l6ZXM+CiAgPGZvcm1hdHM+CiAgICA8Zm9ybWF0PmFwcGxpY2F0aW9uL3BkZjwvZm9ybWF0PgogICAgPGZvcm1hdD5tdWx0aXBhcnQvcmVsYXRlZDwvZm9ybWF0PgogICAgPGZvcm1hdD5hcHBsaWNhdGlvbi96aXA8L2Zvcm1hdD4KICA8L2Zvcm1hdHM+CiAgPHZlcnNpb24+MS4wPC92ZXJzaW9uPgogIDxyaWdodHNMaXN0PgogICAgPHJpZ2h0cyByaWdodHNVUkk9ImluZm86ZXUtcmVwby9zZW1hbnRpY3Mvb3BlbkFjY2VzcyIvPgogICAgPHJpZ2h0cyByaWdodHNVUkk9Imh0dHA6Ly9jcmVhdGl2ZWNvbW1vbnMub3JnL2xpY2Vuc2VzL2J5LW5jLXNhLzQuMCI+Q0MgQlktTkMtU0EgNC4wPC9yaWdodHM+CiAgPC9yaWdodHNMaXN0PgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCI+VGhpcyBzdWJtaXNzaW9uIGluY2x1ZGVzIHB1YmxpY2x5IGF2YWlsYWJsZSBkYXRhIGV4dHJhY3RlZCBpbiBpdHMgb3JpZ2luYWwgZm9ybS4gUGxlYXNlIHJlZmVyZW5jZSB0aGUgUmVsYXRlZCBQdWJsaWNhdGlvbiBsaXN0ZWQgaGVyZSBmb3Igc291cmNlIGFuZCBjaXRhdGlvbiBpbmZvcm1hdGlvbiAmYW1wO2x0O2JyIC8mYW1wO2d0OyZhbXA7bHQ7YnIgLyZhbXA7Z3Q7CgpUaGUgZm9sbG93aW5nIGZpbGUgY29udGFpbnMgaW5mb3JtYXRpb24gcmVwb3J0ZWQgdG8gRVBBIHVuZGVyIHRoZSAyMDA2IEludmVudG9yeSBVcGRhdGUgUnVsZSAoSVVSKS4gUGxlYXNlIG5vdGUgdGhhdCBubyBpbmZvcm1hdGlvbiBjbGFpbWVkIGFzIFRTQ0EgQ29uZmlkZW50aWFsIEJ1c2luZXNzIEluZm9ybWF0aW9uIGJ5IGFuIElVUiByZXBvcnRlciBpcyBjb250YWluZWQgaW4gdGhpcyBmaWxlLgpbaHR0cHM6Ly93d3cuZXBhLmdvdi9jaGVtaWNhbC1kYXRhLXJlcG9ydGluZy9kb3dubG9hZGFibGUtMjAwNi1pdXItcHVibGljLWRhdGFiYXNlXSAKJmFtcDtsdDticiAvJmFtcDtndDsgJmFtcDtsdDticiAvJmFtcDtndDsKCklmIHlvdSBoYXZlIHF1ZXN0aW9ucyBhYm91dCB0aGUgdW5kZXJseWluZyBkYXRhIHN0b3JlZCBoZXJlLCBwbGVhc2UgY29udGFjdCBVLlMuIEVudmlyb25tZW50YWwgUHJvdGVjdGlvbiBBZ2VuY3kuIFRTQ0EtSG90bGluZUBlcGEuZ292IGlzIHNhaWQgdG8gYW5zd2VyIHF1ZXN0aW9ucyBvbiBjaGVtaWNhbCBkYXRhIHJlcG9ydGluZyByZXF1aXJlbWVudHMgZm9yIHRoZSBjdXJyZW50IHZlcnNpb24gb2YgdGhlIHByb2dyYW0gdGhhdCBnZW5lcmF0ZWQgdGhpcyBoaXN0b3JpY2FsIGRhdGEuIEl0JmFtcDthcG9zO3MgdW5jbGVhciBpZiB0aGV5IHdvdWxkIGhhdmUgbXVjaCBpbmZvcm1hdGlvbiBvbiB0aGlzIHBhcnRpY3VsYXIgZGF0YSBzZXQ
uIEJ1dCB0aGV5IG1pZ2h0IGJlIGFibGUgdG8gcmVkaXJlY3QgYSBxdWVyeS4gSWYgeW91IGhhdmUgcXVlc3Rpb25zIG9yIHJlY29tbWVuZGF0aW9ucyByZWxhdGVkIHRvIHRoaXMgbWV0YWRhdGEgZW50cnkgYW5kIGV4dHJhY3RlZCBkYXRhLCBwbGVhc2UgY29udGFjdCB0aGUgQ0FGRSBEYXRhIE1hbmFnZW1lbnQgdGVhbSBhdDogY2xpbWF0ZWNhZmVAYnUuZWR1LgomYW1wO2x0O2JyIC8mYW1wO2d0OyZhbXA7bHQ7YnIgLyZhbXA7Z3Q7CgpUaGlzIEFDQ0RCIFtBY2Nlc3MgZGF0YWJhc2VdIGZpbGUgaXMgZG93bmxvYWRlZCBpbiBjb21wcmVzc2VkIChaSVApIGZpbGUgZm9ybWF0LiBBZnRlciBkb3dubG9hZGluZyB0aGUgZmlsZSB0byB5b3VyIHByZWZlcnJlZCBsb2NhdGlvbiwgZG91YmxlLWNsaWNrIG9uIHRoZSBmaWxlIHRvIGV4dHJhY3QgdGhlIEFDQ0RCIGZpbGUuIFRoaXMgZGF0YSByZXF1aXJlcyB0aGUgdXNlIG9mIHRoZSBkYXRhYmFzZSBhcHBsaWNhdGlvbiBwcm9ncmFtIE1pY3Jvc29mdCBBY2Nlc3MgW29yLCBwb3NzaWJseSwgTWljcm9zb2Z0IFNRTC48L2Rlc2NyaXB0aW9uPgogICAgPGRlc2NyaXB0aW9uIGRlc2NyaXB0aW9uVHlwZT0iT3RoZXIiPlRoaXMgc3VibWlzc2lvbiBpbmNsdWRlcyBwdWJsaWNseSBhdmFpbGFibGUgZGF0YSBleHRyYWN0ZWQgaW4gaXRzIG9yaWdpbmFsIGZvcm0uIFBsZWFzZSByZWZlcmVuY2UgdGhlIFJlbGF0ZWQgUHVibGljYXRpb24gbGlzdGVkIGhlcmUgZm9yIHNvdXJjZSBhbmQgY2l0YXRpb24gaW5mb3JtYXRpb24uCgpEb3dubG9hZGFibGUgMjAwNiBJVVIgUHVibGljIERhdGFiYXNlIGRhdGEgd2FzIGRvd25sb2FkZWQgYnkgQW5uZSBHdW5uCm9uIDIwMjUtMDItMTg8L2Rlc2NyaXB0aW9uPgogICAgPGRlc2NyaXB0aW9uIGRlc2NyaXB0aW9uVHlwZT0iVGVjaG5pY2FsSW5mbyI+TWljcm9zb2Z0IEFjY2VzcywgdW5rbm93bjwvZGVzY3JpcHRpb24+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJNZXRob2RzIj5UaGUgZGlzY2xhaW1lcnMgYmVsb3cgd2VyZSBjb3BpZWQgYXMgaXMgZnJvbSB0aGUgRVBBIERpc2NsYWltZXJzIHNpdGUgKGh0dHBzOi8vd3d3LmVwYS5nb3Yvd2ViLXBvbGljaWVzLWFuZC1wcm9jZWR1cmVzL2VwYS1kaXNjbGFpbWVycyk6ICJEaXNjbGFpbWVyIG9mIEVuZG9yc2VtZW50IE1lbnRpb24gb2Ygb3IgcmVmZXJyYWwgdG8gY29tbWVyY2lhbCBwcm9kdWN0cyBvciBzZXJ2aWNlcywgYW5kL29yIGxpbmtzIHRvIG5vbi1FUEEgc2l0ZXMgZG9lcyBub3QgaW1wbHkgb2ZmaWNpYWwgRVBBIGVuZG9yc2VtZW50IG9mIG9yIHJlc3BvbnNpYmlsaXR5IGZvciB0aGUgb3BpbmlvbnMsIGlkZWFzLCBkYXRhLCBvciBwcm9kdWN0cyBwcmVzZW50ZWQgYXQgdGhvc2UgbG9jYXRpb25zLCBvciBndWFyYW50ZWUgdGhlIHZhbGlkaXR5IG9mIHRoZSBpbmZvcm1hdGlvbiBwcm92aWRlZC4gTWVudGlvbiBvZiBjb21tZXJjaWFsIHByb2R1Y3RzL3NlcnZpY2VzIG9uIG5vbi1FUEEgd2V
ic2l0ZXMgaXMgcHJvdmlkZWQgc29sZWx5IGFzIGEgcG9pbnRlciB0byBpbmZvcm1hdGlvbiBvbiB0b3BpY3MgcmVsYXRlZCB0byBlbnZpcm9ubWVudGFsIHByb3RlY3Rpb24gdGhhdCBtYXkgYmUgdXNlZnVsIHRvIEVQQSBzdGFmZiBhbmQgdGhlIHB1YmxpYy4gQ29weXJpZ2h0IFN0YXR1cyBUaGUgVS5TLiBHb3Zlcm5tZW50IHJldGFpbnMgYSBub25leGNsdXNpdmUsIHJveWFsdHktZnJlZSBsaWNlbnNlIHRvIHB1Ymxpc2ggb3IgcmVwcm9kdWNlIHRoZXNlIGRvY3VtZW50cywgb3IgYWxsb3cgb3RoZXJzIHRvIGRvIHNvLCBmb3IgVS5TLiBHb3Zlcm5tZW50IHB1cnBvc2VzLiBUaGVzZSBkb2N1bWVudHMgbWF5IGJlIGZyZWVseSBkaXN0cmlidXRlZCBhbmQgdXNlZCBmb3Igbm9uLWNvbW1lcmNpYWwsIHNjaWVudGlmaWMgYW5kIGVkdWNhdGlvbmFsIHB1cnBvc2VzLiBDb21tZXJjaWFsIHVzZSBvZiB0aGUgZG9jdW1lbnRzIGF2YWlsYWJsZSBmcm9tIHRoZSBFUEEgd2Vic2l0ZXMgbWF5IGJlIHByb3RlY3RlZCB1bmRlciB0aGUgVS5TLiBhbmQgRm9yZWlnbiBDb3B5cmlnaHQgTGF3cy4gSW5kaXZpZHVhbCBkb2N1bWVudHMgb24gdGhlIEVQQSB3ZWJzaXRlIG1heSBoYXZlIGRpZmZlcmVudCBjb3B5cmlnaHQgY29uZGl0aW9ucywgYW5kIHRoYXQgd2lsbCBiZSBub3RlZCBpbiB0aG9zZSBkb2N1bWVudHMuIERpc2NsYWltZXIgb2YgTGlhYmlsaXR5IFdpdGggcmVzcGVjdCB0byBkb2N1bWVudHMgYXZhaWxhYmxlIGZyb20gdGhlIEVQQSB3ZWJzaXRlLCBuZWl0aGVyIHRoZSBVbml0ZWQgU3RhdGVzIEdvdmVybm1lbnQgbm9yIGFueSBvZiB0aGVpciBlbXBsb3llZXMsIG1ha2VzIGFueSB3YXJyYW50eSwgZXhwcmVzcyBvciBpbXBsaWVkLCBpbmNsdWRpbmcgdGhlIHdhcnJhbnRpZXMgb2YgbWVyY2hhbnRhYmlsaXR5IGFuZCBmaXRuZXNzIGZvciBhIHBhcnRpY3VsYXIgcHVycG9zZSwgb3IgYXNzdW1lcyBhbnkgbGVnYWwgbGlhYmlsaXR5IG9yIHJlc3BvbnNpYmlsaXR5IGZvciB0aGUgYWNjdXJhY3ksIGNvbXBsZXRlbmVzcywgb3IgdXNlZnVsbmVzcyBvZiBhbnkgaW5mb3JtYXRpb24sIGFwcGFyYXR1cywgcHJvZHVjdCwgb3IgcHJvY2VzcyBkaXNjbG9zZWQsIG9yIHJlcHJlc2VudHMgdGhhdCBpdHMgdXNlIHdvdWxkIG5vdCBpbmZyaW5nZSBwcml2YXRlbHkgb3duZWQgcmlnaHRzLiBFeGl0IERpc2NsYWltZXIgVGhlIEV4aXQgaWNvbiBkZW5vdGVzIHRoYXQgeW91IGFyZSBsZWF2aW5nIHRoZSBFUEEgd2Vic2l0ZSBhbmQgZW50ZXJpbmcgYW4gZXh0ZXJuYWwgbGluayBvciBhIHRoaXJkLXBhcnR5IHNpdGUuIEVQQSdzIG9mZmljaWFsIHdlYiBzaXRlIGlzIHd3dy5lcGEuZ292LiBFUEEgaGFzIHByb3ZpZGVkIHRoaXMgbGluayBiZWNhdXNlIGl0IHByb3ZpZGVzIGFkZGl0aW9uYWwgaW5mb3JtYXRpb24gdGhhdCBtYXkgYmUgdXNlZnVsIG9yIGludGVyZXN0aW5nIGFuZCBpcyBiZWluZyBwcm92aWRlZCBpbiBhIG1hbm5lciBjb25zaXN0ZW50IHdpdGg
gdGhlIGludGVuZGVkIHB1cnBvc2Ugb2YgdGhlIEVQQSB3ZWJzaXRlLiBQbGVhc2Ugbm90ZSB0aGF0IEVQQSB1c2VzIHRoaXJkLXBhcnR5IHNpdGVzIHRvIHByb3ZpZGUgRVBBIGNvbnRlbnQgYWxyZWFkeSBvbiB3d3cuZXBhLmdvdiBpbiBhIGRpZmZlcmVudCBmb3JtYXQuIEVQQSBjYW5ub3QgYXR0ZXN0IHRvIHRoZSBhY2N1cmFjeSBvZiBub24tRVBBIGluZm9ybWF0aW9uIHByb3ZpZGVkIGJ5IHRoZXNlIHRoaXJkLXBhcnR5IHNpdGVzIG9yIGFueSBvdGhlciBsaW5rZWQgc2l0ZS4gRVBBIGlzIHByb3ZpZGluZyB0aGVzZSBsaW5rcyBmb3IgeW91ciByZWZlcmVuY2UuIEluIGRvaW5nIHNvLCBFUEEgZG9lcyBub3QgZW5kb3JzZSBhbnkgbm9uLWdvdmVybm1lbnQgd2Vic2l0ZXMsIGNvbXBhbmllcyBvciBhcHBsaWNhdGlvbnMuIEFsc28sIHBsZWFzZSBiZSBhd2FyZSB0aGF0IHRoZSBwcml2YWN5IHByb3RlY3Rpb24gcHJvdmlkZWQgb24gdGhlIEVQQS5nb3YgZG9tYWluIChFUEEgUHJpdmFjeSBhbmQgU2VjdXJpdHkgTm90aWNlKSBkb2VzIG5vdCBhcHBseSB0byB0aGVzZSB0aGlyZC1wYXJ0eSBzaXRlcy4gVG8gbGVhcm4gbW9yZSBhYm91dCBFUEEgYW5kIHNvY2lhbCBtZWRpYSwgcGxlYXNlIHJlZmVyIHRvJmFtcDsgb3VyIHNvY2lhbCBtZWRpYSBwYWdlLiBOb3RpY2UgSW5mb3JtYXRpb24gZnJvbSB0aGUgRVBBIHdlYnNpdGUgcmVzaWRlcyBvbiBudW1lcm91cyBjb21wdXRlciBzeXN0ZW1zIGZ1bmRlZCBieSB0aGUgQWdlbmN5LiBUaGUgdXNlIG9mIHRoZSBFUEEgd2Vic2l0ZXMgbWF5IGJlIG1vbml0b3JlZCBmb3IgY29tcHV0ZXIgc2VjdXJpdHkgcHVycG9zZXMuIEFueSB1bmF1dGhvcml6ZWQgYWNjZXNzIHRvIHRoZSBFUEEgd2Vic2l0ZSBpcyBwcm9oaWJpdGVkIGFuZCBpcyBzdWJqZWN0IHRvIGNyaW1pbmFsIGFuZCBjaXZpbCBwZW5hbHRpZXMgdW5kZXIgZmVkZXJhbCBsYXdzIGluY2x1ZGluZywgYnV0IG5vdCBsaW1pdGVkIHRvLCBQdWJsaWMgTGF3IDk5LTQ3NCwgdGhlIENvbXB1dGVyIEZyYXVkIGFuZCBBYnVzZSBBY3Qgb2YgMTk4Ni4gVGVybXMgb2YgVXNlIGZvciBHZW9zcGF0aWFsIERhdGEgVGhlc2UgZ2Vvc3BhdGlhbCBkYXRhIGFuZCBjb3JyZXNwb25kaW5nIGNhcnRvZ3JhcGhpYyBtYXRlcmlhbHMgaGF2ZSBiZWVuIGFwcHJvdmVkIGZvciB1c2UgYnkgdGhlIFUuUy4gRW52aXJvbm1lbnRhbCBQcm90ZWN0aW9uIEFnZW5jeSAoRVBBKSBhcyBhZHZpc2VkIGJ5IHRoZSBFUEEgR2Vvc3BhdGlhbCBBZHZpc29yeSBDb21taXR0ZWUgKEVHQUMpLiBUaGlzIGFwcHJvdmVkIHJlbGVhc2UgaXMgb24gdGhlIGNvbmRpdGlvbiB0aGF0IG5laXRoZXIgdGhlIEVQQSBub3IgdGhlIFUuUy4gR292ZXJubWVudCBtYXkgYmUgaGVsZCBsaWFibGUgZm9yIGFueSBkYW1hZ2VzIHJlc3VsdGluZyBmcm9tIGl0cyBhdXRob3JpemVkIG9yIHVuYXV0aG9yaXplZCB1c2UuIFRoZXNlIGRhdGEgYW5kIGFueSBjb3JyZXNwb25kaW5nIHByb2R1Y3RzLCBzZXJ
2aWNlcywgb3IgbWF0ZXJpYWxzIGRvIG5vdCBuZWNlc3NhcmlseSByZXByZXNlbnQgdGhlIEVQQeKAmXMgb2ZmaWNpYWwgcG9zaXRpb24gb3Igdmlld3BvaW50LCBleHByZXNzZWQgb3IgaW1wbGllZC4gVGhlc2UgY29udGVudCBpdGVtcyBhcmUgbm90IGludGVuZGVkIGZvciB1c2UgaW4gZXN0YWJsaXNoaW5nIGxpYWJpbGl0eSBvciBjYWxjdWxhdGluZyBjb3N0IHJlY292ZXJ5IHN0YXR1dGVzIG9mIGxpbWl0YXRpb25zLiBUaGV5IGNhbm5vdCBiZSByZWxpZWQgdXBvbiB0byBjcmVhdGUgYW55IHJpZ2h0cywgc3Vic3RhbnRpdmUgb3IgcHJvY2VkdXJhbCwgZW5mb3JjZWFibGUgYnkgYW55IHBhcnR5IGluIGxpdGlnYXRpb24gd2l0aCB0aGUgVW5pdGVkIFN0YXRlcyBvciB0aGlyZCBwYXJ0aWVzLiBBZGRpdGlvbmFsbHksIGFsdGhvdWdoIHRoZXNlIGRhdGEgaGF2ZSBiZWVuIHByb2Nlc3NlZCBzdWNjZXNzZnVsbHkgb24gRVBBIGNvbXB1dGVyIHN5c3RlbXMsIG5vIHdhcnJhbnR5IGV4cHJlc3NlZCBvciBpbXBsaWVkIGNhbiBiZSBtYWRlIHJlZ2FyZGluZyB0aGUgYWNjdXJhY3kgb3IgdXRpbGl0eSBvZiB0aGUgZGF0YSBvbiBhbnkgb3RoZXIgc3lzdGVtIG9yIGZvciBnZW5lcmFsIG9yIHNjaWVudGlmaWMgcHVycG9zZXMsIG5vciBzaGFsbCB0aGUgYWN0IG9mIGRpc3RyaWJ1dGlvbiBjb25zdGl0dXRlIGFueSBzdWNoIHdhcnJhbnR5LiBUaGUgQWdlbmN5IHJlc2VydmVzIHRoZSByaWdodCB0byByZXZpc2UgRVBBLXN0ZXdhcmRlZCBkYXRhc2V0cyBwdXJzdWFudCB0byBmdXJ0aGVyIGFuYWx5c2lzIGFuZCByZXZpZXcgd2l0aG91dCBwdWJsaWMgbm90aWNlLiBVbmxlc3Mgb3RoZXJ3aXNlIHNwZWNpZmllZCwgZ2Vvc3BhdGlhbCBkYXRhIHByb2R1Y2VkIGJ5IHRoZSBFUEEgaXMgYnkgZGVmYXVsdCBpbiB0aGUgcHVibGljIGRvbWFpbiBhbmQgaXMgbm90IHN1YmplY3QgdG8gZG9tZXN0aWMgY29weXJpZ2h0IHByb3RlY3Rpb24gdW5kZXIgMTcgVS5TLkMuIMKnIDEwNS4gUmVmZXJlbmNlZCBkYXRhIGZyb20gbm9uLUVQQSBzb3VyY2VzIGFyZSBuZWl0aGVyIGluaGVyZW50bHkgdmVyaWZpZWQgbm9yIGluZGVwZW5kZW50bHkgdGVzdGVkIGJ5IHRoZSBBZ2VuY3kgaW4gYWxsIGNpcmN1bXN0YW5jZXMuIFBlcm1pc3Npb24gdG8gcmVwcm9kdWNlIGNvcHlyaWdodGVkIGl0ZW1zIG5vdCBwcm9kdWNlZCBieSB0aGUgRVBBIG11c3QgYmUgc2VjdXJlZCBmcm9tIHRoZSBjb3B5cmlnaHQgb3duZXIuIEVQQSBzdHJvbmdseSByZWNvbW1lbmRzIGNhcmVmdWwgYXR0ZW50aW9uIGJlIHBhaWQgdG8gbWV0YWRhdGEgZmlsZXMgYXNzb2NpYXRlZCB3aXRoIHRoZXNlIGRhdGEgdG8gYmV0dGVyIHVuZGVyc3RhbmQgbGltaXRhdGlvbnMsIHJlc3RyaWN0aW9ucyBvciBpbnRlbmRlZCB1c2UuIFRoZSBVLlMuIEVQQSBzaGFsbCBub3QgYmUgaGVsZCBsaWFibGUgZm9yIGltcHJvcGVyIG9yIGluY29ycmVjdCB1c2Ugb2YgdGhlIGRhdGEuIjwvZGVzY3JpcHRpb24+CiAgPC9
kZXNjcmlwdGlvbnM+CiAgPGdlb0xvY2F0aW9ucz4KICAgIDxnZW9Mb2NhdGlvbi8+CiAgPC9nZW9Mb2NhdGlvbnM+CjwvcmVzb3VyY2U+', 'url': 'https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/F3A62W', 'contentUrl': None, 'metadataVersion': 1, 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'source': 'mds', 'isActive': True, 'state': 'findable', 'reason': None, 'viewCount': 56, 'viewsOverTime': [{'yearMonth': '2025-03', 'total': 8}, {'yearMonth': '2025-04', 'total': 2}, {'yearMonth': '2025-05', 'total': 10}, {'yearMonth': '2025-06', 'total': 11}, {'yearMonth': '2025-07', 'total': 7}, {'yearMonth': '2025-08', 'total': 16}, {'yearMonth': '2025-09', 'total': 2}], 'downloadCount': 0, 'downloadsOverTime': [], 'referenceCount': 0, 'citationCount': 0, 'citationsOverTime': [], 'partCount': 0, 'partOfCount': 0, 'versionCount': 0, 'versionOfCount': 0, 'created': '2025-02-18T13:10:34.000Z', 'registered': '2025-02-26T21:41:18.000Z', 'published': '2025', 'updated': '2025-05-11T23:42:01.000Z'}, 'relationships': {'client': {'data': {'id': 'gdcc.harvard-dv', 'type': 'clients'}}, 'provider': {'data': {'id': 'harvardu', 'type': 'providers'}}, 'media': {'data': {'id': '10.7910/dvn/f3a62w', 'type': 'media'}}, 'references': {'data': []}, 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 'versions': {'data': []}, 'versionOf': {'data': []}}}}",10.791,dvn/f3a62w,Harvard Dataverse,2025.0,,1.0,https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/F3A62W,,http://datacite.org/schema/kernel-4,mds,True,findable,,1.0,2025-02-18T13:10:34.000Z,2025-02-26T21:41:18.000Z,2025,2025-05-11T23:42:01.000Z,56.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DATA,misc,dataset,Dataset,Dataset,[],[],Extracted Data From: Downloadable 2006 IUR Public Database,[{'title': 'Extracted Data From: Downloadable 2006 IUR Public Database'}],"[{'name': 'EPA', 'nameType': 'Organizational', 'affiliation': ['U.S. 
EPA'], 'nameIdentifiers': []}]",EPA,Organizational,"[{'name': 'CAFE', 'nameType': 'Personal', 'affiliation': [], 'contributorType': 'ContactPerson', 'nameIdentifiers': []}]",CAFE,"[{'subject': 'Chemistry'}, {'subject': 'Earth and Environmental Sciences'}, {'subject': 'Environmental Health', 'schemeUri': 'https://tools.niehs.nih.gov/cchhglossary/'}, {'subject': 'Exposure'}]",Chemistry; Earth and Environmental Sciences; Environmental Health; Exposure,"[{'date': '2025-02-18', 'dateType': 'Submitted'}, {'date': '2025-02-26', 'dateType': 'Available'}, {'date': '2025-02-26', 'dateType': 'Updated'}, {'date': '2006-01-01/2006-12-31', 'dateType': 'Other', 'dateInformation': 'Time period covered by the data'}, {'date': '2025', 'dateType': 'Issued'}]",,2025,"[{'description': 'This submission includes publicly available data extracted in its original form. Please reference the Related Publication listed here for source and citation information &lt;br /&gt;&lt;br /&gt; The following file contains information reported to EPA under the 2006 Inventory Update Rule (IUR). Please note that no information claimed as TSCA Confidential Business Information by an IUR reporter is contained in this file. [https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database] &lt;br /&gt; &lt;br /&gt; If you have questions about the underlying data stored here, please contact U.S. Environmental Protection Agency. TSCA-Hotline@epa.gov is said to answer questions on chemical data reporting requirements for the current version of the program that generated this historical data. It's unclear if they would have much information on this particular data set. But they might be able to redirect a query. If you have questions or recommendations related to this metadata entry and extracted data, please contact the CAFE Data Management team at: climatecafe@bu.edu. &lt;br /&gt;&lt;br /&gt; This ACCDB [Access database] file is downloaded in compressed (ZIP) file format. 
After downloading the file to your preferred location, double-click on the file to extract the ACCDB file. This data requires the use of the database application program Microsoft Access [or, possibly, Microsoft SQL.', 'descriptionType': 'Abstract'}, {'description': 'This submission includes publicly available data extracted in its original form. Please reference the Related Publication listed here for source and citation information. Downloadable 2006 IUR Public Database data was downloaded by Anne Gunn on 2025-02-18', 'descriptionType': 'Other'}, {'description': 'Microsoft Access, unknown', 'descriptionType': 'TechnicalInfo'}, {'description': 'The disclaimers below were copied as is from the EPA Disclaimers site (https://www.epa.gov/web-policies-and-procedures/epa-disclaimers): ""Disclaimer of Endorsement Mention of or referral to commercial products or services, and/or links to non-EPA sites does not imply official EPA endorsement of or responsibility for the opinions, ideas, data, or products presented at those locations, or guarantee the validity of the information provided. Mention of commercial products/services on non-EPA websites is provided solely as a pointer to information on topics related to environmental protection that may be useful to EPA staff and the public. Copyright Status The U.S. Government retains a nonexclusive, royalty-free license to publish or reproduce these documents, or allow others to do so, for U.S. Government purposes. These documents may be freely distributed and used for non-commercial, scientific and educational purposes. Commercial use of the documents available from the EPA websites may be protected under the U.S. and Foreign Copyright Laws. Individual documents on the EPA website may have different copyright conditions, and that will be noted in those documents. 
Disclaimer of Liability With respect to documents available from the EPA website, neither the United States Government nor any of their employees, makes any warranty, express or implied, including the warranties of merchantability and fitness for a particular purpose, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately owned rights. Exit Disclaimer The Exit icon denotes that you are leaving the EPA website and entering an external link or a third-party site. EPA's official web site is www.epa.gov. EPA has provided this link because it provides additional information that may be useful or interesting and is being provided in a manner consistent with the intended purpose of the EPA website. Please note that EPA uses third-party sites to provide EPA content already on www.epa.gov in a different format. EPA cannot attest to the accuracy of non-EPA information provided by these third-party sites or any other linked site. EPA is providing these links for your reference. In doing so, EPA does not endorse any non-government websites, companies or applications. Also, please be aware that the privacy protection provided on the EPA.gov domain (EPA Privacy and Security Notice) does not apply to these third-party sites. To learn more about EPA and social media, please refer to&amp; our social media page. Notice Information from the EPA website resides on numerous computer systems funded by the Agency. The use of the EPA websites may be monitored for computer security purposes. Any unauthorized access to the EPA website is prohibited and is subject to criminal and civil penalties under federal laws including, but not limited to, Public Law 99-474, the Computer Fraud and Abuse Act of 1986. Terms of Use for Geospatial Data These geospatial data and corresponding cartographic materials have been approved for use by the U.S. 
Environmental Protection Agency (EPA) as advised by the EPA Geospatial Advisory Committee (EGAC). This approved release is on the condition that neither the EPA nor the U.S. Government may be held liable for any damages resulting from its authorized or unauthorized use. These data and any corresponding products, services, or materials do not necessarily represent the EPA’s official position or viewpoint, expressed or implied. These content items are not intended for use in establishing liability or calculating cost recovery statutes of limitations. They cannot be relied upon to create any rights, substantive or procedural, enforceable by any party in litigation with the United States or third parties. Additionally, although these data have been processed successfully on EPA computer systems, no warranty expressed or implied can be made regarding the accuracy or utility of the data on any other system or for general or scientific purposes, nor shall the act of distribution constitute any such warranty. The Agency reserves the right to revise EPA-stewarded datasets pursuant to further analysis and review without public notice. Unless otherwise specified, geospatial data produced by the EPA is by default in the public domain and is not subject to domestic copyright protection under 17 U.S.C. § 105. Referenced data from non-EPA sources are neither inherently verified nor independently tested by the Agency in all circumstances. Permission to reproduce copyrighted items not produced by the EPA must be secured from the copyright owner. EPA strongly recommends careful attention be paid to metadata files associated with these data to better understand limitations, restrictions or intended use. The U.S. EPA shall not be held liable for improper or incorrect use of the data.""', 'descriptionType': 'Methods'}]","This submission includes publicly available data extracted in its original form. 
Please reference the Related Publication listed here for source and citation information &lt;br /&gt;&lt;br /&gt;\n\nThe following file contains information reported to EPA under the 2006 Inventory Update Rule (IUR). Please note that no information claimed as TSCA Confidential Business Information by an IUR reporter is contained in this file.\n[https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database] \n&lt;br /&gt; &lt;br /&gt;\n\nIf you have questions about the underlying data stored here, please contact U.S. Environmental Protection Agency. TSCA-Hotline@epa.gov is said to answer questions on chemical data reporting requirements for the current version of the program that generated this historical data. It's unclear if they would have much information on this particular data set. But they might be able to redirect a query. If you have questions or recommendations related to this metadata entry and extracted data, please contact the CAFE Data Management team at: climatecafe@bu.edu.\n&lt;br /&gt;&lt;br /&gt;\n\nThis ACCDB [Access database] file is downloaded in compressed (ZIP) file format. After downloading the file to your preferred location, double-click on the file to extract the ACCDB file. This data requires the use of the database application program Microsoft Access [or, possibly, Microsoft SQL.",[],,"[{'rightsUri': 'info:eu-repo/semantics/openAccess'}, {'rights': 'Creative Commons Attribution Non Commercial Share Alike 4.0 International', 'rightsUri': 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', 'schemeUri': 'https://spdx.org/licenses/', 'rightsIdentifier': 'cc-by-nc-sa-4.0', 'rightsIdentifierScheme': 'SPDX'}]",[],[],,gdcc.harvard-dv,harvardu,10.7910/dvn/f3a62w,,,,,,,
3,10.3886/e222881v1,True,200,"{'data': {'id': '10.3886/e222881v1', 'type': 'dois', 'attributes': {'doi': '10.3886/e222881v1', 'prefix': '10.3886', 'suffix': 'e222881v1', 'identifiers': [], 'alternateIdentifiers': [], 'creators': [{'name': 'United States Department Of Veterans Affairs', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}], 'titles': [{'lang': 'en', 'title': '2014 Minority Veteran Report | Department of Veterans Affairs Open Data Portal'}], 'publisher': 'ICPSR - Interuniversity Consortium for Political and Social Research', 'container': {}, 'publicationYear': 2025, 'subjects': [], 'contributors': [], 'dates': [{'date': '2014-01-01/2014-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}], 'language': 'en', 'types': {'ris': 'DATA', 'bibtex': 'misc', 'citeproc': 'dataset', 'schemaOrg': 'Dataset', 'resourceTypeGeneral': 'Dataset'}, 'relatedIdentifiers': [{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222881', 'relatedIdentifierType': 'DOI'}], 'relatedItems': [], 'sizes': [], 'formats': [], 'version': 'v1', 'rightsList': [], 'descriptions': [{'lang': 'en', 'description': 'This project includes a pdf capture of a webpage and the underlying data for the visualizations. 
<br><br>It is about the 2014 Minority Veteran Report, the goal of which is to gain an understanding of who our minority Veterans are, how their military service affects their post-military lives, and how they can be better served based on these insights.<br><br>', 'descriptionType': 'Abstract'}], 'geoLocations': [{'geoLocationPlace': 'United States'}], 'fundingReferences': [], 'xml': 'PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9InllcyI/Pgo8cmVzb3VyY2UgeG1sbnM9Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IiB4bWxuczp4c2k9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hLWluc3RhbmNlIiB4c2k6c2NoZW1hTG9jYXRpb249Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IGh0dHBzOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC4zODg2L0UyMjI4ODFWMTwvaWRlbnRpZmllcj4KICA8Y3JlYXRvcnM+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJPcmdhbml6YXRpb25hbCI+VW5pdGVkIFN0YXRlcyBEZXBhcnRtZW50IG9mIFZldGVyYW5zIEFmZmFpcnM8L2NyZWF0b3JOYW1lPgogICAgPC9jcmVhdG9yPgogIDwvY3JlYXRvcnM+CiAgPHRpdGxlcz4KICAgIDx0aXRsZSB4bWw6bGFuZz0iZW4iPjIwMTQgTWlub3JpdHkgVmV0ZXJhbiBSZXBvcnQgfCBEZXBhcnRtZW50IG9mIFZldGVyYW5zIEFmZmFpcnMgT3BlbiBEYXRhIFBvcnRhbDwvdGl0bGU+CiAgPC90aXRsZXM+CiAgPHB1Ymxpc2hlcj5JQ1BTUiAtIEludGVydW5pdmVyc2l0eSBDb25zb3J0aXVtIGZvciBQb2xpdGljYWwgYW5kIFNvY2lhbCBSZXNlYXJjaDwvcHVibGlzaGVyPgogIDxwdWJsaWNhdGlvblllYXI+MjAyNTwvcHVibGljYXRpb25ZZWFyPgogIDxyZXNvdXJjZVR5cGUgcmVzb3VyY2VUeXBlR2VuZXJhbD0iRGF0YXNldCIvPgogIDxkYXRlcz4KICAgIDxkYXRlIGRhdGVUeXBlPSJDb2xsZWN0ZWQiPjIwMTQtMDEtMDEvMjAxNC0xMi0zMTwvZGF0ZT4KICA8L2RhdGVzPgogIDxsYW5ndWFnZT5lbmc8L2xhbmd1YWdlPgogIDxyZWxhdGVkSWRlbnRpZmllcnM+CiAgICA8cmVsYXRlZElkZW50aWZpZXIgcmVsYXRlZElkZW50aWZpZXJUeXBlPSJET0kiIHJlbGF0aW9uVHlwZT0iSXNWZXJzaW9uT2YiPjEwLjM4ODYvRTIyMjg4MTwvcmVsYXRlZElkZW50aWZpZXI+CiAgPC9yZWxhdGVkSWRlbnRpZmllcnM+CiAgPHZlcnNpb24+djE8L3ZlcnNpb24+CiAgPGRlc2NyaXB0aW9ucz4KICAgIDxkZXNjcmlwdGlvbiBkZXNjcmlwdGlvblR5cGU9IkFic3RyYWN0IiB4bWw6bGFuZz0iZW4iPlRoaXMgcHJvamVjdCBpbmNs
dWRlcyBhIHBkZiBjYXB0dXJlIG9mIGEgd2VicGFnZSBhbmQgdGhlIHVuZGVybHlpbmcgZGF0YSBmb3IgdGhlIHZpc3VhbGl6YXRpb25zLiAmbHQ7YnImZ3Q7Jmx0O2JyJmd0O0l0IGlzIGFib3V0IHRoZSAyMDE0IE1pbm9yaXR5IFZldGVyYW4gUmVwb3J0LCB0aGUgZ29hbCBvZiB3aGljaCBpcyB0byBnYWluIGFuIHVuZGVyc3RhbmRpbmcgb2Ygd2hvIG91ciBtaW5vcml0eSBWZXRlcmFucyBhcmUsIGhvdyB0aGVpciBtaWxpdGFyeSBzZXJ2aWNlIGFmZmVjdHMgdGhlaXIgcG9zdC1taWxpdGFyeSBsaXZlcywgYW5kIGhvdyB0aGV5IGNhbiBiZSBiZXR0ZXIgc2VydmVkIGJhc2VkIG9uIHRoZXNlIGluc2lnaHRzLiZsdDticiZndDsmbHQ7YnImZ3Q7PC9kZXNjcmlwdGlvbj4KICA8L2Rlc2NyaXB0aW9ucz4KICA8Z2VvTG9jYXRpb25zPgogICAgPGdlb0xvY2F0aW9uPgogICAgICA8Z2VvTG9jYXRpb25QbGFjZSB4bWxuczp4cz0iaHR0cDovL3d3dy53My5vcmcvMjAwMS9YTUxTY2hlbWEiIHhzaTp0eXBlPSJ4czpzdHJpbmciPlVuaXRlZCBTdGF0ZXM8L2dlb0xvY2F0aW9uUGxhY2U+CiAgICA8L2dlb0xvY2F0aW9uPgogIDwvZ2VvTG9jYXRpb25zPgo8L3Jlc291cmNlPg==', 'url': 'https://www.datalumos.org/datalumos/project/222881/version/V1/view', 'contentUrl': None, 'metadataVersion': 0, 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'source': 'api', 'isActive': True, 'state': 'findable', 'reason': None, 'viewCount': 0, 'viewsOverTime': [], 'downloadCount': 0, 'downloadsOverTime': [], 'referenceCount': 0, 'citationCount': 0, 'citationsOverTime': [], 'partCount': 0, 'partOfCount': 0, 'versionCount': 0, 'versionOfCount': 1, 'created': '2025-03-14T13:30:04.000Z', 'registered': '2025-03-14T13:30:04.000Z', 'published': '2025', 'updated': '2025-03-14T13:30:05.000Z'}, 'relationships': {'client': {'data': {'id': 'gesis.icpsr', 'type': 'clients'}}, 'provider': {'data': {'id': 'icpsr', 'type': 'providers'}}, 'media': {'data': {'id': '10.3886/e222881v1', 'type': 'media'}}, 'references': {'data': []}, 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 'versions': {'data': []}, 'versionOf': {'data': [{'id': '10.3886/e222881', 'type': 'dois'}]}}}}",10.3886,e222881v1,ICPSR - Interuniversity Consortium for Political and Social 
Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/222881/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-14T13:30:04.000Z,2025-03-14T13:30:04.000Z,2025,2025-03-14T13:30:05.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],2014 Minority Veteran Report | Department of Veterans Affairs Open Data Portal,"[{'lang': 'en', 'title': '2014 Minority Veteran Report | Department of Veterans Affairs Open Data Portal'}]","[{'name': 'United States Department Of Veterans Affairs', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",United States Department Of Veterans Affairs,Organizational,[],,[],,"[{'date': '2014-01-01/2014-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2014-01-01/2014-12-31,2025,"[{'lang': 'en', 'description': 'This project includes a pdf capture of a webpage and the underlying data for the visualizations. <br><br>It is about the 2014 Minority Veteran Report, the goal of which is to gain an understanding of who our minority Veterans are, how their military service affects their post-military lives, and how they can be better served based on these insights.<br><br>', 'descriptionType': 'Abstract'}]","This project includes a pdf capture of a webpage and the underlying data for the visualizations. <br><br>It is about the 2014 Minority Veteran Report, the goal of which is to gain an understanding of who our minority Veterans are, how their military service affects their post-military lives, and how they can be better served based on these insights.<br><br>",[{'geoLocationPlace': 'United States'}],United States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222881', 'relatedIdentifierType': 'DOI'}]",10.3886/e222881,gesis.icpsr,icpsr,10.3886/e222881v1,10.3886/e222881,,,,,,
4,10.3886/e222043v1,True,200,"{'data': {'id': '10.3886/e222043v1', 'type': 'dois', 'attributes': {'doi': '10.3886/e222043v1', 'prefix': '10.3886', 'suffix': 'e222043v1', 'identifiers': [], 'alternateIdentifiers': [], 'creators': [{'name': 'Americorps', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}], 'titles': [{'lang': 'en', 'title': 'Americorps Member Exit Datasets 2016-2023'}], 'publisher': 'ICPSR - Interuniversity Consortium for Political and Social Research', 'container': {}, 'publicationYear': 2025, 'subjects': [], 'contributors': [], 'dates': [{'date': '2025', 'dateType': 'Issued'}], 'language': 'en', 'types': {'ris': 'DATA', 'bibtex': 'misc', 'citeproc': 'dataset', 'schemaOrg': 'Dataset', 'resourceTypeGeneral': 'Dataset'}, 'relatedIdentifiers': [{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222043', 'relatedIdentifierType': 'DOI'}], 'relatedItems': [], 'sizes': [], 'formats': [], 'version': 'v1', 'rightsList': [], 'descriptions': [{'lang': 'en', 'description': 'Upon exiting service, AmeriCorps members are invited to complete the \nAmeriCorps Member Exit Survey (AmeriCorps MES) to provide information on\n their service experiences. By collecting data on member experiences, \nAmeriCorps and its partners can better understand how to support its \nmembers, while individuals interested in becoming a member can see the \nbenefits of serving. Members that leave service after completing their \nfull term or members that depart from service early both complete the \nexit survey. The data includes information on members’ civic-mindedness,\n community involvement, cultural competency, and developed life and \ncareer skills (based on the AmeriCorps Member Theory of Change). As well\n as measures of members’ experience and training, motivations to serve, \nand post-service plans. Any data on members that started prior to them \nturning 18 years of age is excluded from this data. 
<br><br>', 'descriptionType': 'Abstract'}], 'geoLocations': [], 'fundingReferences': [], 'xml': 'PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9InllcyI/Pgo8cmVzb3VyY2UgeG1sbnM9Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IiB4bWxuczp4c2k9Imh0dHA6Ly93d3cudzMub3JnLzIwMDEvWE1MU2NoZW1hLWluc3RhbmNlIiB4c2k6c2NoZW1hTG9jYXRpb249Imh0dHA6Ly9kYXRhY2l0ZS5vcmcvc2NoZW1hL2tlcm5lbC00IGh0dHBzOi8vc2NoZW1hLmRhdGFjaXRlLm9yZy9tZXRhL2tlcm5lbC00LjMvbWV0YWRhdGEueHNkIj4KICA8aWRlbnRpZmllciBpZGVudGlmaWVyVHlwZT0iRE9JIj4xMC4zODg2L0UyMjIwNDNWMTwvaWRlbnRpZmllcj4KICA8Y3JlYXRvcnM+CiAgICA8Y3JlYXRvcj4KICAgICAgPGNyZWF0b3JOYW1lIG5hbWVUeXBlPSJPcmdhbml6YXRpb25hbCI+QW1lcmljb3JwczwvY3JlYXRvck5hbWU+CiAgICA8L2NyZWF0b3I+CiAgPC9jcmVhdG9ycz4KICA8dGl0bGVzPgogICAgPHRpdGxlIHhtbDpsYW5nPSJlbiI+QW1lcmljb3JwcyBNZW1iZXIgRXhpdCBEYXRhc2V0cyAyMDE2LTIwMjM8L3RpdGxlPgogIDwvdGl0bGVzPgogIDxwdWJsaXNoZXI+SUNQU1IgLSBJbnRlcnVuaXZlcnNpdHkgQ29uc29ydGl1bSBmb3IgUG9saXRpY2FsIGFuZCBTb2NpYWwgUmVzZWFyY2g8L3B1Ymxpc2hlcj4KICA8cHVibGljYXRpb25ZZWFyPjIwMjU8L3B1YmxpY2F0aW9uWWVhcj4KICA8cmVzb3VyY2VUeXBlIHJlc291cmNlVHlwZUdlbmVyYWw9IkRhdGFzZXQiLz4KICA8bGFuZ3VhZ2U+ZW5nPC9sYW5ndWFnZT4KICA8cmVsYXRlZElkZW50aWZpZXJzPgogICAgPHJlbGF0ZWRJZGVudGlmaWVyIHJlbGF0ZWRJZGVudGlmaWVyVHlwZT0iRE9JIiByZWxhdGlvblR5cGU9IklzVmVyc2lvbk9mIj4xMC4zODg2L0UyMjIwNDM8L3JlbGF0ZWRJZGVudGlmaWVyPgogIDwvcmVsYXRlZElkZW50aWZpZXJzPgogIDx2ZXJzaW9uPnYxPC92ZXJzaW9uPgogIDxkZXNjcmlwdGlvbnM+CiAgICA8ZGVzY3JpcHRpb24gZGVzY3JpcHRpb25UeXBlPSJBYnN0cmFjdCIgeG1sOmxhbmc9ImVuIj5VcG9uIGV4aXRpbmcgc2VydmljZSwgQW1lcmlDb3JwcyBtZW1iZXJzIGFyZSBpbnZpdGVkIHRvIGNvbXBsZXRlIHRoZSAKQW1lcmlDb3JwcyBNZW1iZXIgRXhpdCBTdXJ2ZXkgKEFtZXJpQ29ycHMgTUVTKSB0byBwcm92aWRlIGluZm9ybWF0aW9uIG9uCiB0aGVpciBzZXJ2aWNlIGV4cGVyaWVuY2VzLiBCeSBjb2xsZWN0aW5nIGRhdGEgb24gbWVtYmVyIGV4cGVyaWVuY2VzLCAKQW1lcmlDb3JwcyBhbmQgaXRzIHBhcnRuZXJzIGNhbiBiZXR0ZXIgdW5kZXJzdGFuZCBob3cgdG8gc3VwcG9ydCBpdHMgCm1lbWJlcnMsIHdoaWxlIGluZGl2aWR1YWxzIGludGVyZXN0ZWQgaW4gYmVjb21pbmcgYSBtZW1iZXIgY2FuIHNlZSB0aGUgCmJlbmVmaXRzIG9mIHNlcnZpbmcuIE1lbWJlcnMgdGh
hdCBsZWF2ZSBzZXJ2aWNlIGFmdGVyIGNvbXBsZXRpbmcgdGhlaXIgCmZ1bGwgdGVybSBvciBtZW1iZXJzIHRoYXQgZGVwYXJ0IGZyb20gc2VydmljZSBlYXJseSBib3RoIGNvbXBsZXRlIHRoZSAKZXhpdCBzdXJ2ZXkuIFRoZSBkYXRhIGluY2x1ZGVzIGluZm9ybWF0aW9uIG9uIG1lbWJlcnPigJkgY2l2aWMtbWluZGVkbmVzcywKIGNvbW11bml0eSBpbnZvbHZlbWVudCwgY3VsdHVyYWwgY29tcGV0ZW5jeSwgYW5kIGRldmVsb3BlZCBsaWZlIGFuZCAKY2FyZWVyIHNraWxscyAoYmFzZWQgb24gdGhlIEFtZXJpQ29ycHMgTWVtYmVyIFRoZW9yeSBvZiBDaGFuZ2UpLiBBcyB3ZWxsCiBhcyBtZWFzdXJlcyBvZiBtZW1iZXJz4oCZIGV4cGVyaWVuY2UgYW5kIHRyYWluaW5nLCBtb3RpdmF0aW9ucyB0byBzZXJ2ZSwgCmFuZCBwb3N0LXNlcnZpY2UgcGxhbnMuIEFueSBkYXRhIG9uIG1lbWJlcnMgdGhhdCBzdGFydGVkIHByaW9yIHRvIHRoZW0gCnR1cm5pbmcgMTggeWVhcnMgb2YgYWdlIGlzIGV4Y2x1ZGVkIGZyb20gdGhpcyBkYXRhLiAmbHQ7YnImZ3Q7Jmx0O2JyJmd0OzwvZGVzY3JpcHRpb24+CiAgPC9kZXNjcmlwdGlvbnM+CjwvcmVzb3VyY2U+', 'url': 'https://www.datalumos.org/datalumos/project/222043/version/V1/view', 'contentUrl': None, 'metadataVersion': 0, 'schemaVersion': 'http://datacite.org/schema/kernel-4', 'source': 'api', 'isActive': True, 'state': 'findable', 'reason': None, 'viewCount': 0, 'viewsOverTime': [], 'downloadCount': 0, 'downloadsOverTime': [], 'referenceCount': 0, 'citationCount': 0, 'citationsOverTime': [], 'partCount': 0, 'partOfCount': 0, 'versionCount': 0, 'versionOfCount': 1, 'created': '2025-03-09T01:43:58.000Z', 'registered': '2025-03-09T01:43:58.000Z', 'published': '2025', 'updated': '2025-03-09T01:43:59.000Z'}, 'relationships': {'client': {'data': {'id': 'gesis.icpsr', 'type': 'clients'}}, 'provider': {'data': {'id': 'icpsr', 'type': 'providers'}}, 'media': {'data': {'id': '10.3886/e222043v1', 'type': 'media'}}, 'references': {'data': []}, 'citations': {'data': []}, 'parts': {'data': []}, 'partOf': {'data': []}, 'versions': {'data': []}, 'versionOf': {'data': [{'id': '10.3886/e222043', 'type': 'dois'}]}}}}",10.3886,e222043v1,ICPSR - Interuniversity Consortium for Political and Social 
Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-09T01:43:58.000Z,2025-03-09T01:43:58.000Z,2025,2025-03-09T01:43:59.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],Americorps Member Exit Datasets 2016-2023,"[{'lang': 'en', 'title': 'Americorps Member Exit Datasets 2016-2023'}]","[{'name': 'Americorps', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",Americorps,Organizational,[],,[],,"[{'date': '2025', 'dateType': 'Issued'}]",,2025,"[{'lang': 'en', 'description': 'Upon exiting service, AmeriCorps members are invited to complete the AmeriCorps Member Exit Survey (AmeriCorps MES) to provide information on  their service experiences. By collecting data on member experiences, AmeriCorps and its partners can better understand how to support its members, while individuals interested in becoming a member can see the benefits of serving. Members that leave service after completing their full term or members that depart from service early both complete the exit survey. The data includes information on members’ civic-mindedness,  community involvement, cultural competency, and developed life and career skills (based on the AmeriCorps Member Theory of Change). As well  as measures of members’ experience and training, motivations to serve, and post-service plans. Any data on members that started prior to them turning 18 years of age is excluded from this data. <br><br>', 'descriptionType': 'Abstract'}]","Upon exiting service, AmeriCorps members are invited to complete the \nAmeriCorps Member Exit Survey (AmeriCorps MES) to provide information on\n their service experiences. By collecting data on member experiences, \nAmeriCorps and its partners can better understand how to support its \nmembers, while individuals interested in becoming a member can see the \nbenefits of serving. 
Members that leave service after completing their \nfull term or members that depart from service early both complete the \nexit survey. The data includes information on members’ civic-mindedness,\n community involvement, cultural competency, and developed life and \ncareer skills (based on the AmeriCorps Member Theory of Change). As well\n as measures of members’ experience and training, motivations to serve, \nand post-service plans. Any data on members that started prior to them \nturning 18 years of age is excluded from this data. <br><br>",[],,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222043', 'relatedIdentifierType': 'DOI'}]",10.3886/e222043,gesis.icpsr,icpsr,10.3886/e222043v1,10.3886/e222043,,,,,,


In [97]:
# Attach the DataCite metadata (dc_* columns) to every dataset row by joining
# on a normalized DOI: cast to string, strip whitespace, lower-case.
# Casting a missing value to str yields placeholders such as "nan"/"none";
# map those (and empty strings) back to None so they are never used as keys.
missing_doi_tokens = {"nan": None, "none": None, "": None}

df["doi_norm"] = (
    df["doi"]
    .astype(str)
    .str.strip()
    .str.lower()
    .replace(missing_doi_tokens)
)

# Normalize the lookup side exactly like the left side; otherwise a missing
# DOI here would survive as the literal join key "nan".
datacite_df["doi_norm"] = (
    datacite_df["doi"]
    .astype(str)
    .str.strip()
    .str.lower()
    .replace(missing_doi_tokens)
)

# Build a lookup with exactly one row per DOI:
#  - rows without a DOI are removed, because pandas matches null join keys
#    against each other and would attach them to every DOI-less row of df;
#  - duplicate DOIs are collapsed (first occurrence wins), because each
#    duplicate would multiply the matching rows of df in the left join.
datacite_lookup = (
    datacite_df
    .drop(columns=["doi"], errors="ignore")
    .dropna(subset=["doi_norm"])
    .drop_duplicates(subset="doi_norm", keep="first")
)

# Make the cell safe to re-run: if df already carries dc_* columns from an
# earlier merge, drop them first instead of accumulating _x/_y suffixed copies.
stale_dc_cols = [
    col
    for col in datacite_lookup.columns
    if col != "doi_norm" and str(col).startswith("dc_") and col in df.columns
]

df = df.drop(columns=stale_dc_cols).merge(
    datacite_lookup,
    on="doi_norm",
    how="left",
    validate="many_to_one",  # guards the one-row-per-DOI invariant built above
)

In [101]:
# Remove the raw DataCite API payload column; the flattened dc_* columns are kept.
# errors="ignore" keeps this cell re-runnable once the column is already gone.
df = df.drop(columns=["dc_raw"], errors="ignore")

In [102]:
# Preview the first five rows of the merged frame (head() defaults to n=5).
df.head()

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess,doi_norm,dc_ok,dc_status,dc_prefix,dc_suffix,dc_publisher,dc_publicationYear,dc_language,dc_version,dc_url,dc_contentUrl,dc_schemaVersion,dc_source,dc_isActive,dc_state,dc_reason,dc_metadataVersion,dc_created,dc_registered,dc_published,dc_updated,dc_viewCount,dc_downloadCount,dc_referenceCount,dc_citationCount,dc_partCount,dc_partOfCount,dc_versionCount,dc_versionOfCount,dc_types_ris,dc_types_bibtex,dc_types_citeproc,dc_types_schemaOrg,dc_types_resourceTypeGeneral,dc_identifiers,dc_alternateIdentifiers,dc_title,dc_titles_all,dc_creators_all,dc_creators_names,dc_creators_types,dc_contributors_all,dc_contributors_names,dc_subjects_all,dc_subjects,dc_dates_all,dc_date_collected,dc_date_issued,dc_descriptions_all,dc_abstract,dc_geoLocations_all,dc_geo_places,dc_rightsList,dc_fundingReferences,dc_relatedIdentifiers_all,dc_isVersionOf,dc_rel_client,dc_rel_provider,dc_rel_media,dc_rel_versionOf,dc_rel_versions,dc_rel_parts,dc_rel_partOf,dc_rel_citations,dc_rel_references,dc_error
0,_datasets/10j-injunctions.md,data_rescue_project,10(j) Injunctions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions,,2025-04-20,True,,Labor & Employment,1,10(j) Injunctions,759,https://doi.org/10.3886/E226824V1,"CSV, TXT",Finished,0.0,2025-04-07,"DRP, DL","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). 
This list is all 10(j) injunction cases authorized by the Board since September 1, 2010.",doi.org,10.3886/e226824v1,,,,10.3886/e226824v1,True,200.0,10.3886,e226824v1,ICPSR - Interuniversity Consortium for Political and Social Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/226824/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-04-15T15:51:51.000Z,2025-04-15T15:51:52.000Z,2025,2025-04-15T15:51:52.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],10(j) Injunctions,"[{'lang': 'en', 'title': '10(j) Injunctions'}]","[{'name': 'National Labor Relations Board', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",National Labor Relations Board,Organizational,[],,"[{'lang': 'en', 'subject': 'labor'}, {'lang': 'en', 'subject': 'labor unions'}, {'lang': 'en', 'subject': 'labor disputes'}, {'lang': 'en', 'subject': 'injunctions'}, {'lang': 'en', 'subject': 'collective bargaining'}, {'lang': 'en', 'subject': 'labor relations'}, {'lang': 'en', 'subject': 'unionization'}]",labor; labor unions; labor disputes; injunctions; collective bargaining; labor relations; unionization,"[{'date': '2010-01-01/2025-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2010-01-01/2025-12-31,2025,"[{'lang': 'en', 'description': 'Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. 
Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.<br><br>There are 15 categories of labor disputes in which Section 10(j) injunctions may be appropriate, listed at [https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions/section-10j-categories]. Under NLRB processes, potential cases are identified by Regional Offices and reviewed by the General Counsel, who must seek authorization from the Board before proceeding to court. <br><br>The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). This list is all 10(j) injunction cases authorized by the Board since September 1, 2010. <br>', 'descriptionType': 'Abstract'}]","Section 10(j) of the National Labor Relations Act authorizes the National Labor Relations Board to seek temporary injunctions against employers and unions in federal district courts to stop unfair labor practices while the case is being litigated before administrative law judges and the Board. These temporary injunctions are needed to protect the process of collective bargaining and employee rights under the Act, and to ensure that Board decisions will be meaningful. The section was added as part of a set of reforms to the Act in 1947. Over the years, all NLRB General Counsels have made use of this effective enforcement tool, as shown in this chart.<br><br>There are 15 categories of labor disputes in which Section 10(j) injunctions may be appropriate, listed at [https://www.nlrb.gov/what-we-do/investigate-charges/10j-injunctions/section-10j-categories]. Under NLRB processes, potential cases are identified by Regional Offices and reviewed by the General Counsel, who must seek authorization from the Board before proceeding to court. <br><br>The csv contains Authorization Dates, Case Numbers, Case Names, and Injunction Status as of the date collected (2025-04-07). 
This list is all 10(j) injunction cases authorized by the Board since September 1, 2010. <br>",[{'geoLocationPlace': 'United States'}],United States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e226824', 'relatedIdentifierType': 'DOI'}]",10.3886/e226824,gesis.icpsr,icpsr,10.3886/e226824v1,10.3886/e226824,,,,,,
1,_datasets/2002-2024-mbda-grantees.md,data_rescue_project,2002-2024 MBDA Grantees,Minority Business Development Agency,Department of Commerce,mbda.gov,https://www.mbda.gov/research/data/mbda-grantees,,2025-04-02,False,,Business & Economy,1,2002-2024 MBDA Grantees,696,https://www.datalumos.org/datalumos/project/223443/version/V1/view,CSV,Finished,0.0,2025-03-18,"DRP, DL",,www.datalumos.org,10.3886/E223443V1,223443.0,V1,10.3886/E223443V1,10.3886/e223443v1,True,200.0,10.3886,e223443v1,ICPSR - Interuniversity Consortium for Political and Social Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/223443/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-19T15:17:50.000Z,2025-03-19T15:17:50.000Z,2025,2025-03-19T15:17:51.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],2022-2024 MBDA Grantees,"[{'lang': 'en', 'title': '2022-2024 MBDA Grantees'}]","[{'name': 'United States Department Of Commerce. Minority Business Development Agency', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",United States Department Of Commerce. Minority Business Development Agency,Organizational,[],,"[{'lang': 'en', 'subject': 'grants'}, {'lang': 'en', 'subject': 'minority businesses'}]",grants; minority businesses,"[{'date': '2022-01-01/2024-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2022-01-01/2024-12-31,2025,"[{'lang': 'en', 'description': 'This dataset contains public information on grantees of the Minority Business Development Agency (MBDA) programs, covering grants awarded since 2022. 
It includes data on all grant-funded centers across all MBDA programs, providing information on each grantee’s including location, contact details, service area, and its associated MBDA program.<br><br>Additional information includes the status of each grantee (whether they are currently funded and operating), grant award identifiers, and a brief description of the grantees services and specialities. Detailed information is provided in the data schema provided below.<br><br>MBDA’s mission is to promote the growth and global competitiveness of Minority Business Enterprises (MBE) in order to unlock the country’s full economic potential. MBDA programs provide support for MBEs through a variety of services aimed at improving access to capital, contracts, and markets. These programs help entrepreneurs overcome barriers to success and expand their businesses by offering tailored technical assistance, business consulting, and access to networks.<br><br>Note: the original website is titled 2002-2024 grantees but the description refers to grants awarded since 2022<br>', 'descriptionType': 'Abstract'}]","This dataset contains public information on grantees of the Minority Business Development Agency (MBDA) programs, covering grants awarded since 2022. It includes data on all grant-funded centers across all MBDA programs, providing information on each grantee’s including location, contact details, service area, and its associated MBDA program.<br><br>Additional information includes the status of each grantee (whether they are currently funded and operating), grant award identifiers, and a brief description of the grantees services and specialities. Detailed information is provided in the data schema provided below.<br><br>MBDA’s mission is to promote the growth and global competitiveness of Minority Business Enterprises (MBE) in order to unlock the country’s full economic potential. 
MBDA programs provide support for MBEs through a variety of services aimed at improving access to capital, contracts, and markets. These programs help entrepreneurs overcome barriers to success and expand their businesses by offering tailored technical assistance, business consulting, and access to networks.<br><br>Note: the original website is titled 2002-2024 grantees but the description refers to grants awarded since 2022<br>","[{'geoLocationPlace': 'U.S. Territories'}, {'geoLocationPlace': 'U.S. States'}]",U.S. Territories; U.S. States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e223443', 'relatedIdentifierType': 'DOI'}]",10.3886/e223443,gesis.icpsr,icpsr,10.3886/e223443v1,10.3886/e223443,,,,,,
2,_datasets/2006-iur-public-database.md,data_rescue_project,2006 IUR Public Database,Environmental Protection Agency,Environmental Protection Agency,epa.gov,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,,2025-09-18,True,https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database,Climate & Environment,1,2006 IUR Public Database,1280,https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi10.7910/DVN/F3A62W,"mhtml, ZIP, PDF",Finished,0.005,2025-02-26,"HD, CAFE-RCC","There was a 2006 version and a 2002-1986 version. Both are archived, hence 2 dataverse URLs. ~ag. Seperate Metadata https://www.epa.gov/chemical-data-reporting/summary-cdr-reporting-requirements-year, https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database",dataverse.harvard.edu,10.7910/dvn/f3a62w,,,10.7910/dvn/f3a62w,10.7910/dvn/f3a62w,True,200.0,10.791,dvn/f3a62w,Harvard Dataverse,2025.0,,1.0,https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/F3A62W,,http://datacite.org/schema/kernel-4,mds,True,findable,,1.0,2025-02-18T13:10:34.000Z,2025-02-26T21:41:18.000Z,2025,2025-05-11T23:42:01.000Z,56.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DATA,misc,dataset,Dataset,Dataset,[],[],Extracted Data From: Downloadable 2006 IUR Public Database,[{'title': 'Extracted Data From: Downloadable 2006 IUR Public Database'}],"[{'name': 'EPA', 'nameType': 'Organizational', 'affiliation': ['U.S. 
EPA'], 'nameIdentifiers': []}]",EPA,Organizational,"[{'name': 'CAFE', 'nameType': 'Personal', 'affiliation': [], 'contributorType': 'ContactPerson', 'nameIdentifiers': []}]",CAFE,"[{'subject': 'Chemistry'}, {'subject': 'Earth and Environmental Sciences'}, {'subject': 'Environmental Health', 'schemeUri': 'https://tools.niehs.nih.gov/cchhglossary/'}, {'subject': 'Exposure'}]",Chemistry; Earth and Environmental Sciences; Environmental Health; Exposure,"[{'date': '2025-02-18', 'dateType': 'Submitted'}, {'date': '2025-02-26', 'dateType': 'Available'}, {'date': '2025-02-26', 'dateType': 'Updated'}, {'date': '2006-01-01/2006-12-31', 'dateType': 'Other', 'dateInformation': 'Time period covered by the data'}, {'date': '2025', 'dateType': 'Issued'}]",,2025,"[{'description': 'This submission includes publicly available data extracted in its original form. Please reference the Related Publication listed here for source and citation information &lt;br /&gt;&lt;br /&gt; The following file contains information reported to EPA under the 2006 Inventory Update Rule (IUR). Please note that no information claimed as TSCA Confidential Business Information by an IUR reporter is contained in this file. [https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database] &lt;br /&gt; &lt;br /&gt; If you have questions about the underlying data stored here, please contact U.S. Environmental Protection Agency. TSCA-Hotline@epa.gov is said to answer questions on chemical data reporting requirements for the current version of the program that generated this historical data. It's unclear if they would have much information on this particular data set. But they might be able to redirect a query. If you have questions or recommendations related to this metadata entry and extracted data, please contact the CAFE Data Management team at: climatecafe@bu.edu. &lt;br /&gt;&lt;br /&gt; This ACCDB [Access database] file is downloaded in compressed (ZIP) file format. 
After downloading the file to your preferred location, double-click on the file to extract the ACCDB file. This data requires the use of the database application program Microsoft Access [or, possibly, Microsoft SQL.', 'descriptionType': 'Abstract'}, {'description': 'This submission includes publicly available data extracted in its original form. Please reference the Related Publication listed here for source and citation information. Downloadable 2006 IUR Public Database data was downloaded by Anne Gunn on 2025-02-18', 'descriptionType': 'Other'}, {'description': 'Microsoft Access, unknown', 'descriptionType': 'TechnicalInfo'}, {'description': 'The disclaimers below were copied as is from the EPA Disclaimers site (https://www.epa.gov/web-policies-and-procedures/epa-disclaimers): ""Disclaimer of Endorsement Mention of or referral to commercial products or services, and/or links to non-EPA sites does not imply official EPA endorsement of or responsibility for the opinions, ideas, data, or products presented at those locations, or guarantee the validity of the information provided. Mention of commercial products/services on non-EPA websites is provided solely as a pointer to information on topics related to environmental protection that may be useful to EPA staff and the public. Copyright Status The U.S. Government retains a nonexclusive, royalty-free license to publish or reproduce these documents, or allow others to do so, for U.S. Government purposes. These documents may be freely distributed and used for non-commercial, scientific and educational purposes. Commercial use of the documents available from the EPA websites may be protected under the U.S. and Foreign Copyright Laws. Individual documents on the EPA website may have different copyright conditions, and that will be noted in those documents. 
Disclaimer of Liability With respect to documents available from the EPA website, neither the United States Government nor any of their employees, makes any warranty, express or implied, including the warranties of merchantability and fitness for a particular purpose, or assumes any legal liability or responsibility for the accuracy, completeness, or usefulness of any information, apparatus, product, or process disclosed, or represents that its use would not infringe privately owned rights. Exit Disclaimer The Exit icon denotes that you are leaving the EPA website and entering an external link or a third-party site. EPA's official web site is www.epa.gov. EPA has provided this link because it provides additional information that may be useful or interesting and is being provided in a manner consistent with the intended purpose of the EPA website. Please note that EPA uses third-party sites to provide EPA content already on www.epa.gov in a different format. EPA cannot attest to the accuracy of non-EPA information provided by these third-party sites or any other linked site. EPA is providing these links for your reference. In doing so, EPA does not endorse any non-government websites, companies or applications. Also, please be aware that the privacy protection provided on the EPA.gov domain (EPA Privacy and Security Notice) does not apply to these third-party sites. To learn more about EPA and social media, please refer to&amp; our social media page. Notice Information from the EPA website resides on numerous computer systems funded by the Agency. The use of the EPA websites may be monitored for computer security purposes. Any unauthorized access to the EPA website is prohibited and is subject to criminal and civil penalties under federal laws including, but not limited to, Public Law 99-474, the Computer Fraud and Abuse Act of 1986. Terms of Use for Geospatial Data These geospatial data and corresponding cartographic materials have been approved for use by the U.S. 
Environmental Protection Agency (EPA) as advised by the EPA Geospatial Advisory Committee (EGAC). This approved release is on the condition that neither the EPA nor the U.S. Government may be held liable for any damages resulting from its authorized or unauthorized use. These data and any corresponding products, services, or materials do not necessarily represent the EPA’s official position or viewpoint, expressed or implied. These content items are not intended for use in establishing liability or calculating cost recovery statutes of limitations. They cannot be relied upon to create any rights, substantive or procedural, enforceable by any party in litigation with the United States or third parties. Additionally, although these data have been processed successfully on EPA computer systems, no warranty expressed or implied can be made regarding the accuracy or utility of the data on any other system or for general or scientific purposes, nor shall the act of distribution constitute any such warranty. The Agency reserves the right to revise EPA-stewarded datasets pursuant to further analysis and review without public notice. Unless otherwise specified, geospatial data produced by the EPA is by default in the public domain and is not subject to domestic copyright protection under 17 U.S.C. § 105. Referenced data from non-EPA sources are neither inherently verified nor independently tested by the Agency in all circumstances. Permission to reproduce copyrighted items not produced by the EPA must be secured from the copyright owner. EPA strongly recommends careful attention be paid to metadata files associated with these data to better understand limitations, restrictions or intended use. The U.S. EPA shall not be held liable for improper or incorrect use of the data.""', 'descriptionType': 'Methods'}]","This submission includes publicly available data extracted in its original form. 
Please reference the Related Publication listed here for source and citation information &lt;br /&gt;&lt;br /&gt;\n\nThe following file contains information reported to EPA under the 2006 Inventory Update Rule (IUR). Please note that no information claimed as TSCA Confidential Business Information by an IUR reporter is contained in this file.\n[https://www.epa.gov/chemical-data-reporting/downloadable-2006-iur-public-database] \n&lt;br /&gt; &lt;br /&gt;\n\nIf you have questions about the underlying data stored here, please contact U.S. Environmental Protection Agency. TSCA-Hotline@epa.gov is said to answer questions on chemical data reporting requirements for the current version of the program that generated this historical data. It's unclear if they would have much information on this particular data set. But they might be able to redirect a query. If you have questions or recommendations related to this metadata entry and extracted data, please contact the CAFE Data Management team at: climatecafe@bu.edu.\n&lt;br /&gt;&lt;br /&gt;\n\nThis ACCDB [Access database] file is downloaded in compressed (ZIP) file format. After downloading the file to your preferred location, double-click on the file to extract the ACCDB file. This data requires the use of the database application program Microsoft Access [or, possibly, Microsoft SQL.",[],,"[{'rightsUri': 'info:eu-repo/semantics/openAccess'}, {'rights': 'Creative Commons Attribution Non Commercial Share Alike 4.0 International', 'rightsUri': 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', 'schemeUri': 'https://spdx.org/licenses/', 'rightsIdentifier': 'cc-by-nc-sa-4.0', 'rightsIdentifierScheme': 'SPDX'}]",[],[],,gdcc.harvard-dv,harvardu,10.7910/dvn/f3a62w,,,,,,,
3,_datasets/2014-minority-veteran-report.md,data_rescue_project,2014 Minority Veteran Report,Office of Information and Technology - IT Operations and Services (ITOPS),Department of Veterans Affairs,data.va.gov,https://www.data.va.gov/stories/s/gavm-n6bm,,2025-03-17,False,,Science & Research; Military & Veterans Affairs,1,2014 Minority Veteran Report,412,https://www.datalumos.org/datalumos/project/222881/version/V1/view,CSV,Finished,0.0,2025-03-11,"DRP, DL",,www.datalumos.org,10.3886/E222881V1,222881.0,V1,10.3886/E222881V1,10.3886/e222881v1,True,200.0,10.3886,e222881v1,ICPSR - Interuniversity Consortium for Political and Social Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/222881/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-14T13:30:04.000Z,2025-03-14T13:30:04.000Z,2025,2025-03-14T13:30:05.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],2014 Minority Veteran Report | Department of Veterans Affairs Open Data Portal,"[{'lang': 'en', 'title': '2014 Minority Veteran Report | Department of Veterans Affairs Open Data Portal'}]","[{'name': 'United States Department Of Veterans Affairs', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",United States Department Of Veterans Affairs,Organizational,[],,[],,"[{'date': '2014-01-01/2014-12-31', 'dateType': 'Collected'}, {'date': '2025', 'dateType': 'Issued'}]",2014-01-01/2014-12-31,2025,"[{'lang': 'en', 'description': 'This project includes a pdf capture of a webpage and the underlying data for the visualizations. <br><br>It is about the 2014 Minority Veteran Report, the goal of which is to gain an understanding of who our minority Veterans are, how their military service affects their post-military lives, and how they can be better served based on these insights.<br><br>', 'descriptionType': 'Abstract'}]","This project includes a pdf capture of a webpage and the underlying data for the visualizations. 
<br><br>It is about the 2014 Minority Veteran Report, the goal of which is to gain an understanding of who our minority Veterans are, how their military service affects their post-military lives, and how they can be better served based on these insights.<br><br>",[{'geoLocationPlace': 'United States'}],United States,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222881', 'relatedIdentifierType': 'DOI'}]",10.3886/e222881,gesis.icpsr,icpsr,10.3886/e222881v1,10.3886/e222881,,,,,,
4,_datasets/2016-americorps-mes-americorps-member-exit-survey.md,data_rescue_project,2016 AmeriCorps MES AmeriCorps Member Exit Survey,AmeriCorps,AmeriCorps,data.americorps.gov,https://data.americorps.gov/National-Service/2016-AmeriCorps-MES-AmeriCorps-Member-Exit-Survey/wqhv-fm5d/about_data,,2025-03-10,False,,Humanitarian & Disaster Relief; Military & Veterans Affairs,1,2016 AmeriCorps MES AmeriCorps Member Exit Survey,396,https://www.datalumos.org/datalumos/project/222043/version/V1/view,"CSV, XLSX, PDF",Finished,0.0,2025-03-08,"DRP, DL",,www.datalumos.org,10.3886/E222043V1,222043.0,V1,10.3886/E222043V1,10.3886/e222043v1,True,200.0,10.3886,e222043v1,ICPSR - Interuniversity Consortium for Political and Social Research,2025.0,en,v1,https://www.datalumos.org/datalumos/project/222043/version/V1/view,,http://datacite.org/schema/kernel-4,api,True,findable,,0.0,2025-03-09T01:43:58.000Z,2025-03-09T01:43:58.000Z,2025,2025-03-09T01:43:59.000Z,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,DATA,misc,dataset,Dataset,Dataset,[],[],Americorps Member Exit Datasets 2016-2023,"[{'lang': 'en', 'title': 'Americorps Member Exit Datasets 2016-2023'}]","[{'name': 'Americorps', 'nameType': 'Organizational', 'affiliation': [], 'nameIdentifiers': []}]",Americorps,Organizational,[],,[],,"[{'date': '2025', 'dateType': 'Issued'}]",,2025,"[{'lang': 'en', 'description': 'Upon exiting service, AmeriCorps members are invited to complete the AmeriCorps Member Exit Survey (AmeriCorps MES) to provide information on  their service experiences. By collecting data on member experiences, AmeriCorps and its partners can better understand how to support its members, while individuals interested in becoming a member can see the benefits of serving. Members that leave service after completing their full term or members that depart from service early both complete the exit survey. 
The data includes information on members’ civic-mindedness,  community involvement, cultural competency, and developed life and career skills (based on the AmeriCorps Member Theory of Change). As well  as measures of members’ experience and training, motivations to serve, and post-service plans. Any data on members that started prior to them turning 18 years of age is excluded from this data. <br><br>', 'descriptionType': 'Abstract'}]","Upon exiting service, AmeriCorps members are invited to complete the \nAmeriCorps Member Exit Survey (AmeriCorps MES) to provide information on\n their service experiences. By collecting data on member experiences, \nAmeriCorps and its partners can better understand how to support its \nmembers, while individuals interested in becoming a member can see the \nbenefits of serving. Members that leave service after completing their \nfull term or members that depart from service early both complete the \nexit survey. The data includes information on members’ civic-mindedness,\n community involvement, cultural competency, and developed life and \ncareer skills (based on the AmeriCorps Member Theory of Change). As well\n as measures of members’ experience and training, motivations to serve, \nand post-service plans. Any data on members that started prior to them \nturning 18 years of age is excluded from this data. <br><br>",[],,[],[],"[{'relationType': 'IsVersionOf', 'relatedIdentifier': '10.3886/e222043', 'relatedIdentifierType': 'DOI'}]",10.3886/e222043,gesis.icpsr,icpsr,10.3886/e222043v1,10.3886/e222043,,,,,,


In [103]:
# Keep only the rows whose "doi" value is missing, then display them.
no_doi = df.loc[lambda frame: frame["doi"].isna()]

no_doi

Unnamed: 0,file,schema,title,organization,agency,websites,data_source,description,last_modified,metadata_available,metadata_url,category,resource_count,project_title,id,url,format,status,size,download_date,maintainer,notes,domain,doi,project_id,version,doi_guess,doi_norm,dc_ok,dc_status,dc_prefix,dc_suffix,dc_publisher,dc_publicationYear,dc_language,dc_version,dc_url,dc_contentUrl,dc_schemaVersion,dc_source,dc_isActive,dc_state,dc_reason,dc_metadataVersion,dc_created,dc_registered,dc_published,dc_updated,dc_viewCount,dc_downloadCount,dc_referenceCount,dc_citationCount,dc_partCount,dc_partOfCount,dc_versionCount,dc_versionOfCount,dc_types_ris,dc_types_bibtex,dc_types_citeproc,dc_types_schemaOrg,dc_types_resourceTypeGeneral,dc_identifiers,dc_alternateIdentifiers,dc_title,dc_titles_all,dc_creators_all,dc_creators_names,dc_creators_types,dc_contributors_all,dc_contributors_names,dc_subjects_all,dc_subjects,dc_dates_all,dc_date_collected,dc_date_issued,dc_descriptions_all,dc_abstract,dc_geoLocations_all,dc_geo_places,dc_rightsList,dc_fundingReferences,dc_relatedIdentifiers_all,dc_isVersionOf,dc_rel_client,dc_rel_provider,dc_rel_media,dc_rel_versionOf,dc_rel_versions,dc_rel_parts,dc_rel_partOf,dc_rel_citations,dc_rel_references,dc_error
44,_datasets/administrative-law-judge-decisions.md,data_rescue_project,Administrative Law Judge Decisions,National Labor Relations Board,National Labor Relations Board,nlrb.gov,https://www.nlrb.gov/cases-decisions/decisions/administrative-law-judge-decisions,,2025-05-14,False,,Labor & Employment,1,Administrative Law Judge Decisions,1008,https://nlrbresearch.com/NLRB/NLRB_DB?_search=type%3A+%22ALJ%22,PDF,Finished,0.000,,NLRB-R,Captured as part of NLRB Research a free database a researcher made.,nlrbresearch.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
61,_datasets/affirmatively-furthering-fair-housing-affh-data.md,data_rescue_project,Affirmatively Furthering Fair Housing (AFFH) Data,Department of Housing and Urban Development,Department of Housing and Urban Development,hud.gov,https://www.hud.gov/AFFH,,2025-05-19,True,https://datacatalog.urban.org/dataset/us-department-housing-and-urban-development-affirmatively-furthering-fair-housing-hud-affh,Housing & Community Development,1,Affirmatively Furthering Fair Housing (AFFH) Data,1037,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/04/03/housing-and-communities/hud-affh/data.zip,ZIP,Finished,0.876,2024-12-18,UI,"This dataset contains all data, documentation, and file resources linked to on the main US Department of Housing and Urban Development’s Affirmatively Furthering Fair Housing (AFFH) page and powering the AFFH tool.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
65,_datasets/aging-independence-and-disability-agid-program-portal-data.md,data_rescue_project,"AGing, Independence, and Disability (AGID) Program Portal Data",Administration for Community Living,Department of Health and Human Services,agid.acl.gov,https://agid.acl.gov/release.html,,2025-05-19,True,https://datacatalog.urban.org/dataset/aging-independence-and-disability-agid-program-portal-data,Social Services; Health & Healthcare,1,"AGing, Independence, and Disability (AGID) Program Portal Data",1041,https://urban-data-catalog.s3.us-east-1.amazonaws.com/drupal-root-live/2025/03/28/race-and-equity/agid/data.zip,ZIP,Finished,6.000,2025-03-28,UI,"This resource contains all data from AGing, Independence, and Disability’s Program Data Portal from the US Department of Health and Human Services’ Administration for Community Living.",urban-data-catalog.s3.us-east-1.amazonaws.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
67,_datasets/aids-drug-assistance-programs-adaps.md,data_rescue_project,AIDS Drug Assistance Programs (ADAPS),Health Resources and Services Administration,Department of Health and Human Services,hrsa.gov,https://www.hrsa.gov/grants/find-funding/HRSA-23-056,,2025-03-26,False,,Health & Healthcare,1,AIDS Drug Assistance Programs (ADAPS),242,https://www.dropbox.com/scl/fo/60drbfxp3p1hb1l4gr3cj/AHrpM10MrLH-0MvQHCcXFI4?rlkey=fps9vit5x2cg6367jhdykkl9a&dl=0,CSV,Finished,0.000,2025-01-31,ICPSR,,www.dropbox.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
68,_datasets/air-data-pre-generated-files-dec-2024.md,data_rescue_project,"Air Data Pre-Generated Files, Dec. 2024",Environmental Protection Agency,Environmental Protection Agency,aqs.epa.gov,https://aqs.epa.gov/aqsweb/airdata/download_files.html,,2025-05-18,False,,Climate & Environment,1,"Air Data Pre-Generated Files, Dec. 2024",1029,https://sciop.net/uploads/75bd916972ae78cbe59534dd88da55d11c4719f2,CSV,Finished,20.000,2025-05-15,"DRP, SRC",Alternate torrent location https://academictorrents.com/details/75bd916972ae78cbe59534dd88da55d11c4719f2,sciop.net,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017,_datasets/world-ocean-atlas-2023-figures.md,data_rescue_project,World Ocean Atlas 2023 Figures,National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/woa/WOA23F/,,2025-04-19,True,https://www.ncei.noaa.gov/data/oceans/woa/WOA23/DOCUMENTATION/WOA23_Product_Documentation.pdf,Climate & Environment,1,World Ocean Atlas 2023 Figures,756,https://archive.org/details/noaa-ncei-woa23-figures-2025-04-19,PNG,Finished,105.000,2025-04-18,"DRP, IA",Includes actively seeded torrent file (the one without _archive). Alternate torrent location https://academictorrents.com/details/9bcdcb5efbcec15e37d918784618329e487599ac,archive.org,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2018,_datasets/world-ocean-atlas-woa-2023.md,data_rescue_project,World Ocean Atlas (WOA) 2023,National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/ncei/woa/,,2025-04-14,True,https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc0270533,Climate & Environment,1,World Ocean Atlas (WOA) 2023,725,https://sciop.net/uploads/581bb9462e958df6011025b26fa1d609a2dafd73,nc,Uploading,157.000,2025-04-09,"DRP, SRC",New corrected/updated torrent. Alternate torrent location https://academictorrents.com/details/581bb9462e958df6011025b26fa1d609a2dafd73,sciop.net,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019,_datasets/world-ocean-database-wod.md,data_rescue_project,World Ocean Database (WOD),National Oceanic and Atmospheric Administration,Department of Commerce,ncei.noaa.gov,https://www.ncei.noaa.gov/data/oceans/ncei/wod/,,2025-04-14,True,https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodcNCEI-WOD,Climate & Environment,1,World Ocean Database (WOD),726,https://sciop.net/uploads/c0dce33ade7d0f828a542d5bed069b8909b3ee87,nc,Finished,159.000,2025-04-09,"DRP, SRC",New corrected/updated torrent. Alternate torrent location https://academictorrents.com/details/c0dce33ade7d0f828a542d5bed069b8909b3ee87,sciop.net,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2023,_datasets/youth-risk-behavior-surveillance-system-yrbss.md,data_rescue_project,Youth Risk Behavior Surveillance System (YRBSS),Centers for Disease Control and Prevention (CDC),Department of Health and Human Services,cdc.gov,https://www.cdc.gov/index.html,,2025-05-19,True,,Health & Healthcare,2,Youth Risk Behavior Surveillance System (YRBSS),326,https://www.dropbox.com/scl/fo/2t0ehrteq62jqrj0slm3q/AHed9WK-9ydPJT1398w42zo?rlkey=pqb2fisu1rgjjc2badfyfq0nk&dl=0,"DTA, XLSX, do, MDB, dat, SPS, SAS",Finished,0.000,2025-01-31,ICPSR,,www.dropbox.com,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [104]:
# Independent copy of the rows that do carry a DOI (rows with a missing "doi" are excluded).
df_2 = df.dropna(subset=["doi"]).copy()

In [105]:
# Fraction of missing values per column, sparsest columns first (top 50 shown).
(
    df_2.isna()
    .mean()
    .sort_values(ascending=False)
    .iloc[:50]
)

dc_rel_partOf                   1.000000
dc_rel_parts                    1.000000
dc_reason                       1.000000
dc_contentUrl                   1.000000
dc_rel_references               0.998252
dc_rel_versions                 0.998252
description                     0.996503
dc_rel_citations                0.970862
dc_contributors_names           0.970862
metadata_url                    0.970862
dc_error                        0.951632
notes                           0.603730
dc_geo_places                   0.511072
dc_date_collected               0.480186
dc_subjects                     0.430653
format                          0.303030
size                            0.168998
dc_language                     0.093240
dc_isVersionOf                  0.086830
dc_rel_versionOf                0.085664
project_id                      0.075175
version                         0.075175
dc_version                      0.062937
dc_source                       0.053030
dc_schemaVersion

In [106]:
# Columns whose fraction of missing values is at or above this cutoff are discarded.
threshold = 0.97

# Per-column fraction of missing values, used to pick the labels to remove.
na_fraction = df_2.isna().mean()
cols_to_drop = df_2.columns[na_fraction.ge(threshold)]

# Rebind df_2 to the pruned frame; cols_to_drop keeps the removed labels for reference.
df_2 = df_2.drop(cols_to_drop, axis="columns")

print(f"Dropped {len(cols_to_drop)} columns")

Dropped 10 columns


In [107]:
# Display the column labels currently present in df_2.
df_2.columns

Index(['file', 'schema', 'title', 'organization', 'agency', 'websites',
       'data_source', 'last_modified', 'metadata_available', 'category',
       'resource_count', 'project_title', 'id', 'url', 'format', 'status',
       'size', 'download_date', 'maintainer', 'notes', 'domain', 'doi',
       'project_id', 'version', 'doi_guess', 'doi_norm', 'dc_ok', 'dc_status',
       'dc_prefix', 'dc_suffix', 'dc_publisher', 'dc_publicationYear',
       'dc_language', 'dc_version', 'dc_url', 'dc_schemaVersion', 'dc_source',
       'dc_isActive', 'dc_state', 'dc_metadataVersion', 'dc_created',
       'dc_registered', 'dc_published', 'dc_updated', 'dc_viewCount',
       'dc_downloadCount', 'dc_referenceCount', 'dc_citationCount',
       'dc_partCount', 'dc_partOfCount', 'dc_versionCount',
       'dc_versionOfCount', 'dc_types_ris', 'dc_types_bibtex',
       'dc_types_citeproc', 'dc_types_schemaOrg',
       'dc_types_resourceTypeGeneral', 'dc_identifiers',
       'dc_alternateIdentifiers', 'dc_tit

In [108]:
# Persist the DOI-bearing rows to a CSV file in the current working directory.
# NOTE(review): to_csv writes the row index as the first column by default — confirm that is wanted.
df_2.to_csv(path_or_buf='drp_withdoi.csv')

In [109]:
# Display the (rows, columns) dimensions of df_2.
df_2.shape

(1716, 83)