# Retrieve RADx-rad Study Abstracts from dbGaP Database
This notebook uses the list of dbGaP accession numbers to retrieve the study titles and abstracts for the RADx-rad project.

**Author:** Peter W. Rose ([pwrose@ucsd.edu](mailto:pwrose@ucsd.edu))  
**Date:** 2025-03-13

In [1]:
import glob
from pathlib import Path

import pandas as pd

import nih_utils

In [2]:
# Input files
# Study list; the columns project_num and dbgap_accession are read from it below.
DBGAP_IDS = "../data/studies.csv"
# Grant list; the columns project_num, sub_project, and research_initiative are
# read from it below and joined to the study list on project_num.
RADX_RAD_GRANTS = "../data/grants.csv"
# Output file
# Receives the merged study/grant table together with the retrieved
# title, focus, and description columns (written in the last cell).
DBGAP_ABSTRACTS = "../derived_data/dbgap_abstracts.csv"

## Data Integration
Merge the dbGaP study accessions with the grant data, joining on the project number.

In [3]:
# Both tables are read entirely as strings, and empty cells stay empty strings
# instead of being converted to NaN (keep_default_na=False).
grants = pd.read_csv(
    RADX_RAD_GRANTS,
    usecols=["project_num", "sub_project", "research_initiative"],
    dtype=str,
    keep_default_na=False,
)

# Attach the grant annotations to each study. merge() defaults to an inner
# join, so only project numbers present in both files are kept.
dbgap = pd.read_csv(
    DBGAP_IDS,
    usecols=["project_num", "dbgap_accession"],
    dtype=str,
    keep_default_na=False,
).merge(grants, on="project_num")

In [4]:
# Add the serial number of each project, computed from its project number by
# nih_utils.get_project_serial_num.
# Keep in mind that study phs002603.v1.p1 is associated with two project
# numbers, so its accession occurs in two rows.
dbgap["project_serial_num"] = dbgap["project_num"].apply(
    nih_utils.get_project_serial_num
)

# Report the number of study/project rows, then display the table.
print(len(dbgap))
dbgap

49


Unnamed: 0,project_num,dbgap_accession,research_initiative,sub_project,project_serial_num
0,1U01HL152410-01,phs002522.v1.p1,RADx-rad,Novel Biosensing and VOC,HL152410
1,1R01NR020105-01,phs002523.v1.p1,RADx-rad,Multimodal Surveillance,NR020105
2,1R01DE031114-01,phs002524.v1.p1,RADx-rad,Multimodal Surveillance,DE031114
3,1U01DA053941-01,phs002525.v1.p1,RADx-rad,Wastewater,DA053941
4,3U01LM013129-02S1,phs002527.v1.p1,RADx-rad,Wastewater,LM013129
5,1U01DA053903-01,phs002542.v1.p1,RADx-rad,Wastewater,DA053903
6,1U01AA029324-01,phs002543.v1.p1,RADx-rad,Automatic Detection & Tracing,AA029324
7,1U18TR003778-01,phs002544.v1.p1,RADx-rad,Exosome,TR003778
8,1U01AA029328-01,phs002546.v1.p1,RADx-rad,Automatic Detection & Tracing,AA029328
9,1R61HD105610-01,phs002549.v1.p1,RADx-rad,PreVAIL kIds,HD105610


## Retrieve and Save dbGaP Titles and Abstracts

In [5]:
# Columns that receive the values returned for each study, in this order.
# Note that dbgap_accession itself is overwritten with the returned value.
info_columns = ["dbgap_accession", "title", "focus", "description"]

# nih_utils.extract_dbgap_study_info is called once per row (an accession that
# occurs in two rows is looked up twice). Wrapping each result in a Series makes
# apply() return a DataFrame with one column per returned value.
# NOTE(review): presumably each call queries dbGaP over the network - confirm in nih_utils.
study_info = dbgap["dbgap_accession"].apply(
    lambda accession: pd.Series(nih_utils.extract_dbgap_study_info(accession))
)
dbgap[info_columns] = study_info

dbgap.head()

phs002522.v1.p1
phs002523.v1.p1
phs002524.v1.p1
phs002525.v1.p1
phs002527.v1.p1
phs002542.v1.p1
phs002543.v1.p1
phs002544.v1.p1
phs002546.v1.p1
phs002549.v1.p1
phs002550.v1.p1
phs002551.v1.p1
phs002553.v1.p1
phs002561.v1.p1
phs002563.v1.p1
phs002565.v1.p1
phs002569.v1.p1
phs002570.v1.p1
phs002572.v1.p1
phs002573.v1.p1
phs002583.v1.p1
phs002585.v1.p1
phs002588.v1.p1
phs002600.v1.p1
phs002602.v1.p1
phs002603.v1.p1
phs002603.v1.p1
phs002604.v1.p1
phs002609.v1.p1
phs002631.v1.p1
phs002642.v1.p1
phs002657.v1.p1
phs002679.v1.p1
phs002685.v1.p1
phs002689.v1.p1
phs002699.v1.p1
phs002700.v1.p1
phs002702.v1.p1
phs002709.v1.p1
phs002729.v1.p1
phs002744.v1.p1
phs002747.v1.p1
phs002778.v1.p1
phs002781.v1.p1
phs002782.v1.p1
phs002924.v1.p1
phs002945.v1.p1
phs002964.v1.p1
phs003124.v1.p1


Unnamed: 0,project_num,dbgap_accession,research_initiative,sub_project,project_serial_num,title,focus,description
0,1U01HL152410-01,phs002522.v1.p1,RADx-rad,Novel Biosensing and VOC,HL152410,Rapid Acceleration of Diagnostics - Radical (R...,COVID-19,"This proposal describes the design, fabricatio..."
1,1R01NR020105-01,phs002523.v1.p1,RADx-rad,Multimodal Surveillance,NR020105,Rapid Acceleration of Diagnostics - Radical (R...,COVID-19,We plan to extend and improve our COH/PHD syst...
2,1R01DE031114-01,phs002524.v1.p1,RADx-rad,Multimodal Surveillance,DE031114,Rapid Acceleration of Diagnostics - Radical (R...,COVID-19,Vulnerable populations do not just need testin...
3,1U01DA053941-01,phs002525.v1.p1,RADx-rad,Wastewater,DA053941,Rapid Acceleration of Diagnostics - Radical (R...,COVID-19,"The University of Miami (UM), with three prima..."
4,3U01LM013129-02S1,phs002527.v1.p1,RADx-rad,Wastewater,LM013129,Rapid Acceleration of Diagnostics - Radical (R...,COVID-19,COVID-19 is expected to become one of the larg...


In [6]:
# Make sure the output directory exists before writing: to_csv raises an
# OSError when the parent directory is missing (e.g. on a fresh checkout),
# which would make the notebook fail at its very last step.
Path(DBGAP_ABSTRACTS).parent.mkdir(parents=True, exist_ok=True)

# Save the merged table with the retrieved study information; the positional
# row index carries no information and is therefore not written.
dbgap.to_csv(DBGAP_ABSTRACTS, index=False)