# get_grant_info
This notebook creates grant-related node and relationship files for the RADx-KG (restricted to RADx-rad projects):
* Grant.csv
* FundingOpportunity.csv
* FundingOpportunity-PROVIDES-Grant.csv

In [1]:
import os
import grant_query
import pandas as pd
# Show every column and every row when a DataFrame is displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Column-width truncation is left at the pandas default:
#pd.set_option('display.max_colwidth', None)

In [2]:
# Root directory for knowledge-graph output; the "nodes" and "relationships"
# CSV files written by this notebook go in sub-folders below it.
KG_PATH = "../kg/data"
# Directory holding the input file radx-projects.csv.
DERIVED_DATA_PATH = "../derived_data"

In [3]:
# Earliest fiscal year to keep when filtering grants; FY2021 starts Oct. 1, 2020.
START_FISCAL_YEAR = 2021 # starts Oct. 1, 2020

In [4]:
# Load the project table and keep only the RADx-rad research initiative.
projects = (
    pd.read_csv(os.path.join(DERIVED_DATA_PATH, "radx-projects.csv"))
    .query("researchInitiative == 'RADx-rad'")
)

In [5]:
# Row count of the filtered project table, followed by a five-row preview.
print(f"Number of projects {len(projects)}")
projects.head(n=5)

Number of projects 50


Unnamed: 0,researchInitiative,dbgapAccession,projectNum,coreProjectNum,projectSerialNum,studyInvestigator,studyTitle,subProject
0,RADx-rad,phs002744.v1.p1,U01AA029345,U01AA029345,AA029345,Khalid Salaita,Rapid Acceleration of Diagnostics - Radical (R...,Automatic Detection & Tracing
12,RADx-rad,phs002778.v1.p1,1U18TR003793-01,U18TR003793,TR003793,Shannon Stott,Rapid Acceleration of Diagnostics - Radical (R...,Exosome
26,RADx-rad,phs002604.v1.p1,1U01DA053949-01,U01DA053949,DA053949,Anne-Catrin Uhlemann,Rapid Acceleration of Diagnostics - RADICAL (R...,Wastewater
27,RADx-rad,phs002583.v1.p1,5U01HL150852-02,U01HL150852,HL150852,Edward P DeMauro,Rapid Acceleration of Diagnostics - Radical (...,Novel Biosensing and VOC
28,RADx-rad,phs002603.v1.p1,3R33HD105594-03S1,R33HD105594,HD105594,Audrey Odom-John,Rapid Acceleration of Diagnostics - Radical (...,PreVAIL kIds


In [6]:
# Distinct core project numbers, in order of first appearance.
project_num = projects["coreProjectNum"].unique().tolist()
print("Number of unique core project numbers:", len(project_num))

Number of unique core project numbers: 48


### Create Grant nodes

In [7]:
# Fetch the grant records for every core project number. The lookup is done
# once: a second call would replace the frame and silently discard the
# applId cast applied below.
grants = grant_query.get_projects(project_num)
# Application ids are identifiers, not quantities: keep them as text.
grants["applId"] = grants["applId"].astype(str)
# Integer fiscal years for the comparison against START_FISCAL_YEAR.
grants["fiscalYear"] = grants["fiscalYear"].astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["project_serial_num"].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["covid_response"].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting valu

In [8]:
# Keep grants from START_FISCAL_YEAR onward; rows without a fiscal year are kept too.
fiscal_year = grants["fiscalYear"]
grants = grants[fiscal_year.ge(START_FISCAL_YEAR) | fiscal_year.isna()]

In [9]:
# Keep one record per (core project, fiscal year).
# A stable sort preserves the incoming order of rows that share a fiscal year,
# so keep="last" picks the same row on every run; the default sort algorithm
# gives no such guarantee for ties. The result is rebound instead of mutated
# in place because `grants` is a filtered slice of the fetched frame.
grants = (
    grants
    .sort_values(by="fiscalYear", kind="stable")
    .drop_duplicates(subset=["coreProjectNum", "fiscalYear"], keep="last")
)

In [10]:
# Distinct values of the "agency" column among the retained grants.
grants["agency"].unique()

array(['NIDA', 'NIAAA', 'NICHD', 'NIDCR', 'NHLBI', 'NCATS', 'NIDCD',
       'NINR', 'NIDDK', 'NLM', 'NIMHD', 'NIEHS'], dtype=object)

In [11]:
# Rows remaining after the fiscal-year filter and de-duplication.
print("Number of grants:", len(grants))

Number of grants: 99


In [12]:
# Number of retained grant records per fiscal year.
grants["fiscalYear"].value_counts()

fiscalYear
2021    47
2022    42
2023    10
Name: count, dtype: int64

In [13]:
# Attach the project-level labels to each grant record. The right join keeps
# every row of the project table, including projects with no grant record.
project_labels = projects[["coreProjectNum", "researchInitiative", "subProject"]]
grant_nodes = pd.merge(grants, project_labels, how="right", on="coreProjectNum")
print("Number of grant nodes:", len(grant_nodes))

Number of grant nodes: 101


In [14]:
# Source column -> Grant node property. Insertion order is the column order
# of the exported node file.
grant_node_map = dict(
    coreProjectNum="id",
    projectTitle="name",
    abstract="abstract",
    narrative="narrative",
    fundingMechanism="fundingMechanism",
    awardCode="awardCode",
    researchInitiative="researchInitiative",
    subProject="subProject",
)

In [15]:
# Keep only the mapped columns, rename them to node property names, drop
# exact duplicate rows, and blank out missing values.
grant_nodes = (
    grant_nodes[list(grant_node_map)]
    .rename(columns=grant_node_map)
    .drop_duplicates()
    .fillna("")
)

In [16]:
# Remove common headers from abstract and narrative text.
# Prefixes are applied per column in the order listed.
header_prefixes = {
    "abstract": ("project", "abstract", "summary"),
    "narrative": (
        "project",
        "narrative",
        "public health narrative",
        "relevance to public health statement",
    ),
}
for column, prefixes in header_prefixes.items():
    for prefix in prefixes:
        grant_nodes[column] = grant_nodes[column].apply(grant_query.remove_prefix, prefix=prefix)

In [17]:
# TODO check where name of grant varies for the same grant id
# One node per grant id: the first row seen for each id is kept.
grant_nodes = grant_nodes.drop_duplicates(subset="id")
print(len(grant_nodes))
grant_nodes

48


Unnamed: 0,id,name,abstract,narrative,fundingMechanism,awardCode,researchInitiative,subProject
0,U01AA029345,Rolosense: An innovative platform for automati...,The ultimate goal of this proposal is to devel...,Public Health Statement/Narrative\nThe aim of ...,Non-SBIR/STTR,U01,RADx-rad,Automatic Detection & Tracing
2,U18TR003793,Microfluidic Isolation and Characterization of...,"Robust, efficient and reliable testing for SAR...",We will develop a SARS-CoV-2 detection assay u...,Other Research-Related,U18,RADx-rad,Exosome
4,U01DA053949,Optimizing SARS-CoV-2 wastewater based surveil...,The novel coronavirus SARS-CoV-2 is causing si...,Wastewater based testing (WBT) holds great pro...,Non-SBIR/STTR,U01,RADx-rad,Wastewater
6,U01HL150852,Rutgers Optimizes Innovation (ROI) Program,The Rutgers Optimizes Innovation (ROI) Program...,The Rutgers Optimizes Innovation (ROI) Program...,Non-SBIR/STTR,U01,RADx-rad,Novel Biosensing and VOC
8,R33HD105594,Diagnosis of MIS-C in febrile children,The recent emergence of SARS-CoV-2 and resulta...,This research is highly relevant to public hea...,Non-SBIR/STTR,R33,RADx-rad,PreVAIL kIds
9,R01DC016112,A confectionary-based screening tool for asses...,The goals of this Emergency Competitive Revisi...,This research will develop a novel screening t...,Non-SBIR/STTR,R01,RADx-rad,Chemosensory Testing
10,R44DE030842,A scalable aptamer-based electrochemical biose...,The COVID-19 pandemic is a critical global pub...,"Worldwide, healthcare systems are struggling t...",SBIR/STTR,R44,RADx-rad,Novel Biosensing and VOC
12,R44DE030852,Designer DNA Nanostructure Based Biosensing fo...,A Novel Saliva-Based Aptamer Detection Assay f...,Direct detection of viral pathogens is needed ...,SBIR/STTR,R44,RADx-rad,Novel Biosensing and VOC
14,U18TR003780,Exosome-based Non-traditional Technologies Tow...,,The urgent need to curb the spread of SARS-CoV...,Other Research-Related,U18,RADx-rad,Exosome
16,U01DA053899,"Improved scalability, sensitivity, and interpr...","Presently, the application of molecular techno...",The proposed research will enhance the usefuln...,Non-SBIR/STTR,U01,RADx-rad,Wastewater


In [18]:
# Export the Grant node file (no index column) and display what was written.
grant_csv = os.path.join(KG_PATH, "nodes", "Grant.csv")
grant_nodes.to_csv(grant_csv, index=False)
print("Number of grants:", len(grant_nodes))
grant_nodes

Number of grants: 48


Unnamed: 0,id,name,abstract,narrative,fundingMechanism,awardCode,researchInitiative,subProject
0,U01AA029345,Rolosense: An innovative platform for automati...,The ultimate goal of this proposal is to devel...,Public Health Statement/Narrative\nThe aim of ...,Non-SBIR/STTR,U01,RADx-rad,Automatic Detection & Tracing
2,U18TR003793,Microfluidic Isolation and Characterization of...,"Robust, efficient and reliable testing for SAR...",We will develop a SARS-CoV-2 detection assay u...,Other Research-Related,U18,RADx-rad,Exosome
4,U01DA053949,Optimizing SARS-CoV-2 wastewater based surveil...,The novel coronavirus SARS-CoV-2 is causing si...,Wastewater based testing (WBT) holds great pro...,Non-SBIR/STTR,U01,RADx-rad,Wastewater
6,U01HL150852,Rutgers Optimizes Innovation (ROI) Program,The Rutgers Optimizes Innovation (ROI) Program...,The Rutgers Optimizes Innovation (ROI) Program...,Non-SBIR/STTR,U01,RADx-rad,Novel Biosensing and VOC
8,R33HD105594,Diagnosis of MIS-C in febrile children,The recent emergence of SARS-CoV-2 and resulta...,This research is highly relevant to public hea...,Non-SBIR/STTR,R33,RADx-rad,PreVAIL kIds
9,R01DC016112,A confectionary-based screening tool for asses...,The goals of this Emergency Competitive Revisi...,This research will develop a novel screening t...,Non-SBIR/STTR,R01,RADx-rad,Chemosensory Testing
10,R44DE030842,A scalable aptamer-based electrochemical biose...,The COVID-19 pandemic is a critical global pub...,"Worldwide, healthcare systems are struggling t...",SBIR/STTR,R44,RADx-rad,Novel Biosensing and VOC
12,R44DE030852,Designer DNA Nanostructure Based Biosensing fo...,A Novel Saliva-Based Aptamer Detection Assay f...,Direct detection of viral pathogens is needed ...,SBIR/STTR,R44,RADx-rad,Novel Biosensing and VOC
14,U18TR003780,Exosome-based Non-traditional Technologies Tow...,,The urgent need to curb the spread of SARS-CoV...,Other Research-Related,U18,RADx-rad,Exosome
16,U01DA053899,"Improved scalability, sensitivity, and interpr...","Presently, the application of molecular techno...",The proposed research will enhance the usefuln...,Non-SBIR/STTR,U01,RADx-rad,Wastewater


In [19]:
# Number of rows in the exported Grant node table.
print(f"Number of grant nodes: {len(grant_nodes)}")

Number of grant nodes: 48


In [20]:
# Five-row preview of the project table before comparing serial numbers.
projects.head(n=5)

Unnamed: 0,researchInitiative,dbgapAccession,projectNum,coreProjectNum,projectSerialNum,studyInvestigator,studyTitle,subProject
0,RADx-rad,phs002744.v1.p1,U01AA029345,U01AA029345,AA029345,Khalid Salaita,Rapid Acceleration of Diagnostics - Radical (R...,Automatic Detection & Tracing
12,RADx-rad,phs002778.v1.p1,1U18TR003793-01,U18TR003793,TR003793,Shannon Stott,Rapid Acceleration of Diagnostics - Radical (R...,Exosome
26,RADx-rad,phs002604.v1.p1,1U01DA053949-01,U01DA053949,DA053949,Anne-Catrin Uhlemann,Rapid Acceleration of Diagnostics - RADICAL (R...,Wastewater
27,RADx-rad,phs002583.v1.p1,5U01HL150852-02,U01HL150852,HL150852,Edward P DeMauro,Rapid Acceleration of Diagnostics - Radical (...,Novel Biosensing and VOC
28,RADx-rad,phs002603.v1.p1,3R33HD105594-03S1,R33HD105594,HD105594,Audrey Odom-John,Rapid Acceleration of Diagnostics - Radical (...,PreVAIL kIds


In [21]:
# Compare project serial numbers from the project table with those present in
# the retrieved grants.
# NOTE: `project_num` is rebound here from the list of core project numbers
# built earlier to a set of serial numbers; re-running the grant lookup cell
# after this one would query with the wrong identifiers.
project_num = set(projects["projectSerialNum"].unique())
grant_num = set(grants["projectSerialNum"].unique())
# "Matching" means present on both sides, so report the intersection rather
# than the size of the grant set alone.
print("Matching grants:", len(project_num & grant_num))

Matching grants: 48


In [22]:
# Serial numbers listed for projects but absent from the retrieved grants.
missing = list(project_num.difference(grant_num))
print("Missing grants:", len(missing))

Missing grants: 0


In [23]:
# Project rows whose serial number has no matching grant (empty when none are missing).
projects.loc[projects["projectSerialNum"].isin(missing)]

Unnamed: 0,researchInitiative,dbgapAccession,projectNum,coreProjectNum,projectSerialNum,studyInvestigator,studyTitle,subProject


### Create FundingOpportunity-PROVIDES-Grant relationships

In [24]:
# Distinct (grant, funding opportunity) pairs with both values present.
# NOTE(review): no later cell in this notebook reads this frame; the
# relationship table below is built from `grants` directly.
grant_funding_opportunities = (
    grants[["coreProjectNum", "opportunityNumber"]]
    .dropna()
    .drop_duplicates()
)

In [25]:
# Source column -> relationship endpoint: the funding opportunity is the
# "from" side and the grant is the "to" side.
gfo_map = dict(opportunityNumber="from", coreProjectNum="to")

In [26]:
# Build the FundingOpportunity-PROVIDES-Grant relationship table.
# Rows with a missing endpoint are dropped explicitly: NaN compares unequal
# to "", so the empty-string filter alone would let them through and they
# would be exported as relationships with no source or target.
gfo = (
    grants[list(gfo_map)]
    .rename(columns=gfo_map)
    .dropna()
    .drop_duplicates()
)
# Grants without an opportunity number may carry an empty string instead of NaN.
gfo = gfo[gfo["from"] != ""]
gfo

Unnamed: 0,from,to
85,RFA-OD-20-015,U01DA053949
84,RFA-OD-20-014,U01AA029316
83,RFA-OD-20-023,R61HD105590
82,RFA-OD-20-020,R44DE030852
81,RFA-OD-20-020,R44DE030842
79,RFA-OD-20-014,U01AA029328
78,RFA-OD-20-015,U01DA053899
76,RFA-OD-20-023,R61HD105619
75,RFA-OD-20-016,R01DE031114
74,RFA-OD-20-014,U01AA029348


In [27]:
# Export the relationship file (no index column).
gfo.to_csv(os.path.join(KG_PATH, "relationships", "FundingOpportunity-PROVIDES-Grant.csv"), index=False)
# Each row of gfo is one opportunity -> grant edge, so this count is the
# number of relationships, not the number of distinct funding opportunities.
print("Number of FundingOpportunity-PROVIDES-Grant relationships:", gfo.shape[0])
gfo.head()

Number of funding opportunities: 53


Unnamed: 0,from,to
85,RFA-OD-20-015,U01DA053949
84,RFA-OD-20-014,U01AA029316
83,RFA-OD-20-023,R61HD105590
82,RFA-OD-20-020,R44DE030852
81,RFA-OD-20-020,R44DE030842


In [28]:
# Distinct funding opportunity numbers, in order of first appearance.
funding_opportunities = list(gfo["from"].unique())

In [29]:
# Look up details for each funding opportunity number via the project helper.
# NOTE(review): presumably performs a remote search per number — confirm in grant_query.
fo_details = grant_query.search_funding_opportunities(funding_opportunities)

In [30]:
# Five-row preview of the funding opportunity details.
fo_details.head(n=5)

Unnamed: 0,id,number,title,agencyCode,agency,openDate,closeDate,oppStatus,docType,cfdaList
0,328588,RFA-OD-20-015,Emergency Awards: RADx-rad Wastewater Detectio...,HHS-NIH11,National Institutes of Health,08/06/2020,09/15/2020,archived,synopsis,"[93.879, 93.113, 93.310, 93.859, 93.279]"
1,328587,RFA-OD-20-014,Emergency Awards: Automatic Detection and Trac...,HHS-NIH11,National Institutes of Health,08/06/2020,09/15/2020,archived,synopsis,"[93.393, 93.213, 93.273, 93.396, 93.233, 93.83..."
2,328593,RFA-OD-20-023,Emergency Awards: RADx-rad Predicting Viral-As...,HHS-NIH11,National Institutes of Health,08/06/2020,09/30/2020,archived,synopsis,"[93.837, 93.233, 93.307, 93.855, 93.989, 93.84..."
3,328569,RFA-OD-20-020,Emergency Awards RADx-RAD: Novel Biosensing fo...,HHS-NIH11,National Institutes of Health,08/06/2020,09/18/2020,archived,synopsis,"[93.837, 93.286, 93.307, 93.879, 93.350, 93.86..."
4,328567,RFA-OD-20-016,Emergency Awards: RADx-RAD Multimodal COVID-19...,HHS-NIH11,National Institutes of Health,08/06/2020,09/30/2020,archived,synopsis,"[93.361, 93.121, 93.307, 93.859, 93.310, 93.84..."


### Create FundingOpportunity nodes

In [31]:
# Source column -> FundingOpportunity node property.
fo_map = {"number": "id", "title": "name"}
# Keep the mapped columns, rename them, and drop exact duplicate rows.
fo = fo_details[list(fo_map)].rename(columns=fo_map).drop_duplicates()

In [32]:
# Derive each opportunity's URL from its id and append it as the last column.
fo = fo.assign(url=fo["id"].apply(grant_query.add_funding_opportunity_url))

In [33]:
# Export the FundingOpportunity node file (no index column) and display it.
funding_opportunity_csv = os.path.join(KG_PATH, "nodes", "FundingOpportunity.csv")
fo.to_csv(funding_opportunity_csv, index=False)
print("Number of funding opportunities:", len(fo))
fo

Number of funding opportunities: 16


Unnamed: 0,id,name,url
0,RFA-OD-20-015,Emergency Awards: RADx-rad Wastewater Detectio...,https://grants.nih.gov/grants/guide/rfa-files/...
1,RFA-OD-20-014,Emergency Awards: Automatic Detection and Trac...,https://grants.nih.gov/grants/guide/rfa-files/...
2,RFA-OD-20-023,Emergency Awards: RADx-rad Predicting Viral-As...,https://grants.nih.gov/grants/guide/rfa-files/...
3,RFA-OD-20-020,Emergency Awards RADx-RAD: Novel Biosensing fo...,https://grants.nih.gov/grants/guide/rfa-files/...
4,RFA-OD-20-016,Emergency Awards: RADx-RAD Multimodal COVID-19...,https://grants.nih.gov/grants/guide/rfa-files/...
5,RFA-HL-13-008,The NIH Centers for Accelerated Innovations (U54),https://grants.nih.gov/grants/guide/rfa-files/...
6,RFA-OD-20-018,Emergency Awards: Exosome-based Non-traditiona...,https://grants.nih.gov/grants/guide/rfa-files/...
7,RFA-OD-20-021,Emergency Awards RADx-RAD: Novel Biosensing fo...,https://grants.nih.gov/grants/guide/rfa-files/...
8,RFA-OD-20-017,Emergency Awards RADx-RAD: Screening for COVID...,https://grants.nih.gov/grants/guide/rfa-files/...
9,RFA-OD-20-022,Emergency Awards: Chemosensory Testing as a CO...,https://grants.nih.gov/grants/guide/rfa-files/...
