# FibROAD 
A manually curated resource for integrating multi-omics evidence in fibrosis research.

## Reading Data <!-- second-level heading -->

In [1]:
import pandas as pd
import os

# Load the two FibROAD tables. sep=None with the python engine lets pandas
# sniff the delimiter of each file.
df_basic = pd.read_csv("FRG_Basic_Info.csv", sep=None, engine="python")
df_articles = pd.read_csv("FRG_Articles.csv", sep=None, engine="python")

# Drop the stray "Unnamed: 7" column from the articles dataframe before merging
# (errors="ignore" makes this a no-op when the column is absent).
df_articles = df_articles.drop(columns=["Unnamed: 7"], errors="ignore")

# Rename the "Fibrosis" column of the basic-info dataframe to "GeneName" so both
# tables share the merge key. Assigning the result (rather than inplace=True)
# keeps the cell free of in-place mutation.
df_basic = df_basic.rename(columns={"Fibrosis": "GeneName"})

# Left merge: keep every article row and attach the per-gene annotation.
# NOTE(review): assumes GeneName is unique in df_basic; duplicates there would
# multiply article rows — confirm.
merged = pd.merge(df_articles, df_basic, on="GeneName", how="left")

# Prefix EntrezID values with "ENTREZ:".
# A left merge leaves NaN for genes without a basic-info row, which turns an
# integer ID column into float; a plain astype(str) would then produce
# "ENTREZ:5244.0" / "ENTREZ:nan". Go through the nullable Int64 dtype so IDs
# stay integral, and leave missing IDs missing.
entrez_id = merged["EntrezID"]
if pd.api.types.is_float_dtype(entrez_id):
    entrez_id = entrez_id.astype("Int64")
merged["EntrezID"] = ("ENTREZ:" + entrez_id.astype("string")).astype(object)


In [2]:
# Display the merged article + gene-annotation table for a visual check.
merged

Unnamed: 0,GeneName,Alias,FullName,PMID,Method,DiseaseModel,DiseaseName,DiseaseID,Fibrosis,EntrezID,UniprotID,FullName_NCBI,Alias_NCBI,Summary_NCBI,FullName_UNIPROT,Summary_UNIPROT
0,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,31071368,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...
1,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,16223543,Gene Knockout,Biliary Fibrosis,fibrosis of bile duct,MONDO:0041959,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...
2,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,33340584,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...
3,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,21868490,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...
4,ACE,,angiotensin converting enzyme,11425779,Inhibition,Cardiac Fibrosis,endomyocardial fibrosis,MONDO:0006746,pro,ENTREZ:1636,P12821,angiotensin I converting enzyme,DCP; ACE1; DCP1; CD143,This gene encodes an enzyme involved in blood ...,Angiotensin-converting enzyme,Converts angiotensin I to angiotensin II by re...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,IL11,,interleukin 11,29160304,Gene Knockout/Gene Overexpression/Direct Treat...,Cardiac Fibrosis,endomyocardial fibrosis,MONDO:0006746,pro,ENTREZ:3589,P20809,interleukin 11,AGIF; IL-11,The protein encoded by this gene is a member o...,Interleukin-11,Cytokine that stimulates the proliferation of ...
854,IL13,,interleukin 13,26436920,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3596,P35225,interleukin 13,P600; IL-13,This gene encodes an immunoregulatory cytokine...,Interleukin-13,"Cytokine (PubMed:8096327, PubMed:8097324). Inh..."
855,IL4,,interleukin 4,26436920,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3565,P05112,interleukin 4,BSF1; IL-4; BCGF1; BSF-1; BCGF-1,The protein encoded by this gene is a pleiotro...,Interleukin-4,Participates in at least several B-cell activa...
856,IL7,,interleukin 7,25352130,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3574,P13232,interleukin 7,IL-7,The protein encoded by this gene is a cytokine...,Interleukin-7,Hematopoietic growth factor capable of stimula...


In [3]:
# Build the 'key_sentence' field: a series of "<label>: <value>" segments joined
# by the full-width '｜' delimiter, with missing values rendered as "N/A".
# Each entry is (label shown in the sentence, column of `merged` it is read from).
key_sentence_fields = [
    ("Method", "Method"),
    ("FullName", "FullName"),
    ("DiseaseModel", "DiseaseModel"),
    ("Alias", "Alias"),
    ("Summary", "Summary_NCBI"),
    ("UniprotID", "UniprotID"),
    ("FullName_UNIPROT", "FullName_UNIPROT"),
    ("Summary_UNIPROT", "Summary_UNIPROT"),
]

key_sentence = None
for label, column in key_sentence_fields:
    segment = label + ": " + merged[column].fillna("N/A")
    if key_sentence is None:
        key_sentence = segment
    else:
        key_sentence = key_sentence + "｜" + segment

merged["key_sentence"] = key_sentence
merged

Unnamed: 0,GeneName,Alias,FullName,PMID,Method,DiseaseModel,DiseaseName,DiseaseID,Fibrosis,EntrezID,UniprotID,FullName_NCBI,Alias_NCBI,Summary_NCBI,FullName_UNIPROT,Summary_UNIPROT,key_sentence
0,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,31071368,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...,Method: Gene Knockout｜FullName: ATP binding ca...
1,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,16223543,Gene Knockout,Biliary Fibrosis,fibrosis of bile duct,MONDO:0041959,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...,Method: Gene Knockout｜FullName: ATP binding ca...
2,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,33340584,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...,Method: Gene Knockout｜FullName: ATP binding ca...
3,ABCB4,MDR2/MDR3,ATP binding cassette subfamily B member 4,21868490,Gene Knockout,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,anti,ENTREZ:5244,P21439,ATP binding cassette subfamily B member 4,GBD1; ICP3; MDR2; MDR3; PGY3; ABC21; MDR2/3; P...,The membrane-associated protein encoded by thi...,Phosphatidylcholine translocator ABCB4,Energy-dependent phospholipid efflux transloca...,Method: Gene Knockout｜FullName: ATP binding ca...
4,ACE,,angiotensin converting enzyme,11425779,Inhibition,Cardiac Fibrosis,endomyocardial fibrosis,MONDO:0006746,pro,ENTREZ:1636,P12821,angiotensin I converting enzyme,DCP; ACE1; DCP1; CD143,This gene encodes an enzyme involved in blood ...,Angiotensin-converting enzyme,Converts angiotensin I to angiotensin II by re...,Method: Inhibition｜FullName: angiotensin conve...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,IL11,,interleukin 11,29160304,Gene Knockout/Gene Overexpression/Direct Treat...,Cardiac Fibrosis,endomyocardial fibrosis,MONDO:0006746,pro,ENTREZ:3589,P20809,interleukin 11,AGIF; IL-11,The protein encoded by this gene is a member o...,Interleukin-11,Cytokine that stimulates the proliferation of ...,Method: Gene Knockout/Gene Overexpression/Dire...
854,IL13,,interleukin 13,26436920,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3596,P35225,interleukin 13,P600; IL-13,This gene encodes an immunoregulatory cytokine...,Interleukin-13,"Cytokine (PubMed:8096327, PubMed:8097324). Inh...",Method: Direct Treatment｜FullName: interleukin...
855,IL4,,interleukin 4,26436920,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3565,P05112,interleukin 4,BSF1; IL-4; BCGF1; BSF-1; BCGF-1,The protein encoded by this gene is a pleiotro...,Interleukin-4,Participates in at least several B-cell activa...,Method: Direct Treatment｜FullName: interleukin...
856,IL7,,interleukin 7,25352130,Direct Treatment,Hepatic Fibrosis,fibrotic liver disease,MONDO:0100430,pro,ENTREZ:3574,P13232,interleukin 7,IL-7,The protein encoded by this gene is a cytokine...,Interleukin-7,Hematopoietic growth factor capable of stimula...,Method: Direct Treatment｜FullName: interleukin...


In [4]:
# List the distinct labels present in the Fibrosis column; these are the values
# the relation-type mapping has to cover.
merged["Fibrosis"].unique()

array(['anti', 'pro', 'pro/anti'], dtype=object)

In [5]:
# Map each Fibrosis label to a relation type of the form
# "GNBR::<code>::Gene:Disease".
# NOTE(review): D / G / J look like GNBR theme codes — confirm the intended
# meaning of each code against the GNBR documentation.
gnbr_code_by_label = {"anti": "D", "pro": "G", "pro/anti": "J"}
relation_type_map = {
    label: "GNBR::" + code + "::Gene:Disease"
    for label, code in gnbr_code_by_label.items()
}
relation_type_map

{'anti': 'GNBR::D::Gene:Disease',
 'pro': 'GNBR::G::Gene:Disease',
 'pro/anti': 'GNBR::J::Gene:Disease'}

In [6]:
# Translate each row's Fibrosis label into its relation type. Labels are
# trimmed and lower-cased first so stray whitespace or capitalisation cannot
# cause a failed lookup.
fibrosis_label = merged["Fibrosis"].str.strip().str.lower()
relation_type = fibrosis_label.map(relation_type_map)

# Series.map yields NaN for any label absent from the mapping; fail loudly
# instead of exporting edges with an empty relation_type.
unmapped_labels = merged.loc[relation_type.isna(), "Fibrosis"].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Fibrosis labels with no entry in relation_type_map: {unmapped_labels}"
    )

# Gene -> Disease edge table, one row per article record. Scalar values are
# broadcast to every row; the index is inherited from `merged`.
# NOTE(review): the 'key_sentence' column of `merged` is not carried into this
# table — confirm that is intended.
formatted_df = pd.DataFrame(
    {
        "source_name": merged["GeneName"],
        "source_type": "Gene",
        "source_id": merged["EntrezID"],
        "target_name": merged["DiseaseName"],
        "target_type": "Disease",
        "target_id": merged["DiseaseID"],
        "relation_type": relation_type,
        "resource": "FIBROAD",
        "pmid": merged["PMID"],
    }
)

formatted_df

Unnamed: 0,source_name,source_type,source_id,target_name,target_type,target_id,relation_type,resource,pmid
0,ABCB4,Gene,ENTREZ:5244,fibrotic liver disease,Disease,MONDO:0100430,GNBR::D::Gene:Disease,FIBROAD,31071368
1,ABCB4,Gene,ENTREZ:5244,fibrosis of bile duct,Disease,MONDO:0041959,GNBR::D::Gene:Disease,FIBROAD,16223543
2,ABCB4,Gene,ENTREZ:5244,fibrotic liver disease,Disease,MONDO:0100430,GNBR::D::Gene:Disease,FIBROAD,33340584
3,ABCB4,Gene,ENTREZ:5244,fibrotic liver disease,Disease,MONDO:0100430,GNBR::D::Gene:Disease,FIBROAD,21868490
4,ACE,Gene,ENTREZ:1636,endomyocardial fibrosis,Disease,MONDO:0006746,GNBR::G::Gene:Disease,FIBROAD,11425779
...,...,...,...,...,...,...,...,...,...
853,IL11,Gene,ENTREZ:3589,endomyocardial fibrosis,Disease,MONDO:0006746,GNBR::G::Gene:Disease,FIBROAD,29160304
854,IL13,Gene,ENTREZ:3596,fibrotic liver disease,Disease,MONDO:0100430,GNBR::G::Gene:Disease,FIBROAD,26436920
855,IL4,Gene,ENTREZ:3565,fibrotic liver disease,Disease,MONDO:0100430,GNBR::G::Gene:Disease,FIBROAD,26436920
856,IL7,Gene,ENTREZ:3574,fibrotic liver disease,Disease,MONDO:0100430,GNBR::G::Gene:Disease,FIBROAD,25352130


In [7]:
# Write the edge table to disk as a tab-separated file, without the index column.
formatted_df.to_csv(
    "formatted_fibroad_Disease.tsv",
    sep="\t",
    index=False,
)