Skip to content

Commit

Permalink
Add information from R&D Systems.
Browse files Browse the repository at this point in the history
  • Loading branch information
yjcyxky committed Apr 10, 2024
1 parent c7b64d1 commit 6bed14a
Show file tree
Hide file tree
Showing 19 changed files with 457 additions and 75 deletions.
19 changes: 19 additions & 0 deletions examples/merge_relation_entity.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Scored relations joined with the entity table to attach the name of each endpoint.
-- An entity matches an endpoint on both id and label (= the endpoint's type).
-- LEFT JOINs keep relations whose endpoint has no matching entity row; the
-- corresponding *_name column is NULL in that case.
SELECT
    rel.id,
    rel.source_id,
    src.name AS source_name,
    rel.source_type,
    rel.target_id,
    tgt.name AS target_name,
    rel.target_type,
    rel.relation_type,
    rel.formatted_relation_type,
    rel.key_sentence,
    rel.resource,
    rel.dataset,
    rel.pmids,
    rel.score
FROM public.biomedgps_relation_with_score AS rel
LEFT JOIN public.biomedgps_entity AS src
    ON rel.source_id = src.id
    AND rel.source_type = src.label
LEFT JOIN public.biomedgps_entity AS tgt
    ON rel.target_id = tgt.id
    AND rel.target_type = tgt.label;
54 changes: 48 additions & 6 deletions src/bin/biomedgps-cli.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
extern crate log;

use biomedgps::model::entity_attr::CompoundAttr;
use biomedgps::model::init_db::create_kg_score_table;
use biomedgps::model::kge::{init_kge_models, DEFAULT_MODEL_NAME};
use biomedgps::model::entity_attr::CompoundAttr;
use biomedgps::model::{
init_db::{
create_score_table, get_kg_score_table_name, kg_entity_table2graphdb,
Expand All @@ -21,7 +21,7 @@ use std::path::PathBuf;
use std::sync::Arc;
use structopt::StructOpt;

/// NOTE: In the first time, you need to follow the order to run the commands: initdb -> importdb (entity + entity_metadata + relation + relation_metadata etc.) -> importkge (embeddings) -> cachetable (compound-disease-symptom, knowledge-score). In the current stage, we don't have a mechanism to check the format of entity ids and relation_types and keep the consistent of the data, such as whether all entities in the relation table exist in the entity table. But we provide a script for this purpose, you can follow this link to check the data consistency: https://github.com/open-prophetdb/biomedgps-data/blob/main/graph_data/scripts/correct_graph_data.py
/// NOTE: The first time, you need to run the commands in the following order: initdb -> importdb (entity + entity_metadata + relation + relation_metadata etc.) -> importkge (embeddings) -> cachetable (compound-disease-symptom, gene-disease-symptom, knowledge-score). At the current stage, we don't have a mechanism to check the format of entity ids and relation_types or to keep the data consistent, such as whether all entities in the relation table exist in the entity table. But we provide a script for this purpose; you can follow this link to check the data consistency: https://github.com/open-prophetdb/biomedgps-data/blob/main/graph_data/scripts/correct_graph_data.py
///
#[derive(StructOpt, Debug)]
#[structopt(setting=structopt::clap::AppSettings::ColoredHelp, name = "A cli for biomedgps service.", author="Jingcheng Yang <yjcyxky@163.com>;")]
Expand Down Expand Up @@ -61,7 +61,7 @@ pub struct InitDbArguments {
}

/// Output the statistics of the database, such as the number of entities, relations, metadata etc.
/// The statistics include the number of entities, relations, metadata, subgraph, knowledge_curation, entity2d, compound-disease-symptom, knowledge-score, embedding, graph etc.
/// The statistics include the number of entities, relations, metadata, subgraph, knowledge_curation, entity2d, compound-disease-symptom, gene-disease-symptom, knowledge-score, embedding, graph etc.
#[derive(StructOpt, PartialEq, Debug)]
#[structopt(setting=structopt::clap::AppSettings::ColoredHelp, name="BioMedGPS - statdb", author="Jingcheng Yang <yjcyxky@163.com>")]
pub struct StatDBArguments {
Expand All @@ -78,8 +78,8 @@ pub struct CleanDBArguments {
#[structopt(name = "database_url", short = "d", long = "database-url")]
database_url: Option<String>,

/// Which table to clean. e.g. entity, relation, entity_metadata, relation_metadata, knowledge_curation, subgraph, entity2d, compound-disease-symptom, knowledge-score, embedding, graph etc.
#[structopt(name = "table", short = "t", long = "table", possible_values = &["entity", "entity2d", "relation", "relation_metadata", "entity_metadata", "knowledge_curation", "subgraph", "compound-disease-symptom", "knowledge-score", "embedding", "graph"])]
/// Which table to clean. e.g. entity, relation, entity_metadata, relation_metadata, knowledge_curation, subgraph, entity2d, compound-disease-symptom, gene-disease-symptom, knowledge-score, embedding, graph etc.
#[structopt(name = "table", short = "t", long = "table", possible_values = &["entity", "entity2d", "relation", "relation_metadata", "entity_metadata", "knowledge_curation", "subgraph", "compound-disease-symptom", "gene-disease-symptom", "knowledge-score", "embedding", "graph"])]
table: String,
}

Expand Down Expand Up @@ -173,7 +173,7 @@ pub struct CacheTableArguments {
#[structopt(name = "db_host", short = "D", long = "db-host")]
db_host: Option<String>,

/// [Required] The table name to init. supports compound-disease-symptom, knowledge-score etc.
/// [Required] The table name to init. supports compound-disease-symptom, gene-disease-symptom, knowledge-score etc.
#[structopt(name = "table", short = "t", long = "table")]
table: String,

Expand Down Expand Up @@ -392,6 +392,48 @@ async fn main() {
Err(e) => error!("Init compound-disease-symptom table failed: {}", e),
}
}
"gene-disease-symptom" => {
    // Build the gene-disease-symptom score table from two relation types given as
    // a comma-separated pair: first the gene-disease relation, then the
    // disease-symptom relation. When the user passes none, the default pair is used.
    // TODO: the HSDN::has_symptom::Disease:Symptom is non-standard relation type. We need to change it to the standard format.
    let default_relation_types =
        "GNBR::J::Gene:Disease,HSDN::has_symptom::Disease:Symptom";
    let relation_types = arguments
        .relation_types
        .unwrap_or_else(|| default_relation_types.to_string());
    let relation_types = relation_types.split(',').collect::<Vec<&str>>();

    // Exactly two entries are accepted; anything else aborts the command.
    let (gene_disease_relation_type, disease_symptom_relation_type) =
        match relation_types.as_slice() {
            [first, second] => (first, second),
            _ => {
                error!("The number of relation types should be 2 and the order should be consistent with the pairs of table name. e.g. gene-disease-symptom table should have two relation types for gene-disease and disease-symptom.");
                std::process::exit(1)
            }
        };

    // Guard against swapped or unrelated relation types before touching the database.
    if !gene_disease_relation_type.contains("Gene:Disease") {
        error!("The first relation type should be for gene-disease. e.g. GNBR::J::Gene:Disease.");
        std::process::exit(1);
    }

    if !disease_symptom_relation_type.contains("Disease:Symptom") {
        error!("The second relation type should be for disease-symptom. e.g. HSDN::has_symptom::Disease:Symptom");
        std::process::exit(1);
    }

    match create_score_table(
        &pool,
        "Gene",
        "Disease",
        "Symptom",
        gene_disease_relation_type,
        disease_symptom_relation_type,
        Some(&arguments.table_prefix),
    )
    .await
    {
        Ok(_) => info!("Init gene-disease-symptom table successfully."),
        Err(e) => error!("Init gene-disease-symptom table failed: {}", e),
    }
}
"knowledge-score" => {
let neo4j_url = if arguments.neo4j_url.is_none() {
match std::env::var("NEO4J_URL") {
Expand Down
28 changes: 28 additions & 0 deletions src/model/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,34 @@ impl Graph {
source_type = source_type,
topk = topk
)
} else if relation_type == "GNBR::J::Gene:Symptom" {
// Special case for the Gene-Symptom relation type: candidates are read from the
// Gene/Disease/Symptom triple score table.
// The caller's source_id/source_type are matched against the table's
// target_id/target_type columns (source_id might be a symptom, so we need to check
// the target_id), and each distinct source row is returned as a candidate node_id.
// The score of each (target, source) pair is the median (percentile_cont(0.5)) of
// its rows; results are ordered by score descending, then node_id ascending, and
// limited to topk rows.
// Presumably the table is built from the relation pair
// GNBR::J::Gene:Disease,HSDN::has_symptom::Disease:Symptom — TODO confirm.
// NOTE(review): source_id and source_type are interpolated directly into the SQL
// text; confirm they are validated or escaped upstream.
format!(
"
SELECT
COALESCE(target_type, '') || '::' || COALESCE(target_id, '') AS query_node_id,
COALESCE(source_type, '') || '::' || COALESCE(source_id, '') AS node_id,
percentile_cont(0.5) WITHIN GROUP (ORDER BY score)::FLOAT4 AS score
FROM
{table_name} ee1
WHERE
target_id IN ('{source_id}') AND target_type = '{source_type}'
GROUP BY
target_id, target_type, source_id, source_type
ORDER BY score DESC, node_id ASC
LIMIT {topk};
",
table_name = get_triple_entity_score_table_name(
&embedding_metadata.table_name,
"Gene",
"Disease",
"Symptom"
),
source_id = source_id,
source_type = source_type,
topk = topk
)
} else {
// Example SQL:
// SELECT
Expand Down
Loading

0 comments on commit 6bed14a

Please sign in to comment.