-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a new table for managing metadata.
- Loading branch information
Showing
11 changed files
with
291 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-- Drop the compound metadata table when rolling back the migration. | ||
DROP TABLE IF EXISTS biomedgps_compound_metadata; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
-- The biomedgps_compound_metadata table stores metadata for compounds, such as the
-- compound name, the compound type, patents, etc.
--
-- Array columns use the PostgreSQL `TEXT[]` type syntax; `ARRAY[...]` is an array
-- constructor expression, not a column type. The column list must not end with a
-- trailing comma.
CREATE TABLE
  IF NOT EXISTS biomedgps_compound_metadata (
    id BIGSERIAL PRIMARY KEY, -- The entity metadata ID
    compound_type VARCHAR(64) NOT NULL, -- The type of the compound, such as drug, small molecule, etc.
    created VARCHAR(16) NOT NULL, -- The created time of the compound metadata
    updated VARCHAR(16) NOT NULL, -- The updated time of the compound metadata
    drugbank_id TEXT[] NOT NULL, -- The DrugBank IDs of the compound
    name VARCHAR(128) NOT NULL, -- The name of the compound
    description TEXT NOT NULL, -- The description of the compound
    cas_number VARCHAR(32) NOT NULL, -- The CAS number of the compound
    unii VARCHAR(32) NOT NULL, -- The UNII of the compound
    compound_state VARCHAR(32) NOT NULL, -- The state of the compound, such as solid, liquid, etc.
    groups TEXT[] NOT NULL, -- The groups of the compound, such as approved, investigational, etc.
    synthesis_reference TEXT NOT NULL, -- The synthesis reference of the compound
    indication TEXT NOT NULL, -- The indication of the compound
    pharmacodynamics TEXT NOT NULL, -- The pharmacodynamics of the compound
    mechanism_of_action TEXT NOT NULL, -- The mechanism of action of the compound
    toxicity TEXT NOT NULL, -- The toxicity of the compound
    metabolism TEXT NOT NULL, -- The metabolism of the compound
    absorption TEXT NOT NULL, -- The absorption of the compound
    half_life TEXT NOT NULL, -- The half-life of the compound
    protein_binding TEXT NOT NULL, -- The protein binding of the compound
    route_of_elimination TEXT NOT NULL, -- The route of elimination of the compound
    volume_of_distribution TEXT NOT NULL, -- The volume of distribution of the compound
    clearance TEXT NOT NULL, -- The clearance of the compound
    synonyms TEXT[] NOT NULL, -- The synonyms of the compound
    categories JSONB NOT NULL, -- The categories of the compound
    patents JSONB NOT NULL -- The patents of the compound
  );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ | |
pub mod route; | ||
pub mod schema; | ||
pub mod auth; | ||
pub mod req; | ||
pub mod publication; |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
//! Add metadata to the entity and relationship model. | ||
use log::debug; | ||
use poem_openapi::Object; | ||
use serde::{Deserialize, Serialize}; | ||
use serde_json; | ||
use std::{error::Error, path::PathBuf}; | ||
use validator::{Validate, ValidationErrors}; | ||
|
||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Object, sqlx::FromRow, Validate)] | ||
pub struct Category { | ||
pub category: String, | ||
pub mesh_id: String, | ||
} | ||
|
||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Object, sqlx::FromRow, Validate)] | ||
pub struct Patent { | ||
pub number: String, | ||
pub country: String, | ||
pub approved: String, | ||
pub expires: String, | ||
pub pediatric_extension: String, | ||
} | ||
|
||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Object, sqlx::FromRow, Validate)] | ||
pub struct CompoundMetadata { | ||
pub compound_type: String, | ||
pub created: String, | ||
pub updated: String, | ||
pub drugbank_id: Vec<String>, | ||
pub name: String, | ||
pub description: String, | ||
pub cas_number: String, | ||
pub unii: String, | ||
pub compound_state: String, | ||
pub groups: Vec<String>, | ||
// pub general_references: GeneralReference, | ||
pub synthesis_reference: String, | ||
pub indication: String, | ||
pub pharmacodynamics: String, | ||
pub mechanism_of_action: String, | ||
pub toxicity: String, | ||
pub metabolism: String, | ||
pub absorption: String, | ||
pub half_life: String, | ||
pub protein_binding: String, | ||
pub route_of_elimination: String, | ||
pub volume_of_distribution: String, | ||
pub clearance: String, | ||
// pub classification: Classification, | ||
// pub salts: Vec<String>, | ||
pub synonyms: Vec<String>, | ||
// pub products: Vec<Product>, | ||
// pub international_brands: Vec<InternationalBrand>, | ||
// pub mixtures: Vec<Mixture>, | ||
// pub packagers: Vec<Packager>, | ||
// pub manufacturers: Vec<Manufacturer>, | ||
// pub prices: Vec<Price>, | ||
pub categories: Vec<Category>, | ||
// pub affected_organisms: Vec<String>, | ||
// pub dosages: Vec<Dosage>, | ||
// pub atc_codes: Vec<AtcCode>, | ||
// pub ahfs_codes: Vec<AhfsCode>, | ||
// pub pdb_entries: Vec<PdbEntry>, | ||
pub patents: Vec<Patent>, | ||
// pub food_interactions: Vec<String>, | ||
// pub drug_interactions: Vec<DrugInteraction>, | ||
// pub sequences: Vec<Sequence>, | ||
// pub experimental_properties: ExperimentalProperty, | ||
// pub external_identifiers: Vec<ExternalIdentifier>, | ||
// pub external_links: Vec<ExternalLink>, | ||
// pub pathways: Vec<Pathway>, | ||
// pub reactions: Vec<Reaction>, | ||
// pub snp_effects: Vec<SnpEffect>, | ||
// pub snp_adverse_drug_reactions: Vec<SnpAdverseDrugReaction>, | ||
// pub targets: Vec<Target>, | ||
// pub enzymes: Vec<Enzyme>, | ||
// pub carriers: Vec<Carrier>, | ||
// pub transporters: Vec<Transporter>, | ||
} | ||
|
||
impl CompoundMetadata { | ||
pub async fn sync2db(pool: &sqlx::PgPool, filepath: &PathBuf) -> Result<(), Box<dyn Error>> { | ||
match sqlx::query("DROP TABLE IF EXISTS staging") | ||
.execute(pool) | ||
.await | ||
{ | ||
Ok(_) => debug!("Drop table staging successfully."), | ||
Err(e) => debug!("Drop table staging failed: {:?}", e), | ||
} | ||
|
||
let mut tx = pool.begin().await?; | ||
sqlx::query( | ||
"CREATE TEMPORARY TABLE staging (LIKE biomedgps_compound_metadata INCLUDING DEFAULTS)", | ||
) | ||
.execute(&mut tx) | ||
.await?; | ||
|
||
let columns = Self::fields().join(", "); | ||
let query_str = format!( | ||
"COPY staging ({}) FROM {} WITH (FORMAT JSON)", | ||
columns, | ||
filepath.display() | ||
); | ||
|
||
debug!("Start to copy data to the staging table."); | ||
sqlx::query(&query_str).execute(&mut tx).await?; | ||
|
||
let where_clause = Self::unique_fields() | ||
.iter() | ||
.map(|c| format!("biomedgps_compound_metadata.{} = staging.{}", c, c)) | ||
.collect::<Vec<String>>() | ||
.join(" AND "); | ||
|
||
sqlx::query(&format!( | ||
"INSERT INTO biomedgps_compound_metadata ({}) | ||
SELECT {} FROM staging | ||
WHERE NOT EXISTS (SELECT 1 FROM biomedgps_compound_metadata WHERE {}) | ||
ON CONFLICT DO NOTHING", | ||
columns, columns, where_clause | ||
)) | ||
.execute(&mut tx) | ||
.await?; | ||
|
||
tx.commit().await?; | ||
|
||
match sqlx::query("DROP TABLE IF EXISTS staging") | ||
.execute(pool) | ||
.await | ||
{ | ||
Ok(_) => {} | ||
Err(_) => {} | ||
}; | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
pub trait CheckMetadata { | ||
fn check_json_is_valid(filepath: &PathBuf) -> Vec<Box<ValidationErrors>>; | ||
|
||
// Implement the check function | ||
fn check_json_is_valid_default< | ||
S: for<'de> serde::Deserialize<'de> + Validate + std::fmt::Debug, | ||
>( | ||
filepath: &PathBuf, | ||
) -> Vec<Box<ValidationErrors>> { | ||
let file = std::fs::File::open(filepath).unwrap(); | ||
let reader = std::io::BufReader::new(file); | ||
let data: Vec<S> = serde_json::from_reader(reader).unwrap(); | ||
let mut errors: Vec<Box<ValidationErrors>> = Vec::new(); | ||
for d in data.iter() { | ||
match d.validate() { | ||
Ok(_) => {} | ||
Err(e) => { | ||
errors.push(Box::new(e)); | ||
} | ||
} | ||
} | ||
errors | ||
} | ||
|
||
fn fields() -> Vec<String>; | ||
|
||
fn unique_fields() -> Vec<String>; | ||
|
||
fn get_error_msg<S: for<'de> serde::Deserialize<'de> + Validate + std::fmt::Debug>( | ||
r: Result<Vec<S>, Box<ValidationErrors>>, | ||
) -> String { | ||
match r { | ||
Ok(_) => "".to_string(), | ||
Err(e) => { | ||
return e.to_string(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl CheckMetadata for CompoundMetadata { | ||
fn check_json_is_valid(filepath: &PathBuf) -> Vec<Box<ValidationErrors>> { | ||
Self::check_json_is_valid_default::<CompoundMetadata>(filepath) | ||
} | ||
|
||
fn unique_fields() -> Vec<String> { | ||
vec!["name".to_string()] | ||
} | ||
|
||
fn fields() -> Vec<String> { | ||
vec![ | ||
"compound_type".to_string(), | ||
"created".to_string(), | ||
"updated".to_string(), | ||
"drugbank_id".to_string(), | ||
"name".to_string(), | ||
"description".to_string(), | ||
"cas_number".to_string(), | ||
"unii".to_string(), | ||
"compound_state".to_string(), | ||
"groups".to_string(), | ||
"synthesis_reference".to_string(), | ||
"indication".to_string(), | ||
"pharmacodynamics".to_string(), | ||
"mechanism_of_action".to_string(), | ||
"toxicity".to_string(), | ||
"metabolism".to_string(), | ||
"absorption".to_string(), | ||
"half_life".to_string(), | ||
"protein_binding".to_string(), | ||
"route_of_elimination".to_string(), | ||
"volume_of_distribution".to_string(), | ||
"clearance".to_string(), | ||
"categories".to_string(), | ||
"patents".to_string(), | ||
"synonyms".to_string(), | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,4 @@ pub mod graph; | |
pub mod llm; | ||
pub mod kge; | ||
pub mod init_db; | ||
pub mod metadata; |