In [37]:
import pandas as pd


df_all = pd.concat([
    pd.read_csv(".data/TA_KG_subject_nodes.csv"),
    pd.read_csv(".data/TA_KG_source_nodes.csv"),
    pd.read_csv(".data/TA_KG_method_nodes.csv"),
    pd.read_csv(".data/TA_KG_compounds_nodes.csv")
], ignore_index=True)

len(df_all)


546040

In [40]:
def get_graph_data(T_id, rows):
    # rows.columns = ["T_id", "type", "data"]
    nodes_data_template = {
        "id": "0",
        "name": T_id,
        "symbolSize": 100,
        "x": 0,
        "y": 0,
        "value": 0,
        "category": 0
    }
    nodes = []  # {"source":"1", "target":"2"}
    links = []
    categories_data = {
        "T_id": 0,
        "subject": 1,
        "method": 2,
        "compound": 3,
        "source": 4
    }
    nodes.append(nodes_data_template)
    for idx, (_, row) in enumerate(rows.iterrows(), start=1):
        _nodes_data = nodes_data_template.copy()
        _nodes_data["id"] = str(idx)
        _nodes_data["symbolSize"] = 40
        _nodes_data["name"] = row["target"]
        _nodes_data["category"] = categories_data[row["type"]]
        _nodes_data["value"] = row["target"]
        nodes.append(_nodes_data)
        links.append({
            "source": "0",
            "target": str(idx)
        })
    return {
        "nodes": nodes,
        "links": links,
        "categories": [{"name": a} for a in categories_data.keys()]
    }

def parse_rows(rows):
    rows.loc[rows["subject"].notna(), "target"] = rows["subject"]
    rows.loc[rows["species"].notna(), "target"] = rows["species"]
    rows.loc[rows["method"].notna(), "target"] = rows["method"]
    rows.loc[rows["compound"].notna(), "target"] = rows["compound"]
    return rows

T_id = "TA000000"
rows = parse_rows(df_all[df_all["T_id"]==T_id])
get_graph_data("TA000000",rows)
# rows


{'nodes': [{'id': '0',
   'name': 'TA000000',
   'symbolSize': 100,
   'x': 0,
   'y': 0,
   'value': 0,
   'category': 0},
  {'id': '1',
   'name': 'Biotechnology',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Biotechnology',
   'category': 1},
  {'id': '2',
   'name': 'Pharmaceutical Sciences',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Pharmaceutical Sciences',
   'category': 1},
  {'id': '3',
   'name': 'Agricultural Sciences',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Agricultural Sciences',
   'category': 1},
  {'id': '4',
   'name': 'Chemistry',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Chemistry',
   'category': 1},
  {'id': '5',
   'name': 'Microbiology',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Microbiology',
   'category': 1},
  {'id': '6',
   'name': 'Origanum vulgare',
   'symbolSize': 40,
   'x': 0,
   'y': 0,
   'value': 'Origanum vulgare',
   'category': 4},
  {'id': '7',
   'name': 'Syzygium aromaticum'

In [None]:
df_kg = pd.concat([
    pd.read_csv(".data/TA_KG_subject_nodes.csv"),
    pd.read_csv(".data/TA_KG_source_nodes.csv"),
    pd.read_csv(".data/TA_KG_method_nodes.csv"),
    pd.read_csv(".data/TA_KG_compounds_nodes.csv")
], ignore_index=True)
df_info = pd.read_csv(".data/available_TA_all_filtered.csv")



In [36]:
def paper_info(T_id):
    """
    Get paper information for a given T_id.
    """
    rows = df_backend[df_backend["T_id"] == T_id]
    if rows.empty:
        return {}
    row = rows.iloc[0]
    graph_data = get_graph_data("TA000000",parse_rows(df_all[df_all["T_id"]==T_id]))
    return {
        "T_id": row["T_id"],
        "doi": row["doi"],
        "title": row["title"],
        "authors": row["authors"],
        "year": row["year"],
        "publisher": row["publisher"],
        "abstract": row["abstract"],
        "I_summary": row["I_summary"],
        "M_summary": row["M_summary"],
        "R_summary": row["R_summary"],
        "D_summary": row["D_summary"],
        "C_summary": row["C_summary"],
        "graph_data": graph_data
    }

paper_info("TA000000")

{'T_id': 'TA000000',
 'doi': '10.3390/foods10061207',
 'title': 'Yeast Particle Encapsulation of Scaffolded Terpene Compounds for ControlledTerpene Release.',
 'authors': 'Soto, Ernesto R, Rus, Florentina, Li, Hanchen, Garceau, Carli, Chicca, Jeffrey, Elfawal, Mostafa, Gazzola, David, Nielsen, Martin K, Urban, Joseph F Jr, Aroian, Raffi V, Ostroff, Gary R',
 'year': np.int64(2021),
 'publisher': 'Foods (Basel, Switzerland)',
 'abstract': 'Terpenes are naturally occurring compounds produced by plants that are of greatcommercial interest in the food, agricultural, cosmetic, and pharmaceuticalindustries due to their broad spectra of antibacterial, antifungal, anthelmintic,membrane permeation enhancement, and antioxidant biological activities.Applications of terpenes are often limited by their volatility and the need forsurfactants or alcohols to produce stable, soluble (non-precipitated) products.Yeast particles (YPs) are hollow, porous microspheres that have been used for theencapsulatio