In [None]:
import glob
from pathlib import Path
import pandas as pd

In [None]:
# Prepare Downloaded TSV Files (Convert to CSV)
def convert_tsv_to_csv(data_dir="data"):
    """Convert every ``.tsv`` file under ``data_dir`` into a ``.csv`` file.

    The search is recursive: files directly inside ``data_dir`` and at any
    depth below it are converted. (Without ``recursive=True`` a ``**`` glob
    component behaves like a plain ``*``, so a pattern such as
    ``data/**/**/*.tsv`` only matches files exactly two directories deep.)

    Each CSV is written to the resolved directory of its TSV, keeps the
    TSV's stem, and silently overwrites an existing CSV of the same name.

    Args:
        data_dir: Root directory to search (str or path-like).

    Returns:
        List of ``Path`` objects for the CSV files written, ordered by the
        sorted TSV path. Empty when nothing matches or ``data_dir`` does
        not exist.
    """
    # Escape the root so glob metacharacters in the directory name are literal.
    pattern = str(Path(glob.escape(str(data_dir))) / "**" / "*.tsv")
    written = []
    # sorted() makes the processing order deterministic across runs.
    for fname in sorted(glob.glob(pattern, recursive=True)):
        tsv_path = Path(fname)
        if not tsv_path.is_file():
            # A directory whose name ends in ".tsv" also matches the pattern.
            continue
        csv_path = tsv_path.resolve().parent / f"{tsv_path.stem}.csv"
        pd.read_table(tsv_path, sep="\t").to_csv(csv_path, index=False)
        written.append(csv_path)
    return written


csv_paths = convert_tsv_to_csv("data")

In [None]:
# Prepare Data Mapping (gene symbol -> protein ID)
#
# Every dictionary in this cell is keyed by gene symbol; each value is the
# matching protein identifier of the form "9606.ENSP...".

# Comma-separated gene symbol lists, one string per disease.
ad_genes = "APP,HFE,MPO,NOS3,PLAU,ABCA7,PSEN2,PSEN1,APOE"
# NOTE: DISC2 and SHANK3 are listed here but have no entry in the
# schizophrenia dictionaries defined below.
schizo_genes = "APOL2,APOL4,CHI3L1,COMT,DAOA,DISC2,DRD3,HTR2A,MTHFR,RTN4R,SYN2,SHANK3,DISC1,RBM12,NRXN1,SLC1A1,PRODH,NRG1"

# --- Alzheimer Disease -------------------------------------------------------
ad_dict = dict(
    APP="9606.ENSP00000284981",
    HFE="9606.ENSP00000417404",
    MPO="9606.ENSP00000225275",
    NOS3="9606.ENSP00000297494",
    PLAU="9606.ENSP00000361850",
)

# --- AD Related --------------------------------------------------------------
ad_related_dict = dict(
    ABCA7="9606.ENSP00000263094",
    PSEN1="9606.ENSP00000326366",
    PSEN2="9606.ENSP00000355747",
    APOE="9606.ENSP00000252486",
)

# Combined lookup: Alzheimer entries first, then the related ones.
ad_full = {**ad_dict, **ad_related_dict}

# --- Schizophrenia -----------------------------------------------------------
schiz_dict = dict(
    APOL2="9606.ENSP00000249066",
    APOL4="9606.ENSP00000338260",
    CHI3L1="9606.ENSP00000255409",
    COMT="9606.ENSP00000354511",
    DAOA="9606.ENSP00000483757",
    DISC1="9606.ENSP00000355593",
    DRD3="9606.ENSP00000373169",
    HTR2A="9606.ENSP00000437737",
    MTHFR="9606.ENSP00000365777",
    RTN4R="9606.ENSP00000043402",
    SYN2="9606.ENSP00000480050",
)

# --- Schizophrenia Related ---------------------------------------------------
schiz_related_dict = dict(
    RBM12="9606.ENSP00000363228",
    NRXN1="9606.ENSP00000385142",
    SLC1A1="9606.ENSP00000262352",
    PRODH="9606.ENSP00000481127",
    NRG1="9606.ENSP00000384620",
)

# Combined lookup: schizophrenia entries first, then the related ones.
schiz_full = {**schiz_dict, **schiz_related_dict}

In [None]:
# Prepare Data (Create Label Columns)
def get_key(val):
    """Return the ``preferred_name`` stored for protein ID ``val``.

    The lookup is done against the module-level ``all_nodes`` DataFrame,
    which must be indexed by protein ID and have a ``preferred_name`` column.

    Args:
        val: Index label to look up in ``all_nodes``.

    Returns:
        The preferred name, or ``""`` when the stored name is missing (NaN)
        or otherwise falsy (e.g. an empty string).

    Raises:
        KeyError: If ``val`` is not present in the index of ``all_nodes``.
    """
    name = all_nodes.loc[val, "preferred_name"]
    # Empty CSV cells are loaded as NaN, and NaN is truthy, so a plain
    # truthiness check would let it through instead of falling back to "".
    if pd.isna(name) or not name:
        return ""

    return name


def _add_label_columns(network):
    """Add ``Source`` / ``Target`` label columns to ``network`` in place.

    Each label is obtained by passing the corresponding ``SourceId`` /
    ``TargetId`` value through ``get_key``. ``Series.map`` is used rather
    than a row-wise ``DataFrame.apply`` so that an empty network still
    yields (empty) label columns instead of failing on assignment.

    Args:
        network: DataFrame with ``SourceId`` and ``TargetId`` columns.

    Returns:
        The same DataFrame object, with the two label columns added.
    """
    for id_column, label_column in (("SourceId", "Source"), ("TargetId", "Target")):
        network[label_column] = network[id_column].map(get_key)
    return network


# Lookup table used by get_key(); indexed by protein ID.
all_nodes = pd.read_csv('data/all_protein_info.csv', sep=",", index_col="string_protein_id")

# Load both networks up front so a missing input fails before anything is written.
df_ad = pd.read_csv('data/ad/ad_network_full.csv', sep=",")
df_schiz = pd.read_csv('data/schiz/schiz_network_full.csv', sep=",")

_add_label_columns(df_ad).to_csv('data/ad/ad_network_full_with_labels.csv', index=False)

_add_label_columns(df_schiz).to_csv('data/schiz/schiz_network_full_with_labels.csv', index=False)