# Analysis of SI Table 6 for genes overexpressed in cancer

**Genes overexpressed in different human solid cancers exhibit different tissue-specific expression profiles**

In [2]:
%load_ext blackcellmagic
import numpy as np
from glob import glob as glob
from io import StringIO
import os as os

import pandas as pd
import urllib
import json

To make things easier, I'm going to split Table 6 into separate tables for each type of cancer and then save as a `csv`.

In [14]:
file_list = glob("*.csv")

In [15]:
file_list = [i for i in file_list if "ligands" not in i]

In [16]:
file_list

['melanoma.csv',
 'endometrial.csv',
 'lung.csv',
 'kidney.csv',
 'colon.csv',
 'ovarian.csv',
 'astrocytoma.csv',
 'liver.csv',
 'thyroid.csv',
 'breast.csv',
 'glioblastoma.csv',
 'prostate.csv']

Now, define the helper functions.

In [3]:
def get_uniprot(gene):
    """
    Look up gene name in Uniprot.
    """
    print(f"Looking up {gene} in Uniprot database...")
    url = f"https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+gene_exact:{gene}&format=tab"
    request = urllib.request.Request(url)
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        # print(f"{gene:10}: HTTP error. Skipping...")
        return None
    page = response.read()
    if page == b"":
        # print(f"{gene:10}: Response error. Skipping...")
        return None
    else:
        return pd.read_csv(StringIO(page.decode("utf-8")), sep="\t")


In [4]:
def get_bindingdb(uniprot):
    """
    Look up uniprot in Binding DB.
    """
    # print(f"Looking up {uniprot} in BindingDB...")
    url = f"http://www.bindingdb.org/axis2/services/BDBService/getLigandsByUniprots?uniprot={uniprot}&cutoff=10&code=0&response=application/json"
    request = urllib.request.Request(url)
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print(f"BindingDB error...")
        return None

    page = response.read()
    binding_db = json.loads(page.decode("utf-8"))
    if binding_db["getLigandsByUniprotsResponse"] == "":
        return None
    if binding_db["getLigandsByUniprotsResponse"]["hit"] == "0":
        return None
    return pd.DataFrame(binding_db["getLigandsByUniprotsResponse"]["affinities"])


In [5]:
def bindingdb_to_table(table):
    """
    Parse the Binding DB `json` response.
    """

    query = table["query"].values[0]
    affinities = table["affinity"].values
    affinity_types = table["affinity_type"].values
    smiles = table["smile"].values
    monomers = table["monomerid"].values
    return pd.DataFrame(
        {
            "Affinities": affinities,
            "Affinity type": affinity_types,
            "SMILES": smiles,
            "MonomerID": monomers,
            "Query": query,
        }
    )


In [6]:
def manual_pivot(table):
    """
    The data we get from Binding DB will be in rows.
    But we want each row to basically be a new column, so that gene names are the rows.
    """
    # I had problems working with the built-in `pivot` because the data is non-numeric
    # tmp3 = tmp2.pivot_table(index="Query",
    #                        columns=tmp2.index,
    #                        values=["Affinities"],
    #                        aggfunc=lambda x: ' '.join(x))

    df = pd.DataFrame()
    for row in range(len(table)):
        df[f"Affinity {row:03d}"] = pd.Series(table["Affinities"].values[row])
        df[f"Type {row:03d}"] = pd.Series(table["Affinity type"].values[row])
        df[f"SMILES {row:03d}"] = pd.Series(table["SMILES"].values[row])
        df[f"ID {row:03d}"] = pd.Series(table["MonomerID"].values[row])
    return df


In [7]:
def filter_top_three_ligands(table):
    """
    Extract the top three ligands with the tightest binding.
    `errors="coerce"` will ignore entries like >10 or <10.
    """
    table["Affinities"] = pd.to_numeric(table["Affinities"], errors="coerce")
    return table.nsmallest(3, "Affinities")

Loop over the files, then loop over the genes.

Tiqing says to go in steps of 100 queries.

First, I map from gene to protein and Uniprot ID. That is stored in the `gene_df` DataFrame.
Then I use the Uniprot ID to look up data from Binding DB.

Then I join `gene_df` and the Binding DB data.

In [26]:
for file in file_list:

    if os.path.isfile(os.path.splitext(file)[0] + "-ligands.csv"):
        print(f"Found {os.path.splitext(file)[0] + '-ligands.csv'}, skipping...")
        continue

    table = pd.read_csv(file, skiprows=1, names=["Unknown", "Gene", "Overexpression"])
    print(f"Loaded {len(table)} genes from {file}...")
    table = table.drop_duplicates(subset="Gene", keep="first")
    table = table[table["Gene"] != "---"]
    print(f"Found {len(table)} unique genes...")

    # Workaround BindingDB issues.
    full_multiples = int(np.floor(len(table) / 100))
    chunks = [[i * 100, i * 100 + 100] for i in range(full_multiples)] + [
        [full_multiples * 100, len(table)]
    ]
    df = pd.DataFrame()

    for chunk in chunks:
        for gene, overexpression in zip(
            table["Gene"].values[chunk[0] : chunk[1]],
            table["Overexpression"].values[chunk[0] : chunk[1]],
        ):

            import time

            start = time.time()

            if "///" in gene:
                gene = gene.split("///")[0]

            gene_table = get_uniprot(gene)
            if gene_table is None:
                print(f"{gene:10} → Unknown. Skipping...")
                continue
            uniprot = gene_table["Entry"].values[0]
            protein = gene_table["Protein names"].values[0]

            gene_df = pd.DataFrame()
            gene_df["Gene"] = pd.Series(gene)
            gene_df["Uniprot"] = pd.Series(uniprot)
            gene_df["Protein"] = pd.Series(protein)
            gene_df["Overexpression"] = pd.Series(overexpression)

            binding_db = get_bindingdb(uniprot)

            if binding_db is not None:
                print(
                    f"{gene:10} → {uniprot:10} → {len(binding_db):4} ligands found..."
                )
                binding_table = bindingdb_to_table(binding_db)
                trimmed_table = filter_top_three_ligands(binding_table)
                binding_pivot = manual_pivot(trimmed_table)

                gene_df = gene_df.join(binding_pivot)
                # Only track the proteins with entries in BindingDB
                df = df.append(gene_df, ignore_index=True)
            else:
                print(f"{gene:10} → {uniprot:10} → {'0':>4} ligands found...")
                pass

        # At the end of each chunk, do a sanity check to make sure BindingDB is still working.
        test = get_bindingdb("Q14680")
        if test.empty:
            print("Problem with BindingDB.")
            break
        time.sleep(60)

    if df.empty:
        continue

    column_list = df.columns.tolist()
    column_end = [i for i in column_list if "Affinity" in i]
    column_max = int(column_end[-1].split(" ")[1])
    column_order = ["Gene", "Uniprot", "Protein", "Overexpression"]
    for i in range(column_max + 1):
        column_order.append(f"Affinity {i:03d}")
        column_order.append(f"Type {i:03d}")
        column_order.append(f"SMILES {i:03d}")
        column_order.append(f"ID {i:03d}")
    df = df[column_order]
    df.to_csv(os.path.splitext(file)[0] + "-ligands.csv")


Found melanoma-ligands.csv, skipping...
Found endometrial-ligands.csv, skipping...
Found lung-ligands.csv, skipping...
Found kidney-ligands.csv, skipping...
Found colon-ligands.csv, skipping...
Found ovarian-ligands.csv, skipping...
Loaded 575 genes from astrocytoma.csv...
Found 495 unique genes...
LTF        → P02788     →    0 ligands found...
FABP7      → O15540     →    0 ligands found...
SOX11      → P35716     →    0 ligands found...
TMSL8      → P0CG34     →    0 ligands found...
AQP4       → P55087     →    0 ligands found...
TOP2A      → P11388     →    0 ligands found...
COL11A1    → P12107     →    0 ligands found...
CHI3L1     → P36222     →    0 ligands found...
PDPN       → Q86YL7     →    0 ligands found...
IL13RA2    → Q14627     →    0 ligands found...
PTX3       → P26022     →    0 ligands found...
CDC2       → P06493     →   86 ligands found...
IL1RAP     → Q9NPH3     →    0 ligands found...
LOX        → P28300     →    0 ligands found...
ASCL1      → P50553     →   

In [29]:
rows_with_multiple_genes = table[table["Gene"].str.contains("///")]
df = pd.DataFrame()
for gene in rows_with_multiple_genes:
    genes_list = rows_with_multiple_genes["Gene"].str.split("///")
    overexpression = rows_with_multiple_genes["Overexpression"][gene]
    unknown = rows_with_multiple_genes["Unknown"][gene]
    for indvidual_gene in genes_list:
        df = df.append({"Gene": individual_gene,
                       "Overexpression": overexpression,
                       "Unknown": unknown})


KeyError: 'Unknown'

In [27]:
duplicate_genes

8                                     [AKR1C1 ,  AKR1C2]
31     [UGT1A10 ,  UGT1A8 ,  UGT1A7 ,  UGT1A6 ,  UGT1...
90                [IGHA1 ,  IGHG1 ,  IGHG3 ,  LOC390714]
93     [IGH@ ,  IGHA1 ,  IGHA2 ,  IGHD ,  IGHG1 ,  IG...
106                                  [MAGEA2 ,  MAGEA2B]
123                                   [CKMT1B ,  CKMT1A]
240                              [IGH@ ,  IGHD ,  IGHG1]
262             [IGHG1 ,  IGHG3 ,  IGHV1-69 ,  MGC27165]
287        [IGHA1 ,  IGHD ,  IGHG1 ,  IGHM ,  LOC390714]
293                               [ARHGAP8 ,  LOC553158]
318                                      [CHKB ,  CPT1B]
345                                  [IGHG1 ,  MGC27165]
367       [HMGB3 ,  LOC392314 ,  LOC442289 ,  LOC442661]
386                                    [TRBV19 ,  TRBC1]
404    [IGL@ ,  IGLC1 ,  IGLC2 ,  IGLV3-25 ,  IGLV2-1...
414                            [ADH1A ,  ADH1B ,  ADH1C]
Name: Gene, dtype: object

In [11]:
test = get_bindingdb("Q14680")

# Pancreatic Ductal Adenocarcinoma
Highly Expressed Genes in Pancreatic Ductal Adenocarcinomas: A Comprehensive
Characterization and Comparison of the Transcription Profiles Obtained from
Three Major Technologies

In [18]:
file = "pancreatic-ductal-adenocarcinoma.csv"

if os.path.isfile(os.path.splitext(file)[0] + "-ligands.csv"):
    print(f"Found {os.path.splitext(file)[0] + '-ligands.csv'}, skipping...")
    

table = pd.read_csv(file, skiprows=1, names=["Gene", "Overexpression"])
print(f"Loaded {len(table)} genes from {file}...")
table = table.drop_duplicates(subset="Gene", keep="first")
table = table[table["Gene"] != "---"]
print(f"Found {len(table)} unique genes...")

# Workaround BindingDB issues.
full_multiples = int(np.floor(len(table) / 100))
chunks = [[i * 100, i * 100 + 100] for i in range(full_multiples)] + [
    [full_multiples * 100, len(table)]
]
df = pd.DataFrame()

for chunk in chunks:
    for gene, overexpression in zip(
        table["Gene"].values[chunk[0] : chunk[1]],
        table["Overexpression"].values[chunk[0] : chunk[1]],
    ):

        import time

        start = time.time()

        if "///" in gene:
            gene = gene.split("///")[0]

        gene_table = get_uniprot(gene)
        if gene_table is None:
            print(f"{gene:10} → Unknown. Skipping...")
            continue
        uniprot = gene_table["Entry"].values[0]
        protein = gene_table["Protein names"].values[0]

        gene_df = pd.DataFrame()
        gene_df["Gene"] = pd.Series(gene)
        gene_df["Uniprot"] = pd.Series(uniprot)
        gene_df["Protein"] = pd.Series(protein)
        gene_df["Overexpression"] = pd.Series(overexpression)

        binding_db = get_bindingdb(uniprot)

        if binding_db is not None:
            print(f"{gene:10} → {uniprot:10} → {len(binding_db):4} ligands found...")
            binding_table = bindingdb_to_table(binding_db)
            trimmed_table = filter_top_three_ligands(binding_table)
            binding_pivot = manual_pivot(trimmed_table)

            gene_df = gene_df.join(binding_pivot)
            # Only track the proteins with entries in BindingDB
            df = df.append(gene_df, ignore_index=True)
        else:
            print(f"{gene:10} → {uniprot:10} → {'0':>4} ligands found...")
            pass

    # At the end of each chunk, do a sanity check to make sure BindingDB is still working.
    test = get_bindingdb("Q14680")
    if test.empty:
        print("Problem with BindingDB.")
        break
    time.sleep(60)

if df.empty:
    continue

column_list = df.columns.tolist()
column_end = [i for i in column_list if "Affinity" in i]
column_max = int(column_end[-1].split(" ")[1])
column_order = ["Gene", "Uniprot", "Protein", "Overexpression"]
for i in range(column_max + 1):
    column_order.append(f"Affinity {i:03d}")
    column_order.append(f"Type {i:03d}")
    column_order.append(f"SMILES {i:03d}")
    column_order.append(f"ID {i:03d}")
df = df[column_order]
df.to_csv(os.path.splitext(file)[0] + "-ligands.csv")


Loaded 123 genes from pancreatic-ductal-adenocarcinoma.csv...
Found 119 unique genes...
Looking up Neuromedin U in Uniprot database...
Neuromedin U → Unknown. Skipping...
Looking up small inducible cytokine subfamily B (Cys-X-Cys), member 5 (epithelial-derived neutrophil-activating peptide 78) in Uniprot database...
small inducible cytokine subfamily B (Cys-X-Cys), member 5 (epithelial-derived neutrophil-activating peptide 78) → Unknown. Skipping...
Looking up melanoma inhibitory activity in Uniprot database...
melanoma inhibitory activity → Unknown. Skipping...
Looking up kallikrein 10 in Uniprot database...
kallikrein 10 → Unknown. Skipping...
Looking up sciellin in Uniprot database...
sciellin   → Unknown. Skipping...
Looking up solute carrier family 21 (organic anion transporter), member 11 in Uniprot database...
solute carrier family 21 (organic anion transporter), member 11 → Unknown. Skipping...
Looking up FOS-like antigen 1 in Uniprot database...
FOS-like antigen 1 → Unknown. S

UnicodeEncodeError: 'ascii' codec can't encode character '\u03b2' in position 88: ordinal not in range(128)

In [35]:
def get_uniprot_from_protein(protein):
    """
    Look up protein name in Uniprot.
    """
    protein = protein.replace(" ", "+")
    print(f"Looking up {protein} in Uniprot database...")
    url = f"https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+name:{protein}&format=tab"
    print(url)
    request = urllib.request.Request(url)
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        # print(f"{gene:10}: HTTP error. Skipping...")
        return None
    page = response.read()
    if page == b"":
        # print(f"{gene:10}: Response error. Skipping...")
        return None
    else:
        return pd.read_csv(StringIO(page.decode("utf-8")), sep="\t")


In [36]:
get_uniprot_from_protein("small inducible cytokine subfamily B (Cys-X-Cys), member 5 (epithelial-derived neutrophil-activating peptide 78)")

Looking up small+inducible+cytokine+subfamily+B+(Cys-X-Cys),+member+5+(epithelial-derived+neutrophil-activating+peptide+78) in Uniprot database...
https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+name:small+inducible+cytokine+subfamily+B+(Cys-X-Cys),+member+5+(epithelial-derived+neutrophil-activating+peptide+78)&format=tab


In [38]:
table = get_uniprot_from_protein("Neuromedin U")

Looking up Neuromedin+U in Uniprot database...
https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+name:Neuromedin+U&format=tab


I don't think it's safe to assume the first result is going to be the one that we want, so we're going to need a way to sorted the table.

# Try 2: Pancreatic Ductal Adenocarcinoma


In [58]:
def get_protein_from_uniprot(uniprot):
    """
    Look up protein name in Uniprot.
    """
    print(f"Looking up {uniprot} in Uniprot database...")
    url = f"https://www.uniprot.org/uniprot/?query=reviewed:yes+AND+organism:9606+AND+id:{uniprot}&format=tab"
    request = urllib.request.Request(url)
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        # print(f"{gene:10}: HTTP error. Skipping...")
        return None
    page = response.read()
    if page == b"":
        # print(f"{gene:10}: Response error. Skipping...")
        return None
    else:
        return pd.read_csv(StringIO(page.decode("utf-8")), sep="\t")


In [45]:
file = "overexpressed_birnbaum.csv"

if os.path.isfile(os.path.splitext(file)[0] + "-ligands.csv"):
    print(f"Found {os.path.splitext(file)[0] + '-ligands.csv'}, skipping...")


table = pd.read_csv(file, skiprows=0, names=["Uniprot"])
print(f"Loaded {len(table)} Uniprots from {file}...")
table = table.drop_duplicates(subset="Uniprot", keep="first")
table = table[table["Uniprot"] != "---"]
print(f"Found {len(table)} unique Uniprots...")

# Workaround BindingDB issues.
full_multiples = int(np.floor(len(table) / 100))
chunks = [[i * 100, i * 100 + 100] for i in range(full_multiples)] + [
    [full_multiples * 100, len(table)]
]
df = pd.DataFrame()

for chunk in chunks:
    for uniprot in table["Uniprot"].values[chunk[0] : chunk[1]]:

        import time

        start = time.time()

        protein_name = get_protein_from_uniprot(uniprot.strip().replace(u"\xa0", u" "))
        if protein_name is None:
            print(f"{uniprot:10} → Unknown. Skipping...")
            continue
        else:
            protein = protein_name["Protein names"].values[0]

        protein_df = pd.DataFrame()
        protein_df["Uniprot"] = pd.Series(uniprot)
        protein_df["Protein"] = pd.Series(protein)

        binding_db = get_bindingdb(uniprot)

        if binding_db is not None:
            print(f"{uniprot:10} → {protein:.10} {len(binding_db):4} ligands found...")
            binding_table = bindingdb_to_table(binding_db)
            trimmed_table = filter_top_three_ligands(binding_table)
            binding_pivot = manual_pivot(trimmed_table)

            protein_df = protein_df.join(binding_pivot)
            # Only track the proteins with entries in BindingDB
            df = df.append(protein_df, ignore_index=True)
        else:
            print(f"{uniprot:10} → {protein:.10} → {'0':>4} ligands found...")
            pass

    # At the end of each chunk, do a sanity check to make sure BindingDB is still working.
    test = get_bindingdb("Q14680")
    if test.empty:
        print("Problem with BindingDB.")
        break
    time.sleep(60)

    if df.empty:
        continue

column_list = df.columns.tolist()
column_end = [i for i in column_list if "Affinity" in i]
column_max = int(column_end[-1].split(" ")[1])
column_order = ["Uniprot", "Protein"]
for i in range(column_max + 1):
    column_order.append(f"Affinity {i:03d}")
    column_order.append(f"Type {i:03d}")
    column_order.append(f"SMILES {i:03d}")
    column_order.append(f"ID {i:03d}")
df = df[column_order]
df.to_csv(os.path.splitext(file)[0] + "-ligands.csv")


Loaded 20 Uniprots from overexpressed_birnbaum.csv...
Found 20 unique Uniprots...
Looking up Q969P5 in Uniprot database...
Q969P5     → F-box only →    0 ligands found...
Looking up Q16553 in Uniprot database...
Q16553     → Lymphocyte →    0 ligands found...
Looking up Q9HCE7 in Uniprot database...
Q9HCE7     → E3 ubiquit    3 ligands found...
Looking up Q14534 in Uniprot database...
Q14534     → Squalene m →    0 ligands found...
Looking up O00300 in Uniprot database...
O00300     → Tumor necr →    0 ligands found...
Looking up O95388 in Uniprot database...
O95388     → WNT1-induc →    0 ligands found...
Looking up Q86X45 in Uniprot database...
Q86X45     → Protein ti →    0 ligands found...
Looking up Q06945 in Uniprot database...
Q06945     → Transcript →    0 ligands found...
Looking up Q07617 in Uniprot database...
Q07617     → Sperm-asso →    0 ligands found...
Looking up Q5K651 in Uniprot database...
Q5K651     → Sterile al →    0 ligands found...
Looking up Q6PL18 in Uniprot d

In [51]:
def quick_run(uniprot):
    protein_name = get_protein_from_uniprot(uniprot.strip().replace("\xa0", " "))
    if protein_name is None:
        print(f"{uniprot:10} → Unknown. Skipping...")
    else:
        protein = protein_name["Protein names"].values[0]

    protein_df = pd.DataFrame()
    protein_df["Uniprot"] = pd.Series(uniprot)
    protein_df["Protein"] = pd.Series(protein)

    binding_db = get_bindingdb(uniprot)

    if binding_db is not None:
        print(f"{uniprot:10} → {protein:.20}... → {len(binding_db):4} ligands found...")
        binding_table = bindingdb_to_table(binding_db)
        trimmed_table = filter_top_three_ligands(binding_table)
        binding_pivot = manual_pivot(trimmed_table)

        protein_df = protein_df.join(binding_pivot)
        # Only track the proteins with entries in BindingDB
        df = df.append(protein_df, ignore_index=True)
        return df
    else:
        print(f"{uniprot:10} → {protein:.20}... → {'0':>4} ligands found...")
        pass


In [52]:
quick_run("Q13421")

Looking up Q13421 in Uniprot database...
Q13421     → Mesothelin (CAK1 ant... →    0 ligands found...


In [59]:
file = "pancreatic_data_2.csv"

if os.path.isfile(os.path.splitext(file)[0] + "-ligands.csv"):
    print(f"Found {os.path.splitext(file)[0] + '-ligands.csv'}, skipping...")


table = pd.read_csv(file, skiprows=0, names=["Uniprot"])
print(f"Loaded {len(table)} Uniprots from {file}...")
table = table.drop_duplicates(subset="Uniprot", keep="first")
table = table[table["Uniprot"] != "---"]
print(f"Found {len(table)} unique Uniprots...")

# Workaround BindingDB issues.
full_multiples = int(np.floor(len(table) / 100))
chunks = [[i * 100, i * 100 + 100] for i in range(full_multiples)] + [
    [full_multiples * 100, len(table)]
]
df = pd.DataFrame()

for chunk in chunks:
    for uniprot in table["Uniprot"].values[chunk[0] : chunk[1]]:

        import time

        start = time.time()

        protein_name = get_protein_from_uniprot(uniprot.strip().replace(u"\xa0", u" "))
        if protein_name is None:
            print(f"{uniprot:10} → Unknown. Skipping...")
            continue
        else:
            protein = protein_name["Protein names"].values[0]

        protein_df = pd.DataFrame()
        protein_df["Uniprot"] = pd.Series(uniprot)
        protein_df["Protein"] = pd.Series(protein)

        binding_db = get_bindingdb(uniprot.strip().replace(u"\xa0", u" "))

        if binding_db is not None:
            print(f"{uniprot:10} → {protein:.10} {len(binding_db):4} ligands found...")
            binding_table = bindingdb_to_table(binding_db)
            trimmed_table = filter_top_three_ligands(binding_table)
            binding_pivot = manual_pivot(trimmed_table)

            protein_df = protein_df.join(binding_pivot)
            # Only track the proteins with entries in BindingDB
            df = df.append(protein_df, ignore_index=True)
        else:
            print(f"{uniprot:10} → {protein:.10} → {'0':>4} ligands found...")
            pass

    # At the end of each chunk, do a sanity check to make sure BindingDB is still working.
    test = get_bindingdb("Q14680")
    if test.empty:
        print("Problem with BindingDB.")
        break
    time.sleep(60)

    if df.empty:
        continue

column_list = df.columns.tolist()
column_end = [i for i in column_list if "Affinity" in i]
column_max = int(column_end[-1].split(" ")[1])
column_order = ["Uniprot", "Protein"]
for i in range(column_max + 1):
    column_order.append(f"Affinity {i:03d}")
    column_order.append(f"Type {i:03d}")
    column_order.append(f"SMILES {i:03d}")
    column_order.append(f"ID {i:03d}")
df = df[column_order]
df.to_csv(os.path.splitext(file)[0] + "-ligands.csv")


Loaded 21 Uniprots from pancreatic_data_2.csv...
Found 20 unique Uniprots...
Looking up P04626 in Uniprot database...
P04626     → Receptor t  192 ligands found...
Looking up P31947 in Uniprot database...
P31947     → 14-3-3 pro →    0 ligands found...
Looking up O43653 in Uniprot database...
O43653     → Prostate s →    0 ligands found...
Looking up O14493 in Uniprot database...
O14493     → Claudin-4  →    0 ligands found...
Looking up P80188 in Uniprot database...
P80188     → Neutrophil →    0 ligands found...
Looking up P26447 in Uniprot database...
P26447     → Protein S1 →    0 ligands found...
Looking up Q03403 in Uniprot database...
Q03403     → Trefoil fa →    0 ligands found...
Looking up Q9HBU1 in Uniprot database...
Q9HBU1     → Homeobox p →    0 ligands found...
Looking up P35222 in Uniprot database...
P35222     → Catenin be →    0 ligands found...
Looking up O95832 in Uniprot database...
O95832     → Claudin-1  →    0 ligands found...
Looking up P02751 in Uniprot databa