# Akuisisi senyawa aktif

In [4]:
import pubchempy as pcp
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
import xmltodict
import os
import ssl
import certifi
import urllib3

ssl._create_default_https_context = ssl._create_unverified_context

#Matikan warning SSL
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#Verifikasi PubChem

In [23]:
import requests
import pandas as pd
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def get_cids(compound_name):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{compound_name}/cids/TXT"
    r = requests.get(url, timeout=20, verify=False)
    if r.status_code == 200 and r.text.strip():
        return r.text.strip().split("\n")
    return []

def get_properties(cid):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,SMILES,IUPACName/JSON"
    r = requests.get(url, timeout=20, verify=False)
    if r.status_code == 200:
        data = r.json().get("PropertyTable", {}).get("Properties", [{}])[0]
        props = {}
        props["CID"] = cid
        props["Formula"] = data.get("MolecularFormula", "-")
        props["MolecularWeight"] = data.get("MolecularWeight", "-")
        props["IUPACName"] = data.get("IUPACName", "-")

        if "CanonicalSMILES" in data and data["CanonicalSMILES"]:
            props["SMILES"] = data["CanonicalSMILES"]
        elif "SMILES" in data and data["SMILES"]:
            props["SMILES"] = data["SMILES"]
        elif "IsomericSMILES" in data and data["IsomericSMILES"]:
            props["SMILES"] = data["IsomericSMILES"]
        else:
            props["SMILES"] = "-"
        return props
    return None

def search_compounds(compounds):
    results = []
    not_found = []

    for name in compounds:
        print("="*70)
        print(f"Senyawa : {name}")
        cids = get_cids(name)

        if not cids:
            print("❌ Tidak ditemukan di PubChem")
            results.append({
                "Compound": name,
                "CID": "",
                "Formula": "",
                "MolecularWeight": "",
                "IUPACName": "",
                "SMILES": "",
                "Status": "❌ Tidak ditemukan"
            })
            not_found.append(name)
            continue

        print(f"✅ Ditemukan {len(cids)} CID: {', '.join(cids)}")
        for cid in cids:
            props = get_properties(cid)
            if props:
                print(f"   CID {props['CID']}")
                print(f"      Formula     : {props['Formula']}")
                print(f"      MolWeight   : {props['MolecularWeight']}")
                print(f"      IUPAC Name  : {props['IUPACName']}")
                print(f"      SMILES      : {props['SMILES']}")
                results.append({
                    "Compound": name,
                    "CID": props["CID"],
                    "Formula": props["Formula"],
                    "MolecularWeight": props["MolecularWeight"],
                    "IUPACName": props["IUPACName"],
                    "SMILES": props["SMILES"],
                    "Status": "✅ Ditemukan"
                })
            else:
                print(f"   CID {cid} → ❌ Data tidak tersedia")
                results.append({
                    "Compound": name,
                    "CID": cid,
                    "Formula": "",
                    "MolecularWeight": "",
                    "IUPACName": "",
                    "SMILES": "",
                    "Status": "❌ Data tidak tersedia"
                })

    # Simpan semua hasil ke Excel
    df = pd.DataFrame(results)
    df.to_excel("D:/semhas/Data/senyawa_pubchem.xlsx", index=False)
    print("\n✅ Semua data tersimpan di 'senyawa_pubchem.xlsx'")

    if not_found:
        print("\nDaftar senyawa yang tidak ditemukan:")
        for nf in not_found:
            print(f" - {nf}")

# ==== CONTOH PENGGUNAAN ====
compounds = [
    "4-aminobenzoic acid",
    "Citric acid",
    "L-phenylalanine",
    "Trans-3-indoleacrylic acid",
    "DL-tryptophan",
    "4-O-feruloyl-D-quinic acid",
    "Murrangatin",
    "Alpha-lapachone",
    "Hainanmurpanin",
    "Murraol",
    "Murralongin",
    "(1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one",
    "Paniculatin",
    "Limonene, (+/-)-",  
    "Pheophorbide a",
    "SB236057"
]

search_compounds(compounds)


Senyawa : 4-aminobenzoic acid
✅ Ditemukan 1 CID: 978
   CID 978
      Formula     : C7H7NO2
      MolWeight   : 137.14
      IUPAC Name  : 4-aminobenzoic acid
      SMILES      : C1=CC(=CC=C1C(=O)O)N
Senyawa : Citric acid
✅ Ditemukan 1 CID: 311
   CID 311
      Formula     : C6H8O7
      MolWeight   : 192.12
      IUPAC Name  : 2-hydroxypropane-1,2,3-tricarboxylic acid
      SMILES      : C(C(=O)O)C(CC(=O)O)(C(=O)O)O
Senyawa : L-phenylalanine
✅ Ditemukan 1 CID: 6140
   CID 6140
      Formula     : C9H11NO2
      MolWeight   : 165.19
      IUPAC Name  : (2S)-2-amino-3-phenylpropanoic acid
      SMILES      : C1=CC=C(C=C1)C[C@@H](C(=O)O)N
Senyawa : Trans-3-indoleacrylic acid
✅ Ditemukan 1 CID: 5375048
   CID 5375048
      Formula     : C11H9NO2
      MolWeight   : 187.19
      IUPAC Name  : (E)-3-(1H-indol-3-yl)prop-2-enoic acid
      SMILES      : C1=CC=C2C(=C1)C(=CN2)/C=C/C(=O)O
Senyawa : DL-tryptophan
✅ Ditemukan 1 CID: 1148
   CID 1148
      Formula     : C11H12N2O2
      MolWeight  

#smiles Limonene, (+/-)-(CID=22311)

In [24]:
def get_properties_by_cid(cid):
    """Ambil Formula, MolWeight, IUPAC Name, dan SMILES dari CID PubChem"""
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,SMILES,IUPACName/JSON"
    r = requests.get(url, timeout=20)
    
    if r.status_code == 200:
        data = r.json().get("PropertyTable", {}).get("Properties", [{}])[0]
        return {
            "CID": cid,
            "Formula": data.get("MolecularFormula", "-"),
            "MolWeight": data.get("MolecularWeight", "-"),
            "IUPACName": data.get("IUPACName", "-"),
            "SMILES": data.get("CanonicalSMILES") or data.get("IsomericSMILES") or data.get("SMILES", "-")
        }
    return None

# Ambil data Limonene
cid = "22311"
props = get_properties_by_cid(cid)

# Tampilkan hasil
print("="*60)
print(f"CID         : {props['CID']}")
print(f"Formula     : {props['Formula']}")
print(f"MolWeight   : {props['MolWeight']}")
print(f"IUPAC Name  : {props['IUPACName']}")
print(f"SMILES      : {props['SMILES']}")
print("="*60)

# Buat DataFrame untuk disimpan
df_limonene = pd.DataFrame([props])

# Simpan ke Excel, sheet baru "Limonene"
excel_file = "D:/semhas/Data/senyawa_pubchem.xlsx"
with pd.ExcelWriter(excel_file, mode="a", engine="openpyxl") as writer:
    df_limonene.to_excel(writer, sheet_name="Limonene", index=False)

print(f"✅ Data Limonene tersimpan di sheet 'Limonene' pada {excel_file}")


CID         : 22311
Formula     : C10H16
MolWeight   : 136.23
IUPAC Name  : 1-methyl-4-prop-1-en-2-ylcyclohexene
SMILES      : CC1=CCC(CC1)C(=C)C
✅ Data Limonene tersimpan di sheet 'Limonene' pada D:/semhas/Data/senyawa_pubchem.xlsx


# (1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one (CACTUS)

In [19]:
import urllib.parse

def get_from_cactus(compound_name):
    base_url = "https://cactus.nci.nih.gov/chemical/structure"
    formats = {
        "SMILES": "smiles",
        "Formula": "formula",
        "MolecularWeight": "mw",
        "IUPAC_Name": "iupac_name"
    }

    results = {"Compound": compound_name}
    encoded_name = urllib.parse.quote(compound_name)

    for key, fmt in formats.items():
        url = f"{base_url}/{encoded_name}/{fmt}"
        try:
            r = requests.get(url, timeout=20)
            if r.status_code == 200 and r.text.strip():
                results[key] = r.text.strip()
            else:
                results[key] = "-"
        except Exception as e:
            results[key] = f"Error: {e}"
    return results


# ==== Contoh penggunaan ====
compound_name = "(1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one"

props = get_from_cactus(compound_name)

print("="*70)
print(f"Nama Senyawa   : {props['Compound']}")
print(f"SMILES         : {props['SMILES']}")
print(f"Formula        : {props['Formula']}")
print(f"MolecularWeight: {props['MolecularWeight']}")
print(f"IUPAC Name     : {props['IUPAC_Name']}")
print("="*70)


Nama Senyawa   : (1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one
SMILES         : Clc1ccc(cc1)\C=C\C2=CC=C3[C@@H]4C[C@@H](CN(C4)C(=O)c5cncnc5)CN3C2=O
Formula        : C24H21ClN4O2
MolecularWeight: 432.9085
IUPAC Name     : -


#Mencari cid dari smiles (1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one 
#di pubchem

In [26]:
def get_cid_from_smiles(smiles):
    """Cari CID PubChem berdasarkan SMILES"""
    encoded = urllib.parse.quote(smiles)
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/{encoded}/cids/TXT"
    r = requests.get(url, timeout=20)
    if r.status_code == 200 and r.text.strip():
        return r.text.strip().split("\n")  # bisa lebih dari satu CID
    return []

def get_properties_by_cid(cid):
    """Ambil Formula, MolWeight, IUPAC Name, dan SMILES dari CID PubChem"""
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,SMILES,IUPACName/JSON"
    r = requests.get(url, timeout=20)
    
    if r.status_code == 200:
        data = r.json().get("PropertyTable", {}).get("Properties", [{}])[0]
        return {
            "CID": cid,
            "Formula": data.get("MolecularFormula", "-"),
            "MolWeight": data.get("MolecularWeight", "-"),
            "IUPACName": data.get("IUPACName", "-"),
            "SMILES": data.get("CanonicalSMILES") or data.get("IsomericSMILES") or data.get("SMILES", "-")
        }
    return None

def search_smiles_and_save(smiles, excel_file="D:/semhas/Data/senyawa_pubchem.xlsx", sheet_name="From_SMILES"):
    print("="*70)
    print(f"SMILES yang dicari: {smiles}")
    
    cids = get_cid_from_smiles(smiles)
    results = []

    if not cids:
        print("❌ CID tidak ditemukan di PubChem")
        return

    print(f"✅ Ditemukan CID(s): {', '.join(cids)}")
    for cid in cids:
        props = get_properties_by_cid(cid)
        if props:
            print(f"--- CID {props['CID']} ---")
            print(f"Formula       : {props['Formula']}")
            print(f"MolWeight     : {props['MolWeight']}")
            print(f"IUPAC Name    : {props['IUPACName']}")
            print(f"SMILES        : {props['SMILES']}")
            results.append(props)
        else:
            print(f"CID {cid} → ❌ Data tidak tersedia")

    # Simpan ke Excel di sheet baru
    if results:
        df = pd.DataFrame(results)
        with pd.ExcelWriter(excel_file, mode="a", engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
        print(f"\n✅ Data disimpan di sheet '{sheet_name}' pada {excel_file}")

# SMILES
smiles = r"Clc1ccc(cc1)\C=C\C2=CC=C3[C@@H]4C[C@@H](CN(C4)C(=O)c5cncnc5)CN3C2=O" 
search_smiles_and_save(smiles)


SMILES yang dicari: Clc1ccc(cc1)\C=C\C2=CC=C3[C@@H]4C[C@@H](CN(C4)C(=O)c5cncnc5)CN3C2=O
✅ Ditemukan CID(s): 11886484
--- CID 11886484 ---
Formula       : C24H21ClN4O2
MolWeight     : 432.9
IUPAC Name    : (1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one
SMILES        : C1[C@H]2CN(C[C@@H]1C3=CC=C(C(=O)N3C2)/C=C/C4=CC=C(C=C4)Cl)C(=O)C5=CN=CN=C5

✅ Data disimpan di sheet 'From_SMILES' pada D:/semhas/Data/senyawa_pubchem.xlsx


In [None]:
#recap

In [None]:
!pip install tabulate

In [32]:
import pandas as pd
import requests
import urllib.parse
from tabulate import tabulate

# ----- Fungsi bantu -----
def get_cids_from_name(name):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{urllib.parse.quote(name)}/cids/TXT"
    r = requests.get(url, timeout=20)
    if r.status_code == 200 and r.text.strip():
        return r.text.strip().split("\n")
    return []

def get_properties_by_cid(cid):
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/MolecularFormula,MolecularWeight,CanonicalSMILES,IsomericSMILES,SMILES,IUPACName/JSON"
    r = requests.get(url, timeout=20)
    if r.status_code == 200:
        data = r.json().get("PropertyTable", {}).get("Properties", [{}])[0]
        return data.get("CanonicalSMILES") or data.get("IsomericSMILES") or data.get("SMILES", "-")
    return "-"

# ----- Keterangan manual -----
manual_notes = {
    "(1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one":
        "Ditentukan manual via CACTUS → Terverifikasi PubChem",
    "Limonene, (+/-)-":
        "Hanya tersedia rasemat (Xu et al., 2017) → Terverifikasi PubChem"
}

# ----- Daftar senyawa -----
compounds = [
    "4-aminobenzoic acid",
    "Citric acid",
    "L-phenylalanine",
    "Trans-3-indoleacrylic acid",
    "DL-tryptophan",
    "4-O-feruloyl-D-quinic acid",
    "Murrangatin",
    "Alpha-lapachone",
    "Hainanmurpanin",
    "Murraol",
    "Murralongin",
    "(1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one",
    "Paniculatin",
    "Limonene, (+/-)-",
    "Pheophorbide a",
    "SB236057"
]

# ----- Proses semua senyawa -----
results = []
no = 1
for name in compounds:
    cids = get_cids_from_name(name)
    if cids:
        for cid in cids:
            smiles = get_properties_by_cid(cid)
            keterangan = manual_notes.get(name, "Diverifikasi")
            results.append({
                "No.": no,
                "Compound": name,
                "CID": cid,
                "SMILES": smiles,
                "Status Di PubChem": "Ada",
                "Keterangan": keterangan
            })
            no += 1
    else:
        keterangan = manual_notes.get(name, "Belum Diverifikasi")
        results.append({
            "No.": no,
            "Compound": name,
            "CID": "-",
            "SMILES": "-",
            "Status Di PubChem": "Tidak Ada",
            "Keterangan": keterangan
        })
        no += 1

# ----- Buat DataFrame -----
df = pd.DataFrame(results)
print(tabulate(df, headers='keys', tablefmt='grid', showindex=False))

# ----- Simpan ke Excel (sheet berbeda) -----
output_file = "D:/semhas/Data/senyawa_pubchem.xlsx"
sheet_name = "Hasil_Senyawa"  # bisa diganti tiap run, misalnya "Batch1", "Batch2"

with pd.ExcelWriter(output_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"\n✅ Data tersimpan di file '{output_file}' pada sheet '{sheet_name}'")


+-------+---------------------------------------------------------------------------------------------------------------------------+----------+------------------------------------------------------------------------------------------------------------------------------------+---------------------+------------------------------------------------------------------+
|   No. | Compound                                                                                                                  | CID      | SMILES                                                                                                                             | Status Di PubChem   | Keterangan                                                       |
|     1 | 4-aminobenzoic acid                                                                                                       | 978      | C1=CC(=CC=C1C(=O)O)N                                                                                                     

# SwissAdme

In [7]:
import pandas as pd

# File Excel
compound_file = 'D:/semhas/Data/senyawa_pubchem.xlsx'
swissadme_file = 'D:/semhas/Data/swissadme_hasil.csv'

# Baca sheet verifikasi_senyawa
df_verif = pd.read_excel(compound_file, sheet_name='Hasil_Senyawa')
print(f"Jumlah baris verifikasi_senyawa: {len(df_verif)}")

# Baca file SwissADME
df_swiss = pd.read_csv(swissadme_file)
print(f"Jumlah baris SwissADME: {len(df_swiss)}")

# Pastikan jumlah baris sama
if len(df_verif) != len(df_swiss):
    raise ValueError("Jumlah baris SwissADME dan verifikasi_senyawa tidak sama!")

# Tambahkan kolom Compound dan CID dari verifikasi_senyawa
df_swiss['Compound'] = df_verif['Compound']
df_swiss['CID'] = df_verif['CID']

# Tambahkan Status akhir berdasarkan Bioavailability Score
df_swiss['Status akhir'] = df_swiss['Bioavailability Score'].apply(
    lambda x: 'Dipilih' if x >= 0.55 else 'Tidak dipilih'
)

# Tambahkan kolom No.
df_swiss.insert(0, 'No.', range(1, len(df_swiss) + 1))

# Simpan ke sheet baru di file Excel yang sama
with pd.ExcelWriter(compound_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_swiss.to_excel(writer, sheet_name='swissadme_senyawa', index=False)

print(f"\n✅ Data SwissADME berhasil disimpan di sheet 'swissadme_senyawa' dalam file {compound_file}")

# --- Tambahan: Hitung jumlah senyawa unik dan jumlah total SMILES ---
jumlah_senyawa = df_verif['Compound'].nunique()
jumlah_smiles = len(df_verif)

print("\n📊 Ringkasan Data Senyawa:")
print(f"Jumlah senyawa unik   : {jumlah_senyawa} senyawa")
print(f"Jumlah total SMILES   : {jumlah_smiles} SMILES")

# Tampilkan daftar senyawa unik
print("\nDaftar senyawa unik:")
for i, senyawa in enumerate(df_verif['Compound'].unique(), start=1):
    print(f"{i}. {senyawa}")

# --- Tambahan: Ringkasan berdasarkan Status akhir ---
dipilih = df_swiss[df_swiss['Status akhir'] == 'Dipilih']
tidak_dipilih = df_swiss[df_swiss['Status akhir'] == 'Tidak dipilih']

print("\n📌 Ringkasan Status Akhir:")
print(f"Senyawa dengan status 'Dipilih'     : {dipilih['Compound'].nunique()} senyawa ({len(dipilih)} SMILES)")
print(f"Senyawa dengan status 'Tidak dipilih': {tidak_dipilih['Compound'].nunique()} senyawa ({len(tidak_dipilih)} SMILES)")

# Tampilkan daftar senyawa dipilih & tidak dipilih
print("\nDaftar senyawa dengan status 'Dipilih':")
for i, senyawa in enumerate(dipilih['Compound'].unique(), start=1):
    print(f"{i}. {senyawa}")

print("\nDaftar senyawa dengan status 'Tidak dipilih':")
for i, senyawa in enumerate(tidak_dipilih['Compound'].unique(), start=1):
    print(f"{i}. {senyawa}")

# --- Preview hasil SwissADME ---
pd.set_option('display.max_rows', 20)
print("\nPreview hasil SwissADME:")
print(df_swiss.head())


Jumlah baris verifikasi_senyawa: 17
Jumlah baris SwissADME: 17

✅ Data SwissADME berhasil disimpan di sheet 'swissadme_senyawa' dalam file D:/semhas/Data/senyawa_pubchem.xlsx

📊 Ringkasan Data Senyawa:
Jumlah senyawa unik   : 16 senyawa
Jumlah total SMILES   : 17 SMILES

Daftar senyawa unik:
1. 4-aminobenzoic acid
2. Citric acid
3. L-phenylalanine
4. Trans-3-indoleacrylic acid
5. DL-tryptophan
6. 4-O-feruloyl-D-quinic acid
7. Murrangatin
8. Alpha-lapachone
9. Hainanmurpanin
10. Murraol
11. Murralongin
12. (1R,9S)-5-[(E)-2-(4-chlorophenyl)ethenyl]-11-(pyrimidine-5-carbonyl)-7,11-diazatricyclo[7.3.1.02,7]trideca-2,4-dien-6-one
13. Paniculatin
14. Limonene, (+/-)-
15. Pheophorbide a
16. SB236057

📌 Ringkasan Status Akhir:
Senyawa dengan status 'Dipilih'     : 13 senyawa (14 SMILES)
Senyawa dengan status 'Tidak dipilih': 3 senyawa (3 SMILES)

Daftar senyawa dengan status 'Dipilih':
1. 4-aminobenzoic acid
2. Citric acid
3. L-phenylalanine
4. Trans-3-indoleacrylic acid
5. DL-tryptophan
6. Mu

# Akuisisi Protein Target

In [1]:
import pandas as pd

# File Excel
target_file = 'D:/semhas/Data/target.xlsx'

# Baca semua sheet
xls = pd.ExcelFile(target_file)
sheet_names = xls.sheet_names[:14]  # Ambil 14 sheet pertama

all_data = []

for sheet in sheet_names:
    try:
        # Baca mulai dari baris kedua sebagai header
        df = pd.read_excel(target_file, sheet_name=sheet, header=1)
        all_data.append(df)
    except Exception as e:
        print(f"⚠️ Gagal membaca sheet {sheet}: {e}")

# Gabungkan semua sheet
if all_data:
    df_all = pd.concat(all_data, ignore_index=True)
    print("\n✅ Semua 14 sheet berhasil digabungkan. Preview:")
    
    # Ambil hanya kolom yang diinginkan
    selected_cols = ['Target Name','UniProt ID','Min Activity','Assay type','Probability']
    df_selected = df_all.reindex(columns=selected_cols)
    
    print(df_selected.head(20))
    
    # --- Tambahkan ringkasan jumlah ---
    total_rows = len(df_selected)
    print(f"\nJumlah total baris: {total_rows}")
    print("\nJumlah baris non-NA per kolom:")
    print(df_selected.count())
    
    # Simpan ke sheet baru
    with pd.ExcelWriter(target_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        df_selected.to_excel(writer, sheet_name='all_14_sheets', index=False)
    
    print(f"\n✅ Data berhasil disimpan di sheet 'all_14_sheets' dalam file {target_file}")
else:
    print("❌ Tidak ada data yang berhasil dibaca.")



✅ Semua 14 sheet berhasil digabungkan. Preview:
                                          Target Name UniProt ID Min Activity  \
0                Thyroid stimulating hormone receptor     P16473     125.9 nm   
1                                     NADPH oxidase 1     Q9Y5S8          NaN   
2                          Ras-related protein Rab-9A     P51151          NaN   
3                               Kruppel-like factor 5     Q13887          NaN   
4                             Niemann-Pick C1 protein     O15118          NaN   
5           Transcription intermediary factor 1-alpha     O15164          NaN   
6                             Proteasome component C5     P20618          NaN   
7   Endoplasmic reticulum-associated amyloid beta-...     Q99714          NaN   
8                                    Cyclooxygenase-1     P23219          NaN   
9                                       Transthyretin     P02766          NaN   
10                       Proteasome Macropain subunit     P4

In [2]:
import pandas as pd

# File Excel
file_path = 'D:/semhas/Data/target.xlsx'

# Baca sheet all_14_sheets
df_all = pd.read_excel(file_path, sheet_name='all_14_sheets')

# Pastikan kolom Probability bertipe numerik
df_all['Probability'] = pd.to_numeric(df_all['Probability'], errors='coerce')

# Filter berdasarkan kondisi
df_filtered = df_all[(df_all['Probability'] >= 0.55) | (df_all['Assay type'] == 'Potency')]

# Hitung jumlah sebelum menghapus duplikat
jumlah_sebelum = len(df_filtered)

# Hapus duplikat hanya berdasarkan UniProt ID
df_filtered_unique = df_filtered.drop_duplicates(subset=['UniProt ID'])
jumlah_setelah = len(df_filtered_unique)

# Simpan ke sheet baru
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_filtered_unique.to_excel(writer, sheet_name='targets_filtered_unique', index=False)

print(f"✅ Data berhasil difilter dan disimpan di sheet 'targets_filtered_unique'.")
print(f"Jumlah baris Probability >= 0,55 dan Assay type = Potency : {jumlah_sebelum}")
print(f"Jumlah baris unik berdasarkan UniProt ID: {jumlah_setelah}")


✅ Data berhasil difilter dan disimpan di sheet 'targets_filtered_unique'.
Jumlah baris Probability >= 0,55 dan Assay type = Potency : 819
Jumlah baris unik berdasarkan UniProt ID: 229


In [65]:
import pandas as pd

# File Excel
file_path = 'D:/semhas/Data/target.xlsx'

# --- Baca sheet targets_filtered_unique ---
df_targets = pd.read_excel(file_path, sheet_name='targets_filtered_unique', engine='openpyxl')

# Nama kolom UniProt ID
col_targets = 'UniProt ID'  # pastikan sesuai nama kolom di sheet

# --- Hapus baris kosong (opsional) ---
df_targets = df_targets.dropna(subset=[col_targets])

# --- Tampilkan isi kolom UniProt ID ---
print(f"📄 Isi kolom '{col_targets}' di sheet 'targets_filtered_unique':")
print(df_targets[col_targets].reset_index(drop=True))

# --- Statistik ---
print(f"\n📊 Total UniProt ID di 'targets_filtered_unique' (setelah hapus NaN): {df_targets[col_targets].count()}")
print(f"📊 Jumlah UniProt ID unik: {df_targets[col_targets].nunique()}")


📄 Isi kolom 'UniProt ID' di sheet 'targets_filtered_unique':
0      P16473
1      Q9Y5S8
2      P51151
3      Q13887
4      O15118
        ...  
224    P30542
225    P55263
226    P48775
227    P43004
228    Q8N4C8
Name: UniProt ID, Length: 229, dtype: object

📊 Total UniProt ID di 'targets_filtered_unique' (setelah hapus NaN): 229
📊 Jumlah UniProt ID unik: 229


In [3]:
import pandas as pd

# File Excel sumber dan tujuan
input_file = 'D:/semhas/Data/target.xlsx'
output_file = 'D:/semhas/Data/target.xlsx'  # bisa sama dengan file sumber

# Sheet yang ingin digabungkan
sheet_names = ['Sheet16', 'Sheet12', 'Sheet14(limone)']

all_data = []

for sheet in sheet_names:
    try:
        df = pd.read_excel(input_file, sheet_name=sheet, engine='openpyxl', header=1)
        df['Sheet'] = sheet  # kolom asal sheet
        all_data.append(df)
        print(f"✅ Sheet {sheet} berhasil dibaca, baris: {len(df)}")
    except Exception as e:
        print(f"⚠️ Gagal membaca sheet {sheet}: {e}")

if all_data:
    # Gabungkan semua sheet
    df_combined = pd.concat(all_data, ignore_index=True)
    print(f"\n✅ Semua sheet berhasil digabungkan. Total baris: {len(df_combined)}")

    # Hitung total UniProt ID (pecah jika ada lebih dari satu ID dalam satu sel)
    if 'Uniprot ID' in df_combined.columns:
        df_uniprot_expanded = df_combined['Uniprot ID'].astype(str).str.split(' ').explode()
        total_uniprot = len(df_uniprot_expanded)
        unique_uniprot = df_uniprot_expanded.drop_duplicates().count()
        duplicate_uniprot = total_uniprot - unique_uniprot

        print("\n📊 Statistik UniProt ID setelah gabungan:")
        print(f"Total UniProt ID (dihitung semua, termasuk duplikat): {total_uniprot}")
        print(f"Jumlah UniProt ID unik: {unique_uniprot}")
        print(f"Jumlah UniProt ID duplikat: {duplicate_uniprot}")

        # Ambil dan tampilkan UniProt ID unik
        df_unique_uniprot = df_uniprot_expanded.drop_duplicates().reset_index(drop=True)
        print("\n✅ Daftar UniProt ID unik:")
        print(df_unique_uniprot)

        # Simpan daftar unik ke sheet baru (opsional)
        with pd.ExcelWriter(output_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            df_combined.to_excel(writer, sheet_name='swisstarget_gabungan', index=False)
            df_unique_uniprot.to_excel(writer, sheet_name='UniProt_ID_unique', index=False)

        print("\n✅ Data gabungan dan UniProt ID unik berhasil disimpan di file Excel.")
    else:
        print("⚠️ Kolom 'Uniprot ID' tidak ditemukan di sheet gabungan.")
else:
    print("❌ Tidak ada data yang berhasil dibaca.")


✅ Sheet Sheet16 berhasil dibaca, baris: 100
✅ Sheet Sheet12 berhasil dibaca, baris: 100
✅ Sheet Sheet14(limone) berhasil dibaca, baris: 93

✅ Semua sheet berhasil digabungkan. Total baris: 293

📊 Statistik UniProt ID setelah gabungan:
Total UniProt ID (dihitung semua, termasuk duplikat): 305
Jumlah UniProt ID unik: 263
Jumlah UniProt ID duplikat: 42

✅ Daftar UniProt ID unik:
0      P51679
1      P51681
2      Q12809
3      O95136
4      Q8TDU6
        ...  
258    P27361
259    P24723
260    P80365
261    P30304
262    Q06124
Name: Uniprot ID, Length: 263, dtype: object

✅ Data gabungan dan UniProt ID unik berhasil disimpan di file Excel.


In [6]:
import pandas as pd

# File Excel
file_path = 'D:/semhas/Data/target.xlsx'

# Baca sheet gabungan
df = pd.read_excel(file_path, sheet_name='swisstarget_gabungan', engine='openpyxl', header=0)

if 'Uniprot ID' in df.columns and 'Probability*' in df.columns:
    # Filter baris dengan Probability* > 0
    df_filtered = df[df['Probability*'] > 0]

    # Pecah UniProt ID menjadi baris terpisah sambil tetap menjaga kolom lain
    df_expanded = df_filtered.assign(Uniprot_ID_single=df_filtered['Uniprot ID'].astype(str).str.split(' ')).explode('Uniprot_ID_single')

    # Hapus duplikat berdasarkan UniProt ID (tetap simpan baris pertama dari setiap ID)
    df_unique = df_expanded.drop_duplicates(subset='Uniprot_ID_single').reset_index(drop=True)

    # Tampilkan preview
    print("✅ Daftar baris dengan UniProt ID unik dan Probability* > 0:")
    print(df_unique.head(20))

    # Simpan ke sheet baru
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        df_unique.to_excel(writer, sheet_name='UniProt_ID_unique_filtered', index=False)

    print("\n✅ Data dengan kolom lengkap dan UniProt ID unik berhasil disimpan di sheet 'UniProt_ID_unique_filtered'")

else:
    if 'Uniprot ID' not in df.columns:
        print("⚠️ Kolom 'Uniprot ID' tidak ditemukan di sheet.")
    if 'Probability*' not in df.columns:
        print("⚠️ Kolom 'Probability*' tidak ditemukan di sheet.")


✅ Daftar baris dengan UniProt ID unik dan Probability* > 0:
                                               Target   Common name  \
0                       C-C chemokine receptor type 4          CCR4   
1                       C-C chemokine receptor type 5          CCR5   
2                                                HERG         KCNH2   
3              Sphingosine 1-phosphate receptor Edg-5         S1PR2   
4              G-protein coupled bile acid receptor 1        GPBAR1   
5                                    Integrin alpha-4         ITGA4   
6                 Squalene synthetase   (by homology)         FDFT1   
7                             Integrin alpha-V/beta-3   ITGAV ITGB3   
8                             Integrin alpha-V/beta-3   ITGAV ITGB3   
9                           Integrin alpha-IIb/beta-3  ITGA2B ITGB3   
10                            Integrin alpha-V/beta-5   ITGB5 ITGAV   
11                            Integrin alpha-V/beta-6   ITGAV ITGB6   
12               

In [7]:
import pandas as pd

# File Excel
target_file = 'D:/semhas/Data/target.xlsx'

# --- Baca sheet ---
df_targets = pd.read_excel(target_file, sheet_name='targets_filtered_unique', engine='openpyxl')
df_swiss = pd.read_excel(target_file, sheet_name='UniProt_ID_unique_filtered', engine='openpyxl')

# Nama kolom UniProt ID
col_targets = 'UniProt ID'        # dari sheet targets_filtered_unique
col_swiss = 'Uniprot_ID_single'   # dari sheet UniProt_ID_unique_filtered

# --- Hapus baris kosong (NaN) ---
df_targets = df_targets.dropna(subset=[col_targets])
df_swiss = df_swiss.dropna(subset=[col_swiss])

# --- Ambil hanya kolom UniProt ID masing-masing sheet ---
df_targets_id = df_targets[[col_targets]].rename(columns={col_targets: 'Uniprot ID'})
df_swiss_id = df_swiss[[col_swiss]].rename(columns={col_swiss: 'Uniprot ID'})

# --- Gabungkan keduanya ---
df_combined = pd.concat([df_targets_id, df_swiss_id], ignore_index=True)

# --- Ambil UniProt ID unik ---
df_unique = df_combined.drop_duplicates().reset_index(drop=True)

# --- Tampilkan preview ---
print("✅ UniProt ID unik gabungan (50 baris pertama):")
print(df_unique.head(50))

# --- Statistik ---
print(f"\n📊 Total UniProt ID setelah gabungan (termasuk duplikat): {len(df_combined)}")
print(f"📊 Jumlah UniProt ID unik: {len(df_unique)}")

# --- Simpan ke sheet baru ---
with pd.ExcelWriter(target_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_unique.to_excel(writer, sheet_name='UniProt_ID_merged_unique', index=False)

print("\n✅ Hasil UniProt ID unik gabungan berhasil disimpan di sheet 'UniProt_ID_merged_unique'")


✅ UniProt ID unik gabungan (50 baris pertama):
   Uniprot ID
0      P16473
1      Q9Y5S8
2      P51151
3      Q13887
4      O15118
5      O15164
6      P20618
7      Q99714
8      P23219
9      P02766
10     P49721
11     P28482
12     P34995
13     Q06124
14     Q92753
15     Q9Y345
16     P19838
17     P30926
18     P27695
19     P24557
20     P11166
21     O15151
22     P42224
23     P02708
24     P23415
25     P30305
26     O14672
27     P07384
28     P09211
29     P21462
30     Q9HC97
31     P08684
32     O00257
33     Q16236
34     Q6V1X1
35     P25090
36     Q16288
37     P67870
38     P51955
39     P08173
40     O75469
41     P17706
42     P11387
43     P43116
44     P22736
45     P08912
46     P07477
47     P42680
48     Q00535
49     O60341

📊 Total UniProt ID setelah gabungan (termasuk duplikat): 282
📊 Jumlah UniProt ID unik: 273

✅ Hasil UniProt ID unik gabungan berhasil disimpan di sheet 'UniProt_ID_merged_unique'


In [None]:
# Jumlah Entrez Gene ID Dari OMIM

In [23]:
import pandas as pd

# 📁 Path ke file Excel
file_path = "D:/semhas/Data/OMIM-Entry.xlsx"

# 🔹 Baca file dari baris ke-5 (karena header berada di baris ke-5)
df = pd.read_excel(file_path, header=4)

# 🔹 Filter baris dengan Entrez Gene ID tidak kosong
df_filtered = df[df["Entrez Gene ID"].notna()].copy()
total_filtered = len(df_filtered)

# 🔹 Cari duplikat berdasarkan Entrez Gene ID
df_duplicates = df_filtered[df_filtered.duplicated(subset=["Entrez Gene ID"], keep=False)].copy()
total_duplikat = len(df_duplicates)

# 🔹 Simpan ke file (Filtered: Sheet2, Duplicates: Sheet3)
with pd.ExcelWriter(file_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    df_filtered.to_excel(writer, sheet_name="Filtered", index=False)
    df_duplicates.to_excel(writer, sheet_name="Duplicates", index=False)

# 🔹 Tampilkan ringkasan
print("📊 Ringkasan:")
print(f"🔹 Setelah filter (Entrez Gene ID tidak kosong): {total_filtered}")
print(f"🔹 Jumlah baris duplikat berdasarkan Entrez Gene ID: {total_duplikat}")


📊 Ringkasan:
🔹 Setelah filter (Entrez Gene ID tidak kosong): 176
🔹 Jumlah baris duplikat berdasarkan Entrez Gene ID: 0


In [None]:
# Cek Validasi UniProt ID dengan Taxonomy ID: 9606(Homo sapiens)  dari target

In [37]:
import pandas as pd

# --- File input ---
target_file = "D:/semhas/Data/target.xlsx"
mapping_file = "D:/semhas/Data/HUMAN_9606_idmapping.dat"

# --- Baca sheet "UniProt_ID_merged_unique" ---
df_target = pd.read_excel(target_file, sheet_name="UniProt_ID_merged_unique")

# Normalisasi nama kolom
df_target.columns = df_target.columns.str.strip()

# Pastikan nama kolom UniProt sesuai
col_uniprot = "Uniprot ID"

# --- Baca file mapping UniProt ---
df_map = pd.read_csv(mapping_file, sep="\t", header=None,
                     names=["UniProtKB-AC", "ID_Type", "Value"])

# --- Ambil mapping UniProt → Gene Name ---
df_gene = df_map[df_map["ID_Type"] == "Gene_Name"][["UniProtKB-AC", "Value"]]
df_gene.rename(columns={"UniProtKB-AC": "UniProt", "Value": "Gene Name"}, inplace=True)

# --- Merge dengan gene name ---
df_merge = df_target.merge(df_gene, how="left", left_on=col_uniprot, right_on="UniProt")

# --- Tambahkan Status Verifikasi ---
valid_uniprot_set = set(df_map["UniProtKB-AC"].unique())
df_merge["Status Verifikasi"] = df_merge[col_uniprot].apply(
    lambda x: "✅ Valid (Homo sapiens)" if pd.notna(x) and x in valid_uniprot_set else "❌ Tidak valid"
)

# --- Tambahkan kolom Sumber ---
df_merge["Sumber"] = "SuperPred"

# --- Format final ---
df_final = pd.DataFrame({
    "No.": range(1, len(df_merge) + 1),
    "ID Asal": df_merge[col_uniprot],
    "UniProt": df_merge[col_uniprot],
    "Status Verifikasi": df_merge["Status Verifikasi"],
    "Gene Name": df_merge["Gene Name"],
    "Sumber": df_merge["Sumber"]
})

# --- Simpan ke sheet baru ---
with pd.ExcelWriter(target_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    df_final.to_excel(writer, sheet_name="SuperPred_UniProt_validated", index=False)

print("✅ Hasil validasi sudah disimpan ke sheet 'SuperPred_UniProt_validated'")
print(df_final.head(10))


✅ Hasil validasi sudah disimpan ke sheet 'SuperPred_UniProt_validated'
   No. ID Asal UniProt       Status Verifikasi Gene Name     Sumber
0    1  P16473  P16473  ✅ Valid (Homo sapiens)      TSHR  SuperPred
1    2  Q9Y5S8  Q9Y5S8  ✅ Valid (Homo sapiens)      NOX1  SuperPred
2    3  P51151  P51151  ✅ Valid (Homo sapiens)     RAB9A  SuperPred
3    4  Q13887  Q13887  ✅ Valid (Homo sapiens)      KLF5  SuperPred
4    5  O15118  O15118  ✅ Valid (Homo sapiens)      NPC1  SuperPred
5    6  O15164  O15164  ✅ Valid (Homo sapiens)    TRIM24  SuperPred
6    7  P20618  P20618  ✅ Valid (Homo sapiens)     PSMB1  SuperPred
7    8  Q99714  Q99714  ✅ Valid (Homo sapiens)  HSD17B10  SuperPred
8    9  P23219  P23219  ✅ Valid (Homo sapiens)     PTGS1  SuperPred
9   10  P02766  P02766  ✅ Valid (Homo sapiens)       TTR  SuperPred


In [None]:
# Cek Validasi UniProt ID dengan Taxonomy ID: 9606(Homo sapiens)  dari omim

In [35]:
import pandas as pd

# --- File input ---
target_file = "D:/semhas/Data/OMIM-Entry.xlsx"
mapping_file = "D:/semhas/Data/HUMAN_9606_idmapping.dat"

# --- Baca sheet "Filtered" ---
df_target = pd.read_excel(target_file, sheet_name="Filtered")

# Normalisasi nama kolom
df_target.columns = df_target.columns.str.strip()

# Tentukan nama kolom ID Asal
col_id_asal = "Entrez Gene ID"

# Pastikan kolom Entrez di target jadi string
df_target[col_id_asal] = df_target[col_id_asal].astype(str)

# --- Baca file mapping UniProt ---
df_map = pd.read_csv(mapping_file, sep="\t", header=None,
                     names=["UniProtKB-AC", "ID_Type", "Value"])

# --- Ambil mapping Entrez Gene ID → UniProt ---
df_entrez2uniprot = df_map[df_map["ID_Type"] == "GeneID"][["Value", "UniProtKB-AC"]]
df_entrez2uniprot.rename(columns={"Value": col_id_asal, "UniProtKB-AC": "UniProt"}, inplace=True)

# Pastikan kolom Entrez di mapping juga string
df_entrez2uniprot[col_id_asal] = df_entrez2uniprot[col_id_asal].astype(str)

# --- Ambil mapping UniProt → Gene Name ---
df_gene = df_map[df_map["ID_Type"] == "Gene_Name"][["UniProtKB-AC", "Value"]]
df_gene.rename(columns={"UniProtKB-AC": "UniProt", "Value": "Gene Name"}, inplace=True)

# --- Merge Entrez dengan UniProt ---
df_merge = df_target.merge(df_entrez2uniprot, how="left", on=col_id_asal)

# --- Tambahkan Gene Name ---
df_merge = df_merge.merge(df_gene, how="left", on="UniProt")

# --- Tambahkan Status Verifikasi ---
valid_uniprot_set = set(df_map["UniProtKB-AC"].unique())
df_merge["Status Verifikasi"] = df_merge["UniProt"].apply(
    lambda x: "✅ Valid (Homo sapiens)" if pd.notna(x) and x in valid_uniprot_set else "❌ Tidak valid"
)

# --- Tambahkan kolom Sumber ---
df_merge["Sumber"] = "OMIM"

# --- Format final ---
df_final = pd.DataFrame({
    "No.": range(1, len(df_merge) + 1),
    "ID Asal": df_merge[col_id_asal],
    "UniProt": df_merge["UniProt"],
    "Status Verifikasi": df_merge["Status Verifikasi"],
    "Gene Name": df_merge["Gene Name"],
    "Sumber": df_merge["Sumber"]
})

# --- Simpan ke sheet baru ---
with pd.ExcelWriter(target_file, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    df_final.to_excel(writer, sheet_name="OMIM_UniProt_validated", index=False)

print("✅ Hasil validasi sudah disimpan ke sheet 'OMIM_UniProt_validated'")
print(df_final.head(10))


✅ Hasil validasi sudah disimpan ke sheet 'OMIM_UniProt_validated'
   No. ID Asal UniProt       Status Verifikasi Gene Name Sumber
0    1    7350  P25874  ✅ Valid (Homo sapiens)      UCP1   OMIM
1    2    7350  Q4KMT7  ✅ Valid (Homo sapiens)      UCP1   OMIM
2    3   26027  Q8WXI4  ✅ Valid (Homo sapiens)    ACOT11   OMIM
3    4   54976  Q9GZN8  ✅ Valid (Homo sapiens)    ADISSP   OMIM
4    5  388581  Q5T7M4  ✅ Valid (Homo sapiens)  C1QTNF12   OMIM
5    6   56246  Q8TCY5  ✅ Valid (Homo sapiens)      MRAP   OMIM
6    7   51129  Q9BY76  ✅ Valid (Homo sapiens)   ANGPTL4   OMIM
7    8   10891  Q9UBK2  ✅ Valid (Homo sapiens)  PPARGC1A   OMIM
8    9    8841  O15379  ✅ Valid (Homo sapiens)     HDAC3   OMIM
9   10    5743  P35354  ✅ Valid (Homo sapiens)     PTGS2   OMIM


In [None]:
# Mengabungkan omim dan superpred

In [38]:
import pandas as pd

# --- File input ---
omim_file = "D:/semhas/Data/OMIM-Entry.xlsx"
superpred_file = "D:/semhas/Data/target.xlsx"
output_file = "D:/semhas/Data/All_UniProt_validated.xlsx"  # bisa jadi file gabungan baru

# --- Baca masing-masing sheet hasil validasi ---
df_omim = pd.read_excel(omim_file, sheet_name="OMIM_UniProt_validated")
df_superpred = pd.read_excel(superpred_file, sheet_name="SuperPred_UniProt_validated")

# --- Gabungkan keduanya ---
df_combined = pd.concat([df_superpred, df_omim], ignore_index=True)

# --- Reset nomor urut ---
df_combined["No."] = range(1, len(df_combined) + 1)

# --- Simpan ke file baru ---
df_combined.to_excel(output_file, sheet_name="All_UniProt_validated", index=False)

print("✅ Gabungan OMIM + SuperPred sudah disimpan ke file:", output_file)
print(df_combined.head(10))


✅ Gabungan OMIM + SuperPred sudah disimpan ke file: D:/semhas/Data/All_UniProt_validated.xlsx
   No. ID Asal UniProt       Status Verifikasi Gene Name     Sumber
0    1  P16473  P16473  ✅ Valid (Homo sapiens)      TSHR  SuperPred
1    2  Q9Y5S8  Q9Y5S8  ✅ Valid (Homo sapiens)      NOX1  SuperPred
2    3  P51151  P51151  ✅ Valid (Homo sapiens)     RAB9A  SuperPred
3    4  Q13887  Q13887  ✅ Valid (Homo sapiens)      KLF5  SuperPred
4    5  O15118  O15118  ✅ Valid (Homo sapiens)      NPC1  SuperPred
5    6  O15164  O15164  ✅ Valid (Homo sapiens)    TRIM24  SuperPred
6    7  P20618  P20618  ✅ Valid (Homo sapiens)     PSMB1  SuperPred
7    8  Q99714  Q99714  ✅ Valid (Homo sapiens)  HSD17B10  SuperPred
8    9  P23219  P23219  ✅ Valid (Homo sapiens)     PTGS1  SuperPred
9   10  P02766  P02766  ✅ Valid (Homo sapiens)       TTR  SuperPred


In [None]:
#cek duplikat

In [41]:
import pandas as pd

# --- File input/output ---
file_path = "D:/semhas/Data/All_UniProt_validated.xlsx"

# --- Baca sheet gabungan ---
df_all = pd.read_excel(file_path, sheet_name="All_UniProt_validated")

# --- Pastikan kolom yang dipakai ada ---
print("Kolom tersedia:", df_all.columns.tolist())

# --- Cari duplikat berdasarkan kombinasi ID Asal + UniProt ---
dupes = df_all[df_all.duplicated(subset=["ID Asal", "UniProt"], keep=False)]

# --- Data unik (hapus duplikat) ---
df_unique = df_all.drop_duplicates(subset=["ID Asal", "UniProt"]).reset_index(drop=True)
df_unique["No."] = range(1, len(df_unique) + 1)

# --- Statistik ---
total_data = len(df_all)
total_dupes = len(dupes)
total_unique = len(df_unique)

print("\n📊 Statistik Data All_UniProt_validated")
print(f"Total data gabungan : {total_data}")
print(f"Jumlah duplikat     : {total_dupes}")
print(f"Jumlah unik         : {total_unique}")

# --- Buat DataFrame ringkasan statistik ---
df_stats = pd.DataFrame({
    "Keterangan": ["Total Data", "Jumlah Duplikat", "Jumlah Unik"],
    "Jumlah": [total_data, total_dupes, total_unique]
})

# --- Simpan ke file Excel ---
with pd.ExcelWriter(file_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    dupes.to_excel(writer, sheet_name="Duplicated", index=False)
    df_unique.to_excel(writer, sheet_name="Unique_UniProt", index=False)
    df_stats.to_excel(writer, sheet_name="Statistik", index=False)

print(f"\n✅ Hasil duplikat, unik, dan statistik sudah disimpan di file {file_path}")


Kolom tersedia: ['No.', 'ID Asal', 'UniProt', 'Status Verifikasi', 'Gene Name', 'Sumber']

📊 Statistik Data All_UniProt_validated
Total data gabungan : 578
Jumlah duplikat     : 0
Jumlah unik         : 578

✅ Hasil duplikat, unik, dan statistik sudah disimpan di file D:/semhas/Data/All_UniProt_validated.xlsx


In [50]:
import pandas as pd
import os

# 📁 Folder sumber & file output
folder_path = "D:/semhas/Data/hasil_Superpred"
output_file = "D:/semhas/Data/Superpred_Dipilih.xlsx"

# Kolom penting
selected_columns = ["Target Name", "UniProt ID", "Min Activity", "Assay type", 
                    "Probability", "Model accuracy"]

dataframes = []
excel_files = [f for f in os.listdir(folder_path) if f.endswith(".xlsx")]

for file in excel_files:
    file_path = os.path.join(folder_path, file)
    print(f"📂 Membaca file: {file_path}")

    try:
        xls = pd.ExcelFile(file_path, engine="openpyxl")
        for sheet_name in xls.sheet_names:
            print(f"   🔹 Sheet: {sheet_name}")
            df = pd.read_excel(xls, sheet_name=sheet_name, header=1, engine="openpyxl")
            df.columns = df.columns.str.strip()

            available_columns = [col for col in selected_columns if col in df.columns]
            if not available_columns:
                continue

            df_filtered = df[available_columns].copy()
            df_filtered["Source_File"] = file
            df_filtered["Source_Sheet"] = sheet_name
            dataframes.append(df_filtered)

    except Exception as e:
        print(f"❌ Gagal membaca {file_path}: {e}")

# Gabungkan dan proses data
if dataframes:
    merged_df = pd.concat(dataframes, ignore_index=True)
    merged_df = merged_df.dropna(subset=["UniProt ID"])

    # Data terpilih berdasarkan dua kondisi:
    # 1. Probability >= 0.5 (jika kolom tersedia)
    # 2. ATAU Assay type == "Potency" dan Min Activity tidak kosong
    cond_probability = (merged_df.get("Probability") >= 0.5) if "Probability" in merged_df.columns else False
    cond_activity = (merged_df["Assay type"].fillna("") == "Potency") & (merged_df["Min Activity"].notna())

    df_dipilih = merged_df[(cond_probability | cond_activity)].copy()

    # Hilangkan duplikat berdasarkan UniProt ID
    df_dipilih = df_dipilih.drop_duplicates(subset=["UniProt ID"], keep="first").reset_index(drop=True)

    # Ringkasan
    unique_proteins = merged_df["UniProt ID"].nunique()
    summary_df = pd.DataFrame({
        "Keterangan": ["Jumlah Total Entri", "Jumlah Protein Target Unik", "Jumlah Terpilih"],
        "Jumlah": [len(merged_df), unique_proteins, len(df_dipilih)]
    })

    # Simpan ke Excel
    with pd.ExcelWriter(output_file, engine="openpyxl", mode="w") as writer:
        merged_df.to_excel(writer, index=False, sheet_name="Data Gabungan")
        df_dipilih.to_excel(writer, index=False, sheet_name="Dipilih")
        summary_df.to_excel(writer, index=False, sheet_name="Ringkasan")

    print(f"✅ Selesai. Data disimpan di: {output_file}")
else:
    print("❌ Tidak ada file Excel valid ditemukan.")


📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets1 - 2025-07-19T054204.825.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets1 - 2025-07-19T054214.688.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets10 - 2025-07-19T055450.251.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets11 - 2025-07-19T055542.195.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets13 - 2025-07-19T055930.578.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets2 - 2025-07-19T054402.358.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets3 - 2025-07-19T054634.502.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets4 - 2025-07-19T054832.108.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets5 - 2025-07-19T054941.371.xlsx
   🔹 Sheet: Sheet1
📂 Membaca file: D:/semhas/Data/hasil_Superpred\Targets5 - 202

In [12]:
print("Kolom yang tersedia:", df.columns.tolist())


Kolom yang tersedia: ['MIM Number', 'Title', 'Included Titles', 'Cytogenetic Location', 'Genomic Coordinates (From NCBI/GRCh38)', 'Entrez Gene ID', 'UniProt', 'Gene_Name', 'Status Verifikasi']


In [None]:
# Cek Validasi UniProt ID dengan Taxonomy ID: 9606(Homo sapiens) dari superpred

In [14]:
import pandas as pd

# File Excel dengan UniProt ID unik
target_file = "D:/semhas/Data/target.xlsx"
sheet_name = "UniProt_ID_merged_unique"

# File mapping UniProt -> Gene Name
mapping_file = "D:/semhas/Data/HUMAN_9606_idmapping.dat"

# --- Baca sheet UniProt ID unik ---
df_uniprot = pd.read_excel(target_file, sheet_name=sheet_name, engine='openpyxl')

# --- Baca file mapping UniProt ---
# File .dat biasanya tab-delimited: UniProtKB-AC  ID-type  ID
df_mapping = pd.read_csv(mapping_file, sep='\t', header=None, names=['UniProt ID', 'ID Type', 'ID'])

# --- Filter mapping hanya untuk Gene Name ---
df_gene = df_mapping[df_mapping['ID Type'] == 'Gene_Name'][['UniProt ID', 'ID']]
df_gene = df_gene.rename(columns={'ID': 'Gene Name'})

# --- Merge dengan UniProt ID unik ---
df_merged = pd.merge(df_uniprot, df_gene, on='Uniprot ID', how='left')

# --- Tambahkan kolom Status Verifikasi ---
df_merged['Status Verifikasi'] = '✅ Valid (Homo sapiens)'

# --- Preview ---
print("✅ Preview hasil dengan Gene Name:")
print(df_merged.head(20))

# --- Simpan ke sheet baru ---
with pd.ExcelWriter(target_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_merged.to_excel(writer, sheet_name='UniProt_with_GeneName', index=False)

print("\n✅ Hasil berhasil disimpan di sheet 'UniProt_with_GeneName'")


KeyError: 'UniProt ID'

In [9]:
import pandas as pd

# === 1. File sumber ===
superpred_file = "D:/semhas/Data/SuperPred_Dipilih.xlsx"
mapping_file = "D:/semhas/Data/HUMAN_9606_idmapping.dat"

# === 2. Baca data SuperPred ===
df_superpred = pd.read_excel(superpred_file, sheet_name="Dipilih")
df_superpred = df_superpred.rename(columns=lambda x: x.strip())  # hapus spasi tersembunyi

# === 3. Baca data mapping
mapping_df = pd.read_csv(mapping_file, sep='\t', header=None, names=["UniProt", "DB", "ID"])

# === 4. Ambil semua UniProt valid
valid_uniprot = mapping_df["UniProt"].unique()

# === 5. Tambahkan kolom status verifikasi
df_superpred['Status Verifikasi'] = df_superpred['UniProt ID'].isin(valid_uniprot).map(
    lambda x: '✅ Valid (Homo sapiens)' if x else '❌ Tidak valid / Non-Human'
)

# === 6. Tambahkan kolom Gene Name
gene_map = mapping_df[mapping_df["DB"] == "Gene_Name"].copy()
gene_map = gene_map.rename(columns={"UniProt": "UniProt ID", "ID": "Gene Name"}).drop(columns=["DB"])

# Gabungkan dengan DataFrame SuperPred berdasarkan UniProt ID
df_superpred = pd.merge(df_superpred, gene_map, on="UniProt ID", how="left")

# === 7. Simpan ke sheet baru
with pd.ExcelWriter(superpred_file, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    df_superpred.to_excel(writer, sheet_name='Verifikasi-UniProt', index=False)

# === 8. Rekap jumlah hasil verifikasi
rekap = df_superpred['Status Verifikasi'].value_counts()
print("\n📊 Rekapitulasi Status Verifikasi:")
print(rekap)

# === 9. Contoh hasil
print("\n📌 Contoh hasil dengan Gene Name:")
print(df_superpred[['UniProt ID', 'Gene Name', 'Status Verifikasi']].head())



📊 Rekapitulasi Status Verifikasi:
Status Verifikasi
✅ Valid (Homo sapiens)    269
Name: count, dtype: int64

📌 Contoh hasil dengan Gene Name:
  UniProt ID Gene Name       Status Verifikasi
0     P16473      TSHR  ✅ Valid (Homo sapiens)
1     Q9Y5S8      NOX1  ✅ Valid (Homo sapiens)
2     P51151     RAB9A  ✅ Valid (Homo sapiens)
3     Q13887      KLF5  ✅ Valid (Homo sapiens)
4     O15118      NPC1  ✅ Valid (Homo sapiens)


In [None]:
# Gabungkan OMIM dan Superpred

In [11]:
# === Sumber: Proses penggabungan data protein target dari OMIM dan SuperPred ===
# Tujuan: Membuat satu tabel gabungan yang berisi ID asal, UniProt ID, status verifikasi, nama gen, dan sumber data

import pandas as pd

# === 1. File sumber ===
omim_file = "D:/semhas/Data/OMIM-Entry.xlsx"                # Sumber data OMIM (Filtered)
superpred_file = "D:/semhas/Data/SuperPred_Dipilih.xlsx"    # Sumber data hasil verifikasi SuperPred

# === 2. Baca sheet 'Filtered' dari OMIM ===
df_omim = pd.read_excel(omim_file, sheet_name="Filtered")
df_omim = df_omim.rename(columns=lambda x: x.strip())  # Hilangkan spasi tersembunyi pada nama kolom

# Ambil kolom yang dibutuhkan & ganti nama kolom agar konsisten
df_omim = df_omim[['Entrez Gene ID', 'UniProt', 'Gene_Name', 'Status Verifikasi']].copy()
df_omim = df_omim.rename(columns={
    'Entrez Gene ID': 'ID Asal',
    'UniProt': 'UniProt',
    'Gene_Name': 'Gene Name'
})
df_omim['ID Asal'] = df_omim['ID Asal'].astype(str)  # Pastikan ID Asal berupa string
df_omim['Sumber'] = 'OMIM'

# === 3. Baca sheet 'Verifikasi-UniProt' dari SuperPred ===
df_superpred = pd.read_excel(superpred_file, sheet_name="Verifikasi-UniProt")
df_superpred = df_superpred.rename(columns=lambda x: x.strip())  # Hilangkan spasi tersembunyi

# Ambil kolom yang dibutuhkan & samakan nama kolom
df_superpred = df_superpred[['UniProt ID', 'Status Verifikasi', 'Gene Name']].copy()
df_superpred = df_superpred.rename(columns={'UniProt ID': 'UniProt'})
df_superpred['ID Asal'] = df_superpred['UniProt']
df_superpred['Sumber'] = 'SuperPred'

# Susun ulang kolom agar sama
df_superpred = df_superpred[['ID Asal', 'UniProt', 'Status Verifikasi', 'Gene Name', 'Sumber']]
df_omim = df_omim[['ID Asal', 'UniProt', 'Status Verifikasi', 'Gene Name', 'Sumber']]

# === 4. Gabungkan data dari OMIM dan SuperPred ===
df_all = pd.concat([df_omim, df_superpred], ignore_index=True)

# Tambahkan kolom nomor urut di paling kiri
df_all.insert(0, 'No.', range(1, len(df_all) + 1))

# === 5. Tampilkan hasil ===
print(df_all.head(20))  # Tampilkan 20 baris pertama
print(f"\n✅ Total gabungan entri: {len(df_all)}")

# (Opsional) Simpan ke file Excel jika diinginkan:
df_all.to_excel("D:/semhas/Data/Target_Protein.xlsx", index=False)


    No. ID Asal     UniProt       Status Verifikasi Gene Name Sumber
0     1    7350      P25874  ✅ Valid (Homo sapiens)      UCP1   OMIM
1     2    7350      Q4KMT7  ✅ Valid (Homo sapiens)      UCP1   OMIM
2     3   26027      Q8WXI4  ✅ Valid (Homo sapiens)    ACOT11   OMIM
3     4   54976      Q9GZN8  ✅ Valid (Homo sapiens)    ADISSP   OMIM
4     5  388581      Q5T7M4  ✅ Valid (Homo sapiens)  C1QTNF12   OMIM
5     6   56246      Q8TCY5  ✅ Valid (Homo sapiens)      MRAP   OMIM
6     7   51129      Q9BY76  ✅ Valid (Homo sapiens)   ANGPTL4   OMIM
7     8   10891      Q9UBK2  ✅ Valid (Homo sapiens)  PPARGC1A   OMIM
8     9    8841      O15379  ✅ Valid (Homo sapiens)     HDAC3   OMIM
9    10    5743      P35354  ✅ Valid (Homo sapiens)     PTGS2   OMIM
10   11     657      P36894  ✅ Valid (Homo sapiens)    BMPR1A   OMIM
11   12    9572      P20393  ✅ Valid (Homo sapiens)     NR1D1   OMIM
12   13    9572      F1D8S3  ✅ Valid (Homo sapiens)     NR1D1   OMIM
13   14  133522      Q86YN6  ✅ Val

In [12]:
import pandas as pd

# === 1. Baca file Excel ===
input_file = "D:/semhas/Data/Target_Protein.xlsx"

# Baca sheet pertama (diasumsikan gabungan data)
xls = pd.ExcelFile(input_file)
df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
df.columns = df.columns.str.strip()

# Tambahkan kolom sumber berdasarkan nilai ID Asal (OMIM: angka, SuperPred: string UniProt)
df['Sumber'] = df['ID Asal'].apply(lambda x: 'OMIM' if str(x).isdigit() else 'SuperPred')

# === 2. Filter yang valid (case-insensitive) ===
df['Status Verifikasi'] = df['Status Verifikasi'].str.strip()
df_valid = df[df['Status Verifikasi'].str.lower() == '✅ Valid (Homo sapiens)'].copy()

# === 3. Simpan ke sheet baru 'Dipilih' ===
with pd.ExcelWriter(input_file, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    df_valid.to_excel(writer, sheet_name='Dipilih', index=False)

# === 4. Hitung jumlah valid/tidak valid per sumber ===
rekap = df.groupby(['Sumber', 'Status Verifikasi']).size().unstack(fill_value=0)

# === 6. Tampilkan hasil ===
print("📊 Rekapitulasi Status Verifikasi per Sumber:")
print(rekap)


# Hitung total valid
valid_df = df_all[df_all['Status Verifikasi'].str.contains("Valid", na=False)]

# Total valid semua sumber
total_valid = len(valid_df)

# Cetak hasil
print(f"\n🔢 Total Data Valid dari Semua Sumber: {total_valid}")



📊 Rekapitulasi Status Verifikasi per Sumber:
Status Verifikasi  ✅ Valid (Homo sapiens)  ❌ Tidak ditemukan
Sumber                                                      
OMIM                                  301                  4
SuperPred                             269                  0

🔢 Total Data Valid dari Semua Sumber: 570


In [13]:
import pandas as pd

# === 1. Baca file Excel ===
input_file = "D:/semhas/Data/Target_Protein.xlsx"

# Baca sheet pertama (diasumsikan gabungan data)
xls = pd.ExcelFile(input_file)
df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
df.columns = df.columns.str.strip()

# === 2. Tambahkan kolom 'Sumber' berdasarkan ID Asal (OMIM: angka, SuperPred: UniProt)
df['Sumber'] = df['ID Asal'].apply(lambda x: 'OMIM' if str(x).isdigit() else 'SuperPred')

# === 3. Bersihkan kolom Status Verifikasi
df['Status Verifikasi'] = df['Status Verifikasi'].str.strip()

# === 4. Filter data valid
df_valid = df[df['Status Verifikasi'].str.lower().str.contains("valid")].copy()

# === 5. Simpan hasil valid ke sheet baru "Dipilih"
with pd.ExcelWriter(input_file, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    df_valid.to_excel(writer, sheet_name='Dipilih', index=False)
print("✅ Data valid berhasil disimpan ke sheet 'Dipilih'.")

# === 6. Rekap status verifikasi berdasarkan sumber
rekap = df.groupby(['Sumber', 'Status Verifikasi']).size().unstack(fill_value=0)

# === 7. Hitung total valid dari semua sumber
total_valid = len(df_valid)

# === 8. Tampilkan hasil
print("\n📊 Rekapitulasi Status Verifikasi per Sumber:")
print(rekap)
print(f"\n🔢 Total Data Valid dari Semua Sumber: {total_valid}")


✅ Data valid berhasil disimpan ke sheet 'Dipilih'.

📊 Rekapitulasi Status Verifikasi per Sumber:
Status Verifikasi  ✅ Valid (Homo sapiens)  ❌ Tidak ditemukan
Sumber                                                      
OMIM                                  301                  4
SuperPred                             269                  0

🔢 Total Data Valid dari Semua Sumber: 570


In [None]:
# Hapus duplikat Gabungan

In [14]:
import pandas as pd

# Baca file dan sheet
file_path = 'D:/semhas/Data/Target_Protein.xlsx'
sheet_asal = 'Dipilih'
sheet_tujuan = 'Tanpa_Duplikat'

# Baca data
df = pd.read_excel(file_path, sheet_name=sheet_asal)

# Tampilkan kolom yang tersedia
print("Kolom tersedia:", df.columns)

# Nama kolom UniProt
kolom_uniprot = 'UniProt'

# Cek apakah kolom tersedia
if kolom_uniprot in df.columns:
    # Cek dan tampilkan duplikat
    duplikat = df[df.duplicated(subset=[kolom_uniprot], keep=False)]
    print(f"\nJumlah duplikat ditemukan: {duplikat.shape[0]}")

    # Hapus duplikat dan simpan hasil
    df_tanpa_duplikat = df.drop_duplicates(subset=[kolom_uniprot], keep='first')

    # Simpan ke sheet baru di file yang sama
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        df_tanpa_duplikat.to_excel(writer, sheet_name=sheet_tujuan, index=False)

    print(f"Hasil tanpa duplikat disimpan ke sheet '{sheet_tujuan}'.")
else:
    print(f"Kolom '{kolom_uniprot}' tidak ditemukan.")


Kolom tersedia: Index(['No.', 'ID Asal', 'UniProt', 'Status Verifikasi', 'Gene Name',
       'Sumber'],
      dtype='object')

Jumlah duplikat ditemukan: 18
Hasil tanpa duplikat disimpan ke sheet 'Tanpa_Duplikat'.


In [16]:
import pandas as pd

# Baca file dan sheet
file_path = 'D:/semhas/Data/Target_Protein.xlsx'
sheet_asal = 'Dipilih'
sheet_tujuan = 'Tanpa_Duplikat'

# Baca data
df = pd.read_excel(file_path, sheet_name=sheet_asal)

# Tampilkan kolom yang tersedia
print("Kolom tersedia:", df.columns)

# Nama kolom UniProt
kolom_uniprot = 'UniProt'

# Cek apakah kolom tersedia
if kolom_uniprot in df.columns:
    print(f"\nJumlah total entri awal: {len(df)}")
    print(f"Contoh data awal:\n{df[[kolom_uniprot]].head(10)}")

    # Cek dan tampilkan duplikat
    duplikat = df[df.duplicated(subset=[kolom_uniprot], keep=False)]
    print(f"\nJumlah duplikat ditemukan: {len(duplikat)}")
    if not duplikat.empty:
        print(f"Contoh duplikat:\n{duplikat[[kolom_uniprot]].drop_duplicates().head(10)}")

    # Hapus duplikat dan simpan hasil
    df_tanpa_duplikat = df.drop_duplicates(subset=[kolom_uniprot], keep='first')
    print(f"\nJumlah entri setelah penghapusan duplikat: {len(df_tanpa_duplikat)}")

    # Simpan ke sheet baru di file yang sama
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        df_tanpa_duplikat.to_excel(writer, sheet_name=sheet_tujuan, index=False)

    print(f"Hasil tanpa duplikat disimpan ke sheet '{sheet_tujuan}'.")
else:
    print(f"Kolom '{kolom_uniprot}' tidak ditemukan.")


Kolom tersedia: Index(['No.', 'ID Asal', 'UniProt', 'Status Verifikasi', 'Gene Name',
       'Sumber'],
      dtype='object')

Jumlah total entri awal: 570
Contoh data awal:
  UniProt
0  P25874
1  Q4KMT7
2  Q8WXI4
3  Q9GZN8
4  Q5T7M4
5  Q8TCY5
6  Q9BY76
7  Q9UBK2
8  O15379
9  P35354

Jumlah duplikat ditemukan: 18
Contoh duplikat:
    UniProt
8    O15379
9    P35354
48   P08235
83   P10275
172  Q8TDU6
240  P10646
247  P05186
248  P00750
279  Q13370

Jumlah entri setelah penghapusan duplikat: 561
Hasil tanpa duplikat disimpan ke sheet 'Tanpa_Duplikat'.


# Data PPI

In [111]:
import pandas as pd
import requests
import time

# Path ke file Excel yang berisi UniProt ID
file_path = "D:/semhas/projek/Target_Protein.xlsx"

# Baca kolom UniProt_ID
df = pd.read_excel(file_path, sheet_name='Dipilih')
uniprot_ids = df["UniProt"].dropna().unique().tolist()

# Fungsi untuk memproses batch dan ambil data PPI dari STRING
def fetch_ppi_batch(batch_ids, batch_number, total_batches):
    print(f"🚀 Memproses batch {batch_number} dari {total_batches} ({len(batch_ids)} ID)...")

    # Format string untuk query
    protein_query = "%0d%0a".join(batch_ids)

    # URL API STRING, dengan parameter 'species=9606' untuk Homo sapiens dan 'required_score=400' untuk skor minimal 0,4
    url = f"https://string-db.org/api/tsv/network?identifiers={protein_query}&species=9606&required_score=400"

    try:
        response = requests.get(url)
        if response.status_code == 200:
            batch_file = f"ppi_batch_{batch_number}.tsv"
            with open(batch_file, "w", encoding="utf-8") as f:
                f.write(response.text)
            print(f"✅ Batch {batch_number} selesai! Data disimpan di {batch_file}")
            return pd.read_csv(batch_file, sep="\t")
        else:
            print(f"❌ Error Batch {batch_number}: {response.status_code} - {response.text}")
            return pd.DataFrame()
    except Exception as e:
        print(f"❌ Gagal mengunduh data pada batch {batch_number}: {e}")
        return pd.DataFrame()

# 🔄 Looping batch
batch_size = 100
all_results = []
total_batches = (len(uniprot_ids) + batch_size - 1) // batch_size

for i in range(0, len(uniprot_ids), batch_size):
    batch_ids = uniprot_ids[i:i+batch_size]
    batch_number = (i // batch_size) + 1
    df_batch = fetch_ppi_batch(batch_ids, batch_number, total_batches)
    if not df_batch.empty:
        all_results.append(df_batch)
    time.sleep(1.5)  # Hindari overload server STRING

# 🔗 Gabungkan semua hasil
if all_results:
    combined_df = pd.concat(all_results, ignore_index=True)
    output_file = "D:/semhas/projek/PPI.tsv"
    combined_df.to_csv(output_file, sep="\t", index=False)
    print(f"\n🎉 Semua batch selesai! Data PPI gabungan disimpan di:\n{output_file}")
else:
    print("⚠️ Tidak ada data PPI yang berhasil diambil.")


🚀 Memproses batch 1 dari 6 (100 ID)...
✅ Batch 1 selesai! Data disimpan di ppi_batch_1.tsv
🚀 Memproses batch 2 dari 6 (100 ID)...
✅ Batch 2 selesai! Data disimpan di ppi_batch_2.tsv
🚀 Memproses batch 3 dari 6 (100 ID)...
✅ Batch 3 selesai! Data disimpan di ppi_batch_3.tsv
🚀 Memproses batch 4 dari 6 (100 ID)...
✅ Batch 4 selesai! Data disimpan di ppi_batch_4.tsv
🚀 Memproses batch 5 dari 6 (100 ID)...
✅ Batch 5 selesai! Data disimpan di ppi_batch_5.tsv
🚀 Memproses batch 6 dari 6 (61 ID)...
✅ Batch 6 selesai! Data disimpan di ppi_batch_6.tsv

🎉 Semua batch selesai! Data PPI gabungan disimpan di:
D:/semhas/projek/PPI.tsv


In [113]:
import pandas as pd
import networkx as nx

# File PPI dan mapping
ppi_file = 'D:/semhas/projek/PPI.tsv'
mapping_file = 'D:/semhas/projek/9606.protein.aliases.v12.0.txt'

# Baca data PPI
df = pd.read_csv(ppi_file, sep='\t')

# Baca mapping STRING ID ke UniProt
mapping_df = pd.read_csv(mapping_file, sep='\t', header=None, names=['string_id', 'alias', 'source'])
uniprot_map = mapping_df[mapping_df['source'].str.contains('UniProt', case=False, na=False)]
string_to_uniprot = dict(zip(uniprot_map['string_id'], uniprot_map['alias']))

# Mapping UniProt ID berdasarkan stringId_A dan stringId_B
df['UniProt_ID_1'] = df['stringId_A'].map(string_to_uniprot)
df['UniProt_ID_2'] = df['stringId_B'].map(string_to_uniprot)

# Hapus baris yang tidak berhasil dimapping
df_clean = df.dropna(subset=['UniProt_ID_1', 'UniProt_ID_2'])

# Buat graph dari pasangan UniProt ID
G = nx.from_pandas_edgelist(df_clean, 'UniProt_ID_1', 'UniProt_ID_2')

# Hitung node dan edge
jumlah_node = G.number_of_nodes()
jumlah_interaksi = G.number_of_edges()

print(f"Jumlah protein (node) yang berinteraksi: {jumlah_node}")
print(f"Jumlah interaksi antar protein (edge): {jumlah_interaksi}")
print(df_clean[['stringId_A', 'stringId_B', 'UniProt_ID_1', 'UniProt_ID_2']].head())


Jumlah protein (node) yang berinteraksi: 332
Jumlah interaksi antar protein (edge): 933
             stringId_A            stringId_B UniProt_ID_1 UniProt_ID_2
0  9606.ENSP00000216180  9606.ENSP00000471477   uc003bei.1   uc002pkn.2
1  9606.ENSP00000216180  9606.ENSP00000287820   uc003bei.1   uc003bwr.4
2  9606.ENSP00000216180  9606.ENSP00000312652   uc003bei.1   uc003vml.3
3  9606.ENSP00000216180  9606.ENSP00000348069   uc003bei.1   uc002grt.3
4  9606.ENSP00000216180  9606.ENSP00000244289   uc003bei.1   uc002otr.4


In [18]:
import pandas as pd

# 📂 File PPI dan mapping
ppi_file = 'D:/semhas/projek/hasil/PPI_Combined_Final.tsv'
mapping_file = 'D:/semhas/projek/hasil/9606.protein.aliases.v12.0.txt'

# 📥 Baca file PPI
df_ppi = pd.read_csv(ppi_file, sep='\t')

# Rename kolom 'score' menjadi 'combined_score' jika perlu
df_ppi = df_ppi.rename(columns={'score': 'combined_score'})

# Filter hanya interaksi dengan combined_score ≥ 0.4
df_filtered = df_ppi[df_ppi['combined_score'] >= 0.4].copy()

# 📥 Baca mapping STRING ID → UniProt ID
df_map = pd.read_csv(mapping_file, sep='\t', header=None, names=['string_id', 'alias', 'source'])
df_map_uniprot = df_map[df_map['source'].str.contains('UniProt', case=False, na=False)]

# Buat dictionary untuk mapping
string_to_uniprot = dict(zip(df_map_uniprot['string_id'], df_map_uniprot['alias']))

# 🧭 Mapping ke UniProt ID
df_filtered['UniProt_ID_1'] = df_filtered['stringId_A'].map(string_to_uniprot)
df_filtered['UniProt_ID_2'] = df_filtered['stringId_B'].map(string_to_uniprot)

# Hapus baris yang tidak berhasil dimapping
df_final = df_filtered.dropna(subset=['UniProt_ID_1', 'UniProt_ID_2'])

# 🎯 Tabel akhir yang ingin ditampilkan
result = df_final[['stringId_A', 'stringId_B', 'UniProt_ID_1', 'UniProt_ID_2', 'combined_score']]
print(result.head(10))

# 📊 Statistik
jumlah_interaksi = len(result)
protein_unik = pd.unique(result[['UniProt_ID_1', 'UniProt_ID_2']].values.ravel())
jumlah_protein_unik = len(protein_unik)

print("\n📊 Ringkasan Interaksi PPI:")
print(f"🔹 Jumlah total interaksi (edge)         : {jumlah_interaksi}")
print(f"🔸 Jumlah protein unik yang berinteraksi : {jumlah_protein_unik}")

# 📝 Parameter ekstraksi dari STRING
print("\n⚙️ Parameter ekstraksi STRING:")
print("- Organisme: Homo sapiens (TaxID: 9606)")
print("- Jenis interaksi: full STRING network (eksperimental, prediktif, dan teks)")
print("- Skor kepercayaan minimum (combined_score): 0.4 (medium confidence)")

# 💾 Simpan hasil
result.to_csv('D:/semhas/projek/hasil/PPI_Table_Filtered_0.4.tsv', sep='\t', index=False)
print("\n✅ Tabel PPI dengan mapping UniProt disimpan.")


             stringId_A            stringId_B UniProt_ID_1 UniProt_ID_2  \
0  9606.ENSP00000215832  9606.ENSP00000381607   uc010gtk.2       V9HWE9   
1  9606.ENSP00000215832  9606.ENSP00000411532   uc010gtk.2   uc002huq.4   
2  9606.ENSP00000215832  9606.ENSP00000380252   uc010gtk.2   uc002ulg.6   
3  9606.ENSP00000215832  9606.ENSP00000351908   uc010gtk.2   uc003qhc.4   
4  9606.ENSP00000215832  9606.ENSP00000216714   uc010gtk.2   uc058yte.1   
5  9606.ENSP00000215832  9606.ENSP00000321656   uc010gtk.2   uc003lcp.3   
6  9606.ENSP00000215832  9606.ENSP00000501150   uc010gtk.2   uc011bmq.4   
7  9606.ENSP00000215832  9606.ENSP00000355966   uc010gtk.2   uc001hir.3   
8  9606.ENSP00000215832  9606.ENSP00000428056   uc010gtk.2   uc003jva.4   
9  9606.ENSP00000215832  9606.ENSP00000354558   uc010gtk.2   uc001asd.4   

   combined_score  
0           0.411  
1           0.421  
2           0.430  
3           0.495  
4           0.513  
5           0.563  
6           0.618  
7           0.

# Data ddi

In [None]:
#UniProt --> Domain PFam

In [14]:
import requests
import pandas as pd
import time
from tqdm import tqdm

# === 1. BACA UNIPROT ID DARI FILE ===
file_path = 'D:/semhas/projek/Target_Protein.xlsx'
df = pd.read_excel(file_path, sheet_name='Tanpa_Duplikat')

all_uniprot_ids = df['UniProt'].dropna().unique().tolist()

# === 2. FUNGSI AMBIL DOMAIN DARI INTERPRO (Pfam) ===
def get_pfam_domains(uniprot_id):
    url = f"https://www.ebi.ac.uk/interpro/api/entry/pfam/protein/uniprot/{uniprot_id}"
    headers = {"Accept": "application/json"}
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            data = response.json()
            domains = [item['metadata']['accession'] for item in data['results']]
            return domains
        else:
            return []
    except Exception as e:
        print(f"❌ Error untuk {uniprot_id}: {e}")
        return []

# === 3. LOOPING MAPPING DENGAN OUTPUT DETAIL ===
results = []

print(f"🔍 Memulai pemetaan {len(all_uniprot_ids)} UniProt ID ke domain Pfam...\n")
for idx, uniprot_id in enumerate(tqdm(all_uniprot_ids, desc="Memproses"), 1):
    domains = get_pfam_domains(uniprot_id)
    
    if domains:
        for domain in domains:
            results.append({'UniProt_ID': uniprot_id, 'Pfam_ID': domain})
        domain_str = ', '.join(domains)
        print(f"[{idx}] {uniprot_id} → {len(domains)} domain: {domain_str}")
    else:
        print(f"[{idx}] {uniprot_id} → Tidak ditemukan domain Pfam.")
    
    time.sleep(1)  # Hindari rate limit

# === 4. SIMPAN HASIL KE FILE EXCEL ===
df_result = pd.DataFrame(results)
output_file = 'D:/semhas/projek/TargetProtein_PFam.xlsx'
df_result.to_excel(output_file, index=False)

print(f"\n✅ Pemetaan selesai! Hasil disimpan di: {output_file}")

# === 5. SIMPAN SEMUA UNIPROT ID KE FILE EXCEL ===
df_all_ids = pd.DataFrame({'UniProt_ID': all_uniprot_ids})
all_uniprot_file = 'D:/semhas/projek/All_UniProt_IDs.xlsx'
df_all_ids.to_excel(all_uniprot_file, index=False)

print(f"✅ Daftar semua UniProt ID disimpan di: {all_uniprot_file}")


🔍 Memulai pemetaan 561 UniProt ID ke domain Pfam...



Memproses:   0%|                                                                               | 0/561 [00:00<?, ?it/s]

[1] P25874 → 1 domain: PF00153


Memproses:   0%|▏                                                                      | 1/561 [00:02<27:53,  2.99s/it]

[2] Q4KMT7 → 1 domain: PF00153


Memproses:   0%|▎                                                                      | 2/561 [00:05<22:48,  2.45s/it]

[3] Q8WXI4 → 2 domain: PF01852, PF03061


Memproses:   1%|▍                                                                      | 3/561 [00:07<21:09,  2.28s/it]

[4] Q9GZN8 → 1 domain: PF15006


Memproses:   1%|▌                                                                      | 4/561 [00:09<20:28,  2.21s/it]

[5] Q5T7M4 → Tidak ditemukan domain Pfam.


Memproses:   1%|▋                                                                      | 5/561 [00:11<21:07,  2.28s/it]

[6] Q8TCY5 → 1 domain: PF15183


Memproses:   1%|▊                                                                      | 6/561 [00:13<20:38,  2.23s/it]

[7] Q9BY76 → 1 domain: PF00147


Memproses:   1%|▉                                                                      | 7/561 [00:15<20:16,  2.20s/it]

[8] Q9UBK2 → 1 domain: PF00076


Memproses:   1%|█                                                                      | 8/561 [00:17<19:56,  2.16s/it]

[9] O15379 → 1 domain: PF00850


Memproses:   2%|█▏                                                                     | 9/561 [00:20<19:40,  2.14s/it]

[10] P35354 → 1 domain: PF03098


Memproses:   2%|█▏                                                                    | 10/561 [00:22<19:33,  2.13s/it]

[11] P36894 → 3 domain: PF00069, PF01064, PF08515


Memproses:   2%|█▎                                                                    | 11/561 [00:24<19:27,  2.12s/it]

[12] P20393 → 2 domain: PF00104, PF00105


Memproses:   2%|█▍                                                                    | 12/561 [00:26<19:20,  2.11s/it]

[13] F1D8S3 → 2 domain: PF00104, PF00105


Memproses:   2%|█▌                                                                    | 13/561 [00:28<19:25,  2.13s/it]

[14] Q86YN6 → 1 domain: PF00076


Memproses:   2%|█▋                                                                    | 14/561 [00:30<19:20,  2.12s/it]

[15] B7ZM40 → 1 domain: PF00076


Memproses:   3%|█▊                                                                    | 15/561 [00:32<19:10,  2.11s/it]

[16] Q9NSA1 → 1 domain: PF00167


Memproses:   3%|█▉                                                                    | 16/561 [00:34<19:00,  2.09s/it]

[17] A0A7U3L5M7 → 1 domain: PF00167


Memproses:   3%|██                                                                    | 17/561 [00:36<18:53,  2.08s/it]

[18] Q92813 → 1 domain: PF00837


Memproses:   3%|██▏                                                                   | 18/561 [00:38<18:53,  2.09s/it]

[19] A8K845 → 1 domain: PF00837


Memproses:   3%|██▎                                                                   | 19/561 [00:41<18:58,  2.10s/it]

[20] O60543 → 1 domain: PF02017


Memproses:   4%|██▍                                                                   | 20/561 [00:43<18:49,  2.09s/it]

[21] Q8N5P9 → 1 domain: PF02017


Memproses:   4%|██▌                                                                   | 21/561 [00:45<18:44,  2.08s/it]

[22] B3KVA2 → Tidak ditemukan domain Pfam.


Memproses:   4%|██▋                                                                   | 22/561 [00:47<18:40,  2.08s/it]

[23] Q8NA29 → 1 domain: PF13347


Memproses:   4%|██▊                                                                   | 23/561 [00:49<18:41,  2.08s/it]

[24] Q71RE4 → 1 domain: PF13347


Memproses:   4%|██▉                                                                   | 24/561 [00:51<18:37,  2.08s/it]

[25] E7EPI8 → 1 domain: PF13347


Memproses:   4%|███                                                                   | 25/561 [00:53<18:36,  2.08s/it]

[26] B4DNN7 → 1 domain: PF13347


Memproses:   5%|███▏                                                                  | 26/561 [00:55<19:33,  2.19s/it]

[27] P51688 → 2 domain: PF00884, PF16347


Memproses:   5%|███▎                                                                  | 27/561 [00:58<19:11,  2.16s/it]

[28] P06213 → 5 domain: PF00041, PF00757, PF01030, PF07714, PF17870


Memproses:   5%|███▍                                                                  | 28/561 [01:00<18:55,  2.13s/it]

[29] P55851 → 1 domain: PF00153


Memproses:   5%|███▌                                                                  | 29/561 [01:02<18:46,  2.12s/it]

[30] P34820 → 2 domain: PF00019, PF00688


Memproses:   5%|███▋                                                                  | 30/561 [01:05<21:06,  2.39s/it]

[31] P55916 → 1 domain: PF00153


Memproses:   6%|███▊                                                                  | 31/561 [01:07<20:14,  2.29s/it]

[32] A0A0S2Z4G5 → 1 domain: PF00153


Memproses:   6%|███▉                                                                  | 32/561 [01:11<25:08,  2.85s/it]

[33] Q9HD89 → 1 domain: PF06954


Memproses:   6%|████                                                                  | 33/561 [01:13<23:02,  2.62s/it]

[34] Q96AD5 → 1 domain: PF01734


Memproses:   6%|████▏                                                                 | 34/561 [01:15<21:32,  2.45s/it]

[35] P35558 → 2 domain: PF00821, PF17297


Memproses:   6%|████▎                                                                 | 35/561 [01:17<20:28,  2.34s/it]

[36] P01137 → 2 domain: PF00019, PF00688


Memproses:   6%|████▍                                                                 | 36/561 [01:19<20:01,  2.29s/it]

[37] A0A499FJK2 → 2 domain: PF00019, PF00688


Memproses:   7%|████▌                                                                 | 37/561 [01:21<19:28,  2.23s/it]

[38] P31040 → 2 domain: PF00890, PF02910


Memproses:   7%|████▋                                                                 | 38/561 [01:23<18:59,  2.18s/it]

[39] D6RFM5 → 1 domain: PF00890


Memproses:   7%|████▊                                                                 | 39/561 [01:26<18:46,  2.16s/it]

[40] P13726 → 2 domain: PF01108, PF09294


Memproses:   7%|████▉                                                                 | 40/561 [01:28<18:28,  2.13s/it]

[41] Q9NST1 → 1 domain: PF01734


Memproses:   7%|█████                                                                 | 41/561 [01:30<18:17,  2.11s/it]

[42] Q9UM22 → 1 domain: PF00811


Memproses:   7%|█████▏                                                                | 42/561 [01:32<18:10,  2.10s/it]

[43] Q96J80 → 1 domain: PF00811


Memproses:   8%|█████▎                                                                | 43/561 [01:34<18:03,  2.09s/it]

[44] P09172 → 3 domain: PF01082, PF03351, PF03712


Memproses:   8%|█████▍                                                                | 44/561 [01:36<18:12,  2.11s/it]

[45] Q13795 → 1 domain: PF00025


Memproses:   8%|█████▌                                                                | 45/561 [01:38<18:07,  2.11s/it]

[46] B7ZKY8 → 1 domain: PF00025


Memproses:   8%|█████▋                                                                | 46/561 [01:40<17:58,  2.09s/it]

[47] A0A384P5U7 → 1 domain: PF00025


Memproses:   8%|█████▊                                                                | 47/561 [01:42<17:58,  2.10s/it]

[48] Q9HB03 → 1 domain: PF01151


Memproses:   9%|█████▉                                                                | 48/561 [01:44<18:00,  2.11s/it]

[49] P08235 → 2 domain: PF00104, PF00105


Memproses:   9%|██████                                                                | 49/561 [01:46<17:52,  2.09s/it]

[50] B0ZBF6 → 2 domain: PF00104, PF00105


Memproses:   9%|██████▏                                                               | 50/561 [01:49<17:46,  2.09s/it]

[51] Q15788 → 7 domain: PF00989, PF07469, PF08815, PF08832, PF14598, PF16665, PF23172


Memproses:   9%|██████▎                                                               | 51/561 [01:51<17:42,  2.08s/it]

[52] Q15596 → 8 domain: PF00989, PF07469, PF08815, PF08832, PF14598, PF16279, PF16665, PF23172


Memproses:   9%|██████▍                                                               | 52/561 [01:53<17:38,  2.08s/it]

[53] B4DPW8 → 2 domain: PF07469, PF08815


Memproses:   9%|██████▌                                                               | 53/561 [01:55<17:34,  2.08s/it]

[54] O95140 → 2 domain: PF00350, PF04799


Memproses:  10%|██████▋                                                               | 54/561 [01:57<17:37,  2.09s/it]

[55] Q8N912 → 1 domain: PF15555


Memproses:  10%|██████▊                                                               | 55/561 [01:59<17:35,  2.09s/it]

[56] J3KNP8 → 1 domain: PF15555


Memproses:  10%|██████▉                                                               | 56/561 [02:01<17:31,  2.08s/it]

[57] B4DN93 → 1 domain: PF15555


Memproses:  10%|███████                                                               | 57/561 [02:03<17:27,  2.08s/it]

[58] Q8N6M3 → 1 domain: PF10261


Memproses:  10%|███████▏                                                              | 58/561 [02:06<19:46,  2.36s/it]

[59] Q9NTG7 → 1 domain: PF02146


Memproses:  11%|███████▎                                                              | 59/561 [02:08<18:58,  2.27s/it]

[60] P41159 → 1 domain: PF02024


Memproses:  11%|███████▍                                                              | 60/561 [02:10<18:35,  2.23s/it]

[61] A4D0Y8 → 1 domain: PF02024


Memproses:  11%|███████▌                                                              | 61/561 [02:12<18:10,  2.18s/it]

[62] P84996 → Tidak ditemukan domain Pfam.


Memproses:  11%|███████▋                                                              | 62/561 [02:15<18:01,  2.17s/it]

[63] Q5JWF2 → 1 domain: PF00503


Memproses:  11%|███████▊                                                              | 63/561 [02:17<17:59,  2.17s/it]

[64] P63092 → 1 domain: PF00503


Memproses:  11%|███████▉                                                              | 64/561 [02:19<17:41,  2.14s/it]

[65] O95467 → 1 domain: PF06390


Memproses:  12%|████████                                                              | 65/561 [02:21<17:41,  2.14s/it]

[66] A0A590UJF0 → 1 domain: PF00503


Memproses:  12%|████████▏                                                             | 66/561 [02:23<17:28,  2.12s/it]

[67] B0AZR9 → 1 domain: PF00503


Memproses:  12%|████████▎                                                             | 67/561 [02:25<17:19,  2.10s/it]

[68] A0A0S2Z3S5 → 1 domain: PF00503


Memproses:  12%|████████▍                                                             | 68/561 [02:27<17:15,  2.10s/it]

[69] Q5JWD1 → 1 domain: PF00503


Memproses:  12%|████████▌                                                             | 69/561 [02:29<17:14,  2.10s/it]

[70] Q14455 → 1 domain: PF00503


Memproses:  12%|████████▋                                                             | 70/561 [02:31<17:11,  2.10s/it]

[71] A0A7I2V5R6 → 1 domain: PF00503


Memproses:  13%|████████▊                                                             | 71/561 [02:33<17:07,  2.10s/it]

[72] Q5FWY2 → 1 domain: PF00503


Memproses:  13%|████████▉                                                             | 72/561 [02:36<17:10,  2.11s/it]

[73] A0A0S2Z3H8 → 1 domain: PF00503


Memproses:  13%|█████████                                                             | 73/561 [02:38<17:00,  2.09s/it]

[74] P37231 → 3 domain: PF00104, PF00105, PF12577


Memproses:  13%|█████████▏                                                            | 74/561 [02:40<17:01,  2.10s/it]

[75] E9PFV2 → 3 domain: PF00104, PF00105, PF12577


Memproses:  13%|█████████▎                                                            | 75/561 [02:42<16:52,  2.08s/it]

[76] E9PFX5 → 2 domain: PF00105, PF12577


Memproses:  14%|█████████▍                                                            | 76/561 [02:44<16:48,  2.08s/it]

[77] A0A494C1F9 → 2 domain: PF00105, PF12577


Memproses:  14%|█████████▌                                                            | 77/561 [02:46<16:43,  2.07s/it]

[78] D2KUA6 → 3 domain: PF00104, PF00105, PF12577


Memproses:  14%|█████████▋                                                            | 78/561 [02:48<16:42,  2.08s/it]

[79] Q8IY47 → 3 domain: PF00651, PF01344, PF07707


Memproses:  14%|█████████▊                                                            | 79/561 [02:50<16:38,  2.07s/it]

[80] A0A024RA38 → 3 domain: PF00651, PF01344, PF07707


Memproses:  14%|█████████▉                                                            | 80/561 [02:52<17:16,  2.15s/it]

[81] Q15672 → 1 domain: PF00010


Memproses:  14%|██████████                                                            | 81/561 [02:54<17:00,  2.13s/it]

[82] Q76M96 → 1 domain: PF13778


Memproses:  15%|██████████▏                                                           | 82/561 [02:57<16:53,  2.12s/it]

[83] Q9HAZ2 → 3 domain: PF00096, PF13912, PF21549


Memproses:  15%|██████████▎                                                           | 83/561 [02:59<16:47,  2.11s/it]

[84] P10275 → 3 domain: PF00104, PF00105, PF02166


Memproses:  15%|██████████▍                                                           | 84/561 [03:01<16:38,  2.09s/it]

[85] F1D8N5 → 2 domain: PF00104, PF00105


Memproses:  15%|██████████▌                                                           | 85/561 [03:03<16:31,  2.08s/it]

[86] A0A087WUX9 → 1 domain: PF02166


Memproses:  15%|██████████▋                                                           | 86/561 [03:05<16:30,  2.09s/it]

[87] G4VV16 → 1 domain: PF02166


Memproses:  16%|██████████▊                                                           | 87/561 [03:07<17:02,  2.16s/it]

[88] Q9NUA2 → 1 domain: PF02166


Memproses:  16%|██████████▉                                                           | 88/561 [03:09<16:47,  2.13s/it]

[89] P05112 → 1 domain: PF00727


Memproses:  16%|███████████                                                           | 89/561 [03:11<16:35,  2.11s/it]

[90] Q5FC01 → 1 domain: PF00727


Memproses:  16%|███████████▏                                                          | 90/561 [03:13<16:27,  2.10s/it]

[91] D4HNR6 → 1 domain: PF00727


Memproses:  16%|███████████▎                                                          | 91/561 [03:15<16:21,  2.09s/it]

[92] Q641Q3 → Tidak ditemukan domain Pfam.


Memproses:  16%|███████████▍                                                          | 92/561 [03:18<16:19,  2.09s/it]

[93] Q15546 → 1 domain: PF03006


Memproses:  17%|███████████▌                                                          | 93/561 [03:20<16:13,  2.08s/it]

[94] Q9H9B1 → 5 domain: PF00856, PF05033, PF12796, PF13637, PF21533


Memproses:  17%|███████████▋                                                          | 94/561 [03:22<16:14,  2.09s/it]

[95] A0A1B0GV09 → 5 domain: PF00856, PF05033, PF12796, PF13637, PF21533


Memproses:  17%|███████████▊                                                          | 95/561 [03:24<16:25,  2.11s/it]

[96] Q05469 → 2 domain: PF06350, PF07859


Memproses:  17%|███████████▉                                                          | 96/561 [03:26<16:29,  2.13s/it]

[97] A8K8W7 → 2 domain: PF06350, PF07859


Memproses:  17%|████████████                                                          | 97/561 [03:29<17:34,  2.27s/it]

[98] P36956 → 1 domain: PF00010


Memproses:  17%|████████████▏                                                         | 98/561 [03:32<19:22,  2.51s/it]

[99] B3KR77 → 1 domain: PF00010


Memproses:  18%|████████████▎                                                         | 99/561 [03:34<18:25,  2.39s/it]

[100] Q9BQT9 → 1 domain: PF19699


Memproses:  18%|████████████▎                                                        | 100/561 [03:36<17:49,  2.32s/it]

[101] Q15848 → 2 domain: PF00386, PF01391


Memproses:  18%|████████████▍                                                        | 101/561 [03:38<17:14,  2.25s/it]

[102] A8K660 → 2 domain: PF00386, PF01391


Memproses:  18%|████████████▌                                                        | 102/561 [03:40<16:51,  2.20s/it]

[103] B2R773 → 2 domain: PF00386, PF01391


Memproses:  18%|████████████▋                                                        | 103/561 [03:42<16:35,  2.17s/it]

[104] P49715 → 1 domain: PF07716


Memproses:  19%|████████████▊                                                        | 104/561 [03:44<16:20,  2.15s/it]

[105] Q9UMX1 → 2 domain: PF05076, PF12470


Memproses:  19%|████████████▉                                                        | 105/561 [03:46<16:13,  2.14s/it]

[106] A0A223LX15 → 2 domain: PF05076, PF12470


Memproses:  19%|█████████████                                                        | 106/561 [03:49<16:02,  2.12s/it]

[107] Q9BXJ8 → 1 domain: PF07851


Memproses:  19%|█████████████▏                                                       | 107/561 [03:51<15:54,  2.10s/it]

[108] A0A087X266 → 1 domain: PF07851


Memproses:  19%|█████████████▎                                                       | 108/561 [03:53<15:47,  2.09s/it]

[109] Q99958 → 1 domain: PF00250


Memproses:  19%|█████████████▍                                                       | 109/561 [03:55<15:46,  2.09s/it]

[110] Q8N4Q0 → 2 domain: PF00107, PF08240


Memproses:  20%|█████████████▌                                                       | 110/561 [03:57<15:43,  2.09s/it]

[111] Q4G1C4 → 2 domain: PF00107, PF08240


Memproses:  20%|█████████████▋                                                       | 111/561 [03:59<15:39,  2.09s/it]

[112] Q86YS7 → 3 domain: PF00168, PF23025, PF23028


Memproses:  20%|█████████████▊                                                       | 112/561 [04:01<15:40,  2.09s/it]

[113] B7ZLK1 → 3 domain: PF00168, PF23025, PF23028


Memproses:  20%|█████████████▉                                                       | 113/561 [04:03<15:41,  2.10s/it]

[114] P23560 → 1 domain: PF00243


Memproses:  20%|██████████████                                                       | 114/561 [04:05<15:49,  2.12s/it]

[115] A0A0E3SU01 → 1 domain: PF00243


Memproses:  20%|██████████████▏                                                      | 115/561 [04:08<17:51,  2.40s/it]

[116] Q9H0K1 → 2 domain: PF00069, PF23312


Memproses:  21%|██████████████▎                                                      | 116/561 [04:11<17:16,  2.33s/it]

[117] Q8TAT2 → 1 domain: PF06473


Memproses:  21%|██████████████▍                                                      | 117/561 [04:13<16:38,  2.25s/it]

[118] Q6UXH0 → Tidak ditemukan domain Pfam.


Memproses:  21%|██████████████▌                                                      | 118/561 [04:15<16:15,  2.20s/it]

[119] Q96S06 → 2 domain: PF06762, PF25179


Memproses:  21%|██████████████▋                                                      | 119/561 [04:17<16:00,  2.17s/it]

[120] O15240 → Tidak ditemukan domain Pfam.


Memproses:  21%|██████████████▊                                                      | 120/561 [04:19<16:40,  2.27s/it]

[121] Q6UXT9 → Tidak ditemukan domain Pfam.


Memproses:  22%|██████████████▉                                                      | 121/561 [04:21<16:17,  2.22s/it]

[122] Q9UBU3 → 2 domain: PF04643, PF04644


Memproses:  22%|███████████████                                                      | 122/561 [04:23<15:57,  2.18s/it]

[123] Q7Z4L0 → 1 domain: PF02285


Memproses:  22%|███████████████▏                                                     | 123/561 [04:26<15:45,  2.16s/it]

[124] Q53EU6 → 1 domain: PF01553


Memproses:  22%|███████████████▎                                                     | 124/561 [04:28<15:33,  2.14s/it]

[125] A0A024RDG5 → 1 domain: PF01553


Memproses:  22%|███████████████▎                                                     | 125/561 [04:30<16:13,  2.23s/it]

[126] Q9UEF7 → 1 domain: PF00232


Memproses:  22%|███████████████▍                                                     | 126/561 [04:32<15:56,  2.20s/it]

[127] Q8TD20 → 1 domain: PF00083


Memproses:  23%|███████████████▌                                                     | 127/561 [04:34<15:36,  2.16s/it]

[128] Q96G97 → 1 domain: PF06775


Memproses:  23%|███████████████▋                                                     | 128/561 [04:36<15:22,  2.13s/it]

[129] A0A024R549 → 1 domain: PF06775


Memproses:  23%|███████████████▊                                                     | 129/561 [04:38<15:15,  2.12s/it]

[130] Q9H3Z7 → 1 domain: PF00561


Memproses:  23%|███████████████▉                                                     | 130/561 [04:41<15:06,  2.10s/it]

[131] P13945 → 1 domain: PF00001


Memproses:  23%|████████████████                                                     | 131/561 [04:43<15:00,  2.09s/it]

[132] A8KAG8 → 1 domain: PF00001


Memproses:  24%|████████████████▏                                                    | 132/561 [04:45<15:19,  2.14s/it]

[133] Q96AQ7 → 1 domain: PF02017


Memproses:  24%|████████████████▎                                                    | 133/561 [04:47<15:08,  2.12s/it]

[134] A0A0A0MRY9 → 1 domain: PF02017


Memproses:  24%|████████████████▍                                                    | 134/561 [04:49<15:02,  2.11s/it]

[135] P23945 → 4 domain: PF00001, PF01462, PF12369, PF13306


Memproses:  24%|████████████████▌                                                    | 135/561 [04:51<14:57,  2.11s/it]

[136] P49765 → 1 domain: PF00341


Memproses:  24%|████████████████▋                                                    | 136/561 [04:53<14:54,  2.10s/it]

[137] Q7LAP4 → 1 domain: PF00341


Memproses:  24%|████████████████▊                                                    | 137/561 [04:55<14:52,  2.10s/it]

[138] P01225 → 1 domain: PF00007


Memproses:  25%|████████████████▉                                                    | 138/561 [04:57<14:45,  2.09s/it]

[139] A0A0F7RQE8 → 1 domain: PF00007


Memproses:  25%|█████████████████                                                    | 139/561 [04:59<14:40,  2.09s/it]

[140] Q8IV16 → 1 domain: PF00087


Memproses:  25%|█████████████████▏                                                   | 140/561 [05:02<14:41,  2.09s/it]

[141] Q86X10 → 1 domain: PF20412


Memproses:  25%|█████████████████▎                                                   | 141/561 [05:04<14:36,  2.09s/it]

[142] Q6MZJ2 → Tidak ditemukan domain Pfam.


Memproses:  25%|█████████████████▍                                                   | 142/561 [05:06<14:32,  2.08s/it]

[143] P01033 → 1 domain: PF00965


Memproses:  25%|█████████████████▌                                                   | 143/561 [05:08<14:29,  2.08s/it]

[144] Q6FGX5 → 1 domain: PF00965


Memproses:  26%|█████████████████▋                                                   | 144/561 [05:12<18:24,  2.65s/it]

[145] Q05516 → 3 domain: PF00096, PF00651, PF13912


Memproses:  26%|█████████████████▊                                                   | 145/561 [05:14<17:06,  2.47s/it]

[146] A0A024R3C6 → 3 domain: PF00096, PF00651, PF13912


Memproses:  26%|█████████████████▉                                                   | 146/561 [05:16<16:17,  2.36s/it]

[147] Q6GTS8 → 2 domain: PF01546, PF07687


Memproses:  26%|██████████████████                                                   | 147/561 [05:18<15:41,  2.28s/it]

[148] Q5SV97 → Tidak ditemukan domain Pfam.


Memproses:  26%|██████████████████▏                                                  | 148/561 [05:20<15:14,  2.21s/it]

[149] Q15306 → 2 domain: PF00605, PF10401


Memproses:  27%|██████████████████▎                                                  | 149/561 [05:22<14:59,  2.18s/it]

[150] Q6ZSB9 → 3 domain: PF00096, PF00651, PF13912


Memproses:  27%|██████████████████▍                                                  | 150/561 [05:25<15:27,  2.26s/it]

[151] Q96EC8 → 1 domain: PF04893


Memproses:  27%|██████████████████▌                                                  | 151/561 [05:27<15:05,  2.21s/it]

[152] P06400 → 4 domain: PF01857, PF01858, PF08934, PF11934


Memproses:  27%|██████████████████▋                                                  | 152/561 [05:29<14:45,  2.17s/it]

[153] O14520 → 1 domain: PF00230


Memproses:  27%|██████████████████▊                                                  | 153/561 [05:31<14:31,  2.14s/it]

[154] Q5T5M0 → 1 domain: PF00230


Memproses:  27%|██████████████████▉                                                  | 154/561 [05:33<14:21,  2.12s/it]

[155] B7Z4U2 → 1 domain: PF00230


Memproses:  28%|███████████████████                                                  | 155/561 [05:35<14:14,  2.11s/it]

[156] Q6P5T0 → 1 domain: PF00230


Memproses:  28%|███████████████████▏                                                 | 156/561 [05:37<14:09,  2.10s/it]

[157] Q96B42 → 1 domain: PF14770


Memproses:  28%|███████████████████▎                                                 | 157/561 [05:39<14:07,  2.10s/it]

[158] Q8IW75 → 1 domain: PF00079


Memproses:  28%|███████████████████▍                                                 | 158/561 [05:41<14:04,  2.10s/it]

[159] Q53EP0 → 1 domain: PF00041


Memproses:  28%|███████████████████▌                                                 | 159/561 [05:43<14:02,  2.10s/it]

[160] Q15270 → 1 domain: PF00046


Memproses:  29%|███████████████████▋                                                 | 160/561 [05:45<13:59,  2.09s/it]

[161] Q8IXB3 → 1 domain: PF04505


Memproses:  29%|███████████████████▊                                                 | 161/561 [05:48<13:57,  2.09s/it]

[162] Q9BVG9 → 1 domain: PF03034


Memproses:  29%|███████████████████▉                                                 | 162/561 [05:50<13:52,  2.09s/it]

[163] P31323 → 2 domain: PF00027, PF02197


Memproses:  29%|████████████████████                                                 | 163/561 [05:52<13:51,  2.09s/it]

[164] B3KY43 → 2 domain: PF00027, PF02197


Memproses:  29%|████████████████████▏                                                | 164/561 [05:54<13:47,  2.09s/it]

[165] P28330 → 3 domain: PF00441, PF02770, PF02771


Memproses:  29%|████████████████████▎                                                | 165/561 [05:56<13:47,  2.09s/it]

[166] P29474 → 4 domain: PF00175, PF00258, PF00667, PF02898


Memproses:  30%|████████████████████▍                                                | 166/561 [05:58<13:47,  2.10s/it]

[167] A0S0A6 → 2 domain: PF00258, PF02898


Memproses:  30%|████████████████████▌                                                | 167/561 [06:00<13:44,  2.09s/it]

[168] Q99541 → 1 domain: PF03036


Memproses:  30%|████████████████████▋                                                | 168/561 [06:03<15:31,  2.37s/it]

[169] Q6FHZ7 → 1 domain: PF03036


Memproses:  30%|████████████████████▊                                                | 169/561 [06:05<14:56,  2.29s/it]

[170] O43320 → 1 domain: PF00167


Memproses:  30%|████████████████████▉                                                | 170/561 [06:07<14:29,  2.22s/it]

[171] A0A7U3L5H2 → 1 domain: PF00167


Memproses:  30%|█████████████████████                                                | 171/561 [06:09<14:11,  2.18s/it]

[172] Q9NTX5 → 1 domain: PF00378


Memproses:  31%|█████████████████████▏                                               | 172/561 [06:12<15:44,  2.43s/it]

[173] Q8TDU6 → 1 domain: PF00001


Memproses:  31%|█████████████████████▎                                               | 173/561 [06:14<14:57,  2.31s/it]

[174] Q99835 → 2 domain: PF01392, PF01534


Memproses:  31%|█████████████████████▍                                               | 174/561 [06:17<14:36,  2.26s/it]

[175] Q8N5D0 → 1 domain: PF00400


Memproses:  31%|█████████████████████▌                                               | 175/561 [06:19<14:54,  2.32s/it]

[176] P13349 → 3 domain: PF00010, PF01586, PF12232


Memproses:  31%|█████████████████████▋                                               | 176/561 [06:21<14:26,  2.25s/it]

[177] Q9NPJ3 → 1 domain: PF03061


Memproses:  32%|█████████████████████▊                                               | 177/561 [06:23<14:06,  2.20s/it]

[178] Q86SG2 → 2 domain: PF00023, PF12796


Memproses:  32%|█████████████████████▉                                               | 178/561 [06:25<13:49,  2.17s/it]

[179] Q96H78 → 1 domain: PF00153


Memproses:  32%|██████████████████████                                               | 179/561 [06:27<13:38,  2.14s/it]

[180] E9PGQ0 → 1 domain: PF00153


Memproses:  32%|██████████████████████▏                                              | 180/561 [06:29<13:28,  2.12s/it]

[181] P11233 → 1 domain: PF00071


Memproses:  32%|██████████████████████▎                                              | 181/561 [06:32<13:22,  2.11s/it]

[182] Q687X5 → 2 domain: PF01794, PF03807


Memproses:  32%|██████████████████████▍                                              | 182/561 [06:34<13:26,  2.13s/it]

[183] O43572 → 1 domain: PF00615


Memproses:  33%|██████████████████████▌                                              | 183/561 [06:36<14:04,  2.24s/it]

[184] A0A0S2Z4Z7 → 1 domain: PF00615


Memproses:  33%|██████████████████████▋                                              | 184/561 [06:39<15:10,  2.41s/it]

[185] E7EMD6 → 1 domain: PF00615


Memproses:  33%|██████████████████████▊                                              | 185/561 [06:41<15:09,  2.42s/it]

[186] Q969Z0 → 3 domain: PF06743, PF08368, PF08373


Memproses:  33%|██████████████████████▉                                              | 186/561 [06:44<15:13,  2.44s/it]

[187] B3KRS4 → 3 domain: PF06743, PF08368, PF08373


Memproses:  33%|███████████████████████                                              | 187/561 [06:47<15:54,  2.55s/it]

[188] B3KM73 → 2 domain: PF06743, PF08368


Memproses:  34%|███████████████████████                                              | 188/561 [06:49<16:11,  2.61s/it]

[189] B4DU42 → 3 domain: PF06743, PF08368, PF08373


Memproses:  34%|███████████████████████▏                                             | 189/561 [06:52<16:48,  2.71s/it]

[190] Q53R41 → 3 domain: PF06743, PF08368, PF08373


Memproses:  34%|███████████████████████▎                                             | 190/561 [06:55<16:31,  2.67s/it]

[191] Q05D57 → Tidak ditemukan domain Pfam.


Memproses:  34%|███████████████████████▍                                             | 191/561 [06:57<15:37,  2.53s/it]

[192] Q9UKU7 → 3 domain: PF00441, PF02770, PF02771


Memproses:  34%|███████████████████████▌                                             | 192/561 [07:00<15:50,  2.57s/it]

[193] O60266 → 2 domain: PF00211, PF16214


Memproses:  34%|███████████████████████▋                                             | 193/561 [07:02<15:26,  2.52s/it]

[194] A0A0A0MSC1 → 2 domain: PF00211, PF16214


Memproses:  35%|███████████████████████▊                                             | 194/561 [07:05<15:30,  2.53s/it]

[195] B7ZLX9 → 2 domain: PF00211, PF16214


Memproses:  35%|███████████████████████▉                                             | 195/561 [07:07<15:23,  2.52s/it]

[196] Q8N6T7 → 1 domain: PF02146


Memproses:  35%|████████████████████████                                             | 196/561 [07:09<14:30,  2.38s/it]

[197] M0R1N9 → 1 domain: PF02146


Memproses:  35%|████████████████████████▏                                            | 197/561 [07:12<15:10,  2.50s/it]

[198] B4DDV3 → 1 domain: PF02146


Memproses:  35%|████████████████████████▎                                            | 198/561 [07:15<16:19,  2.70s/it]

[199] M0QXA0 → Tidak ditemukan domain Pfam.


Memproses:  35%|████████████████████████▍                                            | 199/561 [07:18<15:50,  2.63s/it]

[200] Q7KZI7 → 3 domain: PF00069, PF00627, PF02149


Memproses:  36%|████████████████████████▌                                            | 200/561 [07:20<14:49,  2.46s/it]

[201] A8K2S4 → 3 domain: PF00069, PF00627, PF02149


Memproses:  36%|████████████████████████▋                                            | 201/561 [07:23<15:07,  2.52s/it]

[202] A0A140VJP1 → 3 domain: PF00069, PF00627, PF02149


Memproses:  36%|████████████████████████▊                                            | 202/561 [07:26<16:23,  2.74s/it]

[203] O14686 → 6 domain: PF00628, PF00856, PF05964, PF05965, PF13771, PF13832


Memproses:  36%|████████████████████████▉                                            | 203/561 [07:28<15:08,  2.54s/it]

[204] Q6PIA1 → 3 domain: PF00856, PF05964, PF05965


Memproses:  36%|█████████████████████████                                            | 204/561 [07:31<15:30,  2.61s/it]

[205] Q59FG6 → 4 domain: PF00856, PF05964, PF05965, PF13832


Memproses:  37%|█████████████████████████▏                                           | 205/561 [07:33<15:42,  2.65s/it]

[206] Q7Z2T5 → 1 domain: PF02005


Memproses:  37%|█████████████████████████▎                                           | 206/561 [07:36<15:33,  2.63s/it]

[207] B4DXX1 → 1 domain: PF02005


Memproses:  37%|█████████████████████████▍                                           | 207/561 [07:38<14:58,  2.54s/it]

[208] P11117 → 1 domain: PF00328


Memproses:  37%|█████████████████████████▌                                           | 208/561 [07:41<14:44,  2.51s/it]

[209] B7Z7D2 → 1 domain: PF00328


Memproses:  37%|█████████████████████████▋                                           | 209/561 [07:43<14:29,  2.47s/it]

[210] E9PHY0 → 1 domain: PF00328


Memproses:  37%|█████████████████████████▊                                           | 210/561 [07:46<14:33,  2.49s/it]

[211] B7Z6T8 → 1 domain: PF00328


Memproses:  38%|█████████████████████████▉                                           | 211/561 [07:54<24:26,  4.19s/it]

[212] E9PQY3 → 1 domain: PF00328


Memproses:  38%|██████████████████████████                                           | 212/561 [07:56<21:07,  3.63s/it]

[213] B7Z8T9 → 1 domain: PF00328


Memproses:  38%|██████████████████████████▏                                          | 213/561 [07:58<18:55,  3.26s/it]

[214] B7Z6L8 → 1 domain: PF00328


Memproses:  38%|██████████████████████████▎                                          | 214/561 [08:01<17:26,  3.02s/it]

[215] B7Z552 → 1 domain: PF00328


Memproses:  38%|██████████████████████████▍                                          | 215/561 [08:03<16:22,  2.84s/it]

[216] B7Z4Z2 → 1 domain: PF00328


Memproses:  39%|██████████████████████████▌                                          | 216/561 [08:06<15:45,  2.74s/it]

[217] P35625 → 1 domain: PF00965


Memproses:  39%|██████████████████████████▋                                          | 217/561 [08:08<14:33,  2.54s/it]

[218] P04066 → 2 domain: PF01120, PF16757


Memproses:  39%|██████████████████████████▊                                          | 218/561 [08:10<13:46,  2.41s/it]

[219] Q08043 → 3 domain: PF00307, PF00435, PF08726


Memproses:  39%|██████████████████████████▉                                          | 219/561 [08:13<13:49,  2.43s/it]

[220] B4DZQ2 → 3 domain: PF00307, PF00435, PF08726


Memproses:  39%|███████████████████████████                                          | 220/561 [08:15<13:57,  2.46s/it]

[221] A0A087WSZ2 → 3 domain: PF00307, PF00435, PF08726


Memproses:  39%|███████████████████████████▏                                         | 221/561 [08:20<17:59,  3.18s/it]

[222] P31629 → 1 domain: PF00096


Memproses:  40%|███████████████████████████▎                                         | 222/561 [08:22<16:51,  2.98s/it]

[223] Q96PD6 → 1 domain: PF03982


Memproses:  40%|███████████████████████████▍                                         | 223/561 [08:25<15:43,  2.79s/it]

[224] P61366 → 1 domain: PF11037


Memproses:  40%|███████████████████████████▌                                         | 224/561 [08:27<14:53,  2.65s/it]

[225] Q4G176 → 2 domain: PF00501, PF13193


Memproses:  40%|███████████████████████████▋                                         | 225/561 [08:30<14:33,  2.60s/it]

[226] F5H5A1 → 2 domain: PF00501, PF13193


Memproses:  40%|███████████████████████████▊                                         | 226/561 [08:32<14:15,  2.55s/it]

[227] Q9H4I9 → 1 domain: PF10161


Memproses:  40%|███████████████████████████▉                                         | 227/561 [08:35<14:30,  2.61s/it]

[228] A0PK00 → 1 domain: PF07851


Memproses:  41%|████████████████████████████                                         | 228/561 [08:37<14:11,  2.56s/it]

[229] A6NFX1 → 1 domain: PF13347


Memproses:  41%|████████████████████████████▏                                        | 229/561 [08:40<14:01,  2.54s/it]

[230] Q9UK39 → 1 domain: PF03372


Memproses:  41%|████████████████████████████▎                                        | 230/561 [08:42<13:52,  2.52s/it]

[231] O43184 → 4 domain: PF00200, PF01421, PF01562, PF08516


Memproses:  41%|████████████████████████████▍                                        | 231/561 [08:44<13:26,  2.44s/it]

[232] Q5JRP2 → 4 domain: PF00200, PF01421, PF01562, PF08516


Memproses:  41%|████████████████████████████▌                                        | 232/561 [08:47<14:16,  2.60s/it]

[233] A8K6G4 → 4 domain: PF00200, PF01421, PF01562, PF08516


Memproses:  42%|████████████████████████████▋                                        | 233/561 [08:50<14:38,  2.68s/it]

[234] O95258 → 1 domain: PF00153


Memproses:  42%|████████████████████████████▊                                        | 234/561 [08:53<14:15,  2.61s/it]

[235] F6SL11 → 1 domain: PF00153


Memproses:  42%|████████████████████████████▉                                        | 235/561 [08:55<14:10,  2.61s/it]

[236] B4DMK1 → 1 domain: PF00153


Memproses:  42%|█████████████████████████████                                        | 236/561 [08:58<13:56,  2.57s/it]

[237] P06870 → 1 domain: PF00089


Memproses:  42%|█████████████████████████████▏                                       | 237/561 [09:00<13:05,  2.42s/it]

[238] A0A1R3UCD2 → 1 domain: PF00089


Memproses:  42%|█████████████████████████████▎                                       | 238/561 [09:02<13:06,  2.43s/it]

[239] P29622 → 1 domain: PF00079


Memproses:  43%|█████████████████████████████▍                                       | 239/561 [09:05<13:10,  2.45s/it]

[240] A0A024R6I9 → 1 domain: PF00079


Memproses:  43%|█████████████████████████████▌                                       | 240/561 [09:07<13:02,  2.44s/it]

[241] P10646 → 1 domain: PF00014


Memproses:  43%|█████████████████████████████▋                                       | 241/561 [09:10<13:06,  2.46s/it]

[242] P15086 → 2 domain: PF00246, PF02244


Memproses:  43%|█████████████████████████████▊                                       | 242/561 [09:12<13:31,  2.54s/it]

[243] P29279 → 4 domain: PF00007, PF00093, PF00219, PF19035


Memproses:  43%|█████████████████████████████▉                                       | 243/561 [09:15<12:46,  2.41s/it]

[244] Q5M8T4 → 4 domain: PF00007, PF00093, PF00219, PF19035


Memproses:  43%|██████████████████████████████                                       | 244/561 [09:18<13:44,  2.60s/it]

[245] P02775 → 1 domain: PF00048


Memproses:  44%|██████████████████████████████▏                                      | 245/561 [09:21<14:55,  2.83s/it]

[246] Q13630 → 1 domain: PF01370


Memproses:  44%|██████████████████████████████▎                                      | 246/561 [09:24<14:38,  2.79s/it]

[247] A0A140VKC8 → 1 domain: PF01370


Memproses:  44%|██████████████████████████████▍                                      | 247/561 [09:26<14:19,  2.74s/it]

[248] P05186 → 1 domain: PF00245


Memproses:  44%|██████████████████████████████▌                                      | 248/561 [09:28<13:21,  2.56s/it]

[249] P00750 → 4 domain: PF00008, PF00039, PF00051, PF00089


Memproses:  44%|██████████████████████████████▋                                      | 249/561 [09:31<12:33,  2.41s/it]

[250] B4DN26 → 4 domain: PF00008, PF00039, PF00051, PF00089


Memproses:  45%|██████████████████████████████▋                                      | 250/561 [09:33<13:07,  2.53s/it]

[251] P16035 → 1 domain: PF00965


Memproses:  45%|██████████████████████████████▊                                      | 251/561 [09:35<12:22,  2.39s/it]

[252] A0A140VK57 → 1 domain: PF00965


Memproses:  45%|██████████████████████████████▉                                      | 252/561 [09:38<12:24,  2.41s/it]

[253] P10644 → 2 domain: PF00027, PF02197


Memproses:  45%|███████████████████████████████                                      | 253/561 [09:40<11:51,  2.31s/it]

[254] B2R5T5 → 2 domain: PF00027, PF02197


Memproses:  45%|███████████████████████████████▏                                     | 254/561 [09:43<12:17,  2.40s/it]

[255] P48307 → 1 domain: PF00014


Memproses:  45%|███████████████████████████████▎                                     | 255/561 [09:45<12:20,  2.42s/it]

[256] Q99727 → 1 domain: PF00965


Memproses:  46%|███████████████████████████████▍                                     | 256/561 [09:48<12:37,  2.48s/it]

[257] O00585 → 1 domain: PF00048


Memproses:  46%|███████████████████████████████▌                                     | 257/561 [09:50<11:57,  2.36s/it]

[258] Q9UDY8 → 4 domain: PF00656, PF13895, PF13927, PF18703


Memproses:  46%|███████████████████████████████▋                                     | 258/561 [09:52<11:33,  2.29s/it]

[259] A8K5S1 → 4 domain: PF00656, PF13895, PF13927, PF18703


Memproses:  46%|███████████████████████████████▊                                     | 259/561 [09:55<12:19,  2.45s/it]

[260] Q96KP4 → 2 domain: PF01546, PF07687


Memproses:  46%|███████████████████████████████▉                                     | 260/561 [09:57<12:23,  2.47s/it]

[261] Q9BS40 → 1 domain: PF06907


Memproses:  47%|████████████████████████████████                                     | 261/561 [10:00<12:18,  2.46s/it]

[262] Q92963 → 1 domain: PF00071


Memproses:  47%|████████████████████████████████▏                                    | 262/561 [10:02<11:44,  2.36s/it]

[263] Q969Z4 → 1 domain: PF12606


Memproses:  47%|████████████████████████████████▎                                    | 263/561 [10:04<11:55,  2.40s/it]

[264] P21980 → 3 domain: PF00868, PF00927, PF01841


Memproses:  47%|████████████████████████████████▍                                    | 264/561 [10:06<11:23,  2.30s/it]

[265] V9HWG3 → 3 domain: PF00868, PF00927, PF01841


Memproses:  47%|████████████████████████████████▌                                    | 265/561 [10:09<11:52,  2.41s/it]

[266] B4DIT7 → 3 domain: PF00868, PF00927, PF01841


Memproses:  47%|████████████████████████████████▋                                    | 266/561 [10:12<12:10,  2.48s/it]

[267] B4DTN7 → 3 domain: PF00868, PF00927, PF01841


Memproses:  48%|████████████████████████████████▊                                    | 267/561 [10:14<12:22,  2.53s/it]

[268] Q8WWW0 → 3 domain: PF00130, PF00788, PF16517


Memproses:  48%|████████████████████████████████▉                                    | 268/561 [10:16<11:39,  2.39s/it]

[269] A8K5F3 → 3 domain: PF00130, PF00788, PF16517


Memproses:  48%|█████████████████████████████████                                    | 269/561 [10:19<12:10,  2.50s/it]

[270] Q96IX5 → 1 domain: PF14960


Memproses:  48%|█████████████████████████████████▏                                   | 270/561 [10:22<13:15,  2.73s/it]

[271] Q86Z23 → 1 domain: PF00386


Memproses:  48%|█████████████████████████████████▎                                   | 271/561 [10:25<12:34,  2.60s/it]

[272] A0A3B0INP7 → 1 domain: PF00386


Memproses:  48%|█████████████████████████████████▍                                   | 272/561 [10:30<16:42,  3.47s/it]

[273] Q9H5J4 → 1 domain: PF01151


Memproses:  49%|█████████████████████████████████▌                                   | 273/561 [10:34<16:51,  3.51s/it]

[274] Q86Z14 → 1 domain: PF00232


Memproses:  49%|█████████████████████████████████▋                                   | 274/561 [10:36<15:41,  3.28s/it]

[275] B4DYH5 → 1 domain: PF00232


Memproses:  49%|█████████████████████████████████▊                                   | 275/561 [10:39<14:43,  3.09s/it]

[276] Q9UBY9 → 1 domain: PF00011


Memproses:  49%|█████████████████████████████████▉                                   | 276/561 [10:42<13:47,  2.91s/it]

[277] P18075 → 2 domain: PF00019, PF00688


Memproses:  49%|██████████████████████████████████                                   | 277/561 [10:44<12:34,  2.66s/it]

[278] A8K571 → 2 domain: PF00019, PF00688


Memproses:  50%|██████████████████████████████████▏                                  | 278/561 [10:47<12:50,  2.72s/it]

[279] Q14865 → 1 domain: PF01388


Memproses:  50%|██████████████████████████████████▎                                  | 279/561 [10:49<12:23,  2.64s/it]

[280] Q13370 → 1 domain: PF00233


Memproses:  50%|██████████████████████████████████▍                                  | 280/561 [10:51<11:34,  2.47s/it]

[281] A7E2E5 → 1 domain: PF00233


Memproses:  50%|██████████████████████████████████▌                                  | 281/561 [10:54<11:32,  2.47s/it]

[282] Q00169 → 1 domain: PF02121


Memproses:  50%|██████████████████████████████████▋                                  | 282/561 [10:56<10:55,  2.35s/it]

[283] V9HWC5 → 1 domain: PF02121


Memproses:  50%|██████████████████████████████████▊                                  | 283/561 [10:58<10:53,  2.35s/it]

[284] P03372 → 4 domain: PF00104, PF00105, PF02159, PF12743


Memproses:  51%|██████████████████████████████████▉                                  | 284/561 [11:00<10:32,  2.28s/it]

[285] Q9UBT1 → Tidak ditemukan domain Pfam.


Memproses:  51%|███████████████████████████████████                                  | 285/561 [11:02<10:22,  2.26s/it]

[286] H0Y4W6 → 2 domain: PF00104, PF00105


Memproses:  51%|███████████████████████████████████▏                                 | 286/561 [11:05<10:41,  2.33s/it]

[287] G4XH65 → 4 domain: PF00104, PF00105, PF02159, PF12743


Memproses:  51%|███████████████████████████████████▎                                 | 287/561 [11:08<11:10,  2.45s/it]

[288] A8KAF4 → 4 domain: PF00104, PF00105, PF02159, PF12743


Memproses:  51%|███████████████████████████████████▍                                 | 288/561 [11:10<11:32,  2.54s/it]

[289] A0A125SXW3 → 3 domain: PF00104, PF00105, PF02159


Memproses:  52%|███████████████████████████████████▌                                 | 289/561 [11:13<12:16,  2.71s/it]

[290] P43304 → 3 domain: PF01266, PF13499, PF16901


Memproses:  52%|███████████████████████████████████▋                                 | 290/561 [11:16<12:11,  2.70s/it]

[291] Q03181 → 2 domain: PF00104, PF00105


Memproses:  52%|███████████████████████████████████▊                                 | 291/561 [11:18<11:18,  2.51s/it]

[292] F1D8S7 → 2 domain: PF00104, PF00105


Memproses:  52%|███████████████████████████████████▉                                 | 292/561 [11:21<11:27,  2.56s/it]

[293] Q9Y6K1 → 5 domain: PF00145, PF00855, PF17980, PF21255, PF22855


Memproses:  52%|████████████████████████████████████                                 | 293/561 [11:24<11:38,  2.60s/it]

[294] A0A0C4DG02 → 5 domain: PF00145, PF00855, PF17980, PF21255, PF22855


Memproses:  52%|████████████████████████████████████▏                                | 294/561 [11:26<12:02,  2.71s/it]

[295] F8WE91 → 5 domain: PF00145, PF00855, PF17980, PF21255, PF22855


Memproses:  53%|████████████████████████████████████▎                                | 295/561 [11:30<12:30,  2.82s/it]

[296] Q59HC6 → 5 domain: PF00145, PF00855, PF17980, PF21255, PF22855


Memproses:  53%|████████████████████████████████████▍                                | 296/561 [11:32<12:18,  2.79s/it]

[297] P80370 → 1 domain: PF00008


Memproses:  53%|████████████████████████████████████▌                                | 297/561 [11:34<11:21,  2.58s/it]

[298] A8K019 → 1 domain: PF00008


Memproses:  53%|████████████████████████████████████▋                                | 298/561 [11:37<11:20,  2.59s/it]

[299] Q96P20 → 6 domain: PF02758, PF05729, PF13516, PF14484, PF17776, PF17779


Memproses:  53%|████████████████████████████████████▊                                | 299/561 [11:39<10:41,  2.45s/it]

[300] A0A7I2R3P8 → 6 domain: PF02758, PF05729, PF13516, PF14484, PF17776, PF17779


Memproses:  53%|████████████████████████████████████▉                                | 300/561 [11:42<11:18,  2.60s/it]

[301] P42127 → 1 domain: PF05039


Memproses:  54%|█████████████████████████████████████                                | 301/561 [11:44<11:01,  2.54s/it]

[302] P16473 → 2 domain: PF00001, PF13306


Memproses:  54%|█████████████████████████████████████▏                               | 302/561 [11:47<10:23,  2.41s/it]

[303] Q9Y5S8 → 3 domain: PF01794, PF08022, PF08030


Memproses:  54%|█████████████████████████████████████▎                               | 303/561 [11:49<09:58,  2.32s/it]

[304] P51151 → 1 domain: PF00071


Memproses:  54%|█████████████████████████████████████▍                               | 304/561 [11:51<10:16,  2.40s/it]

[305] Q13887 → 1 domain: PF00096


Memproses:  54%|█████████████████████████████████████▌                               | 305/561 [11:53<09:49,  2.30s/it]

[306] O15118 → 3 domain: PF12349, PF16414, PF22314


Memproses:  55%|█████████████████████████████████████▋                               | 306/561 [11:55<09:32,  2.24s/it]

[307] O15164 → 3 domain: PF00439, PF00628, PF00643


Memproses:  55%|█████████████████████████████████████▊                               | 307/561 [11:58<09:19,  2.20s/it]

[308] P20618 → 1 domain: PF00227


Memproses:  55%|█████████████████████████████████████▉                               | 308/561 [12:00<09:08,  2.17s/it]

[309] Q99714 → 1 domain: PF00106


Memproses:  55%|██████████████████████████████████████                               | 309/561 [12:02<08:59,  2.14s/it]

[310] P23219 → 1 domain: PF03098


Memproses:  55%|██████████████████████████████████████▏                              | 310/561 [12:04<08:58,  2.15s/it]

[311] P02766 → 1 domain: PF00576


Memproses:  55%|██████████████████████████████████████▎                              | 311/561 [12:06<08:51,  2.13s/it]

[312] P49721 → 1 domain: PF00227


Memproses:  56%|██████████████████████████████████████▎                              | 312/561 [12:08<08:45,  2.11s/it]

[313] P28482 → 1 domain: PF00069


Memproses:  56%|██████████████████████████████████████▍                              | 313/561 [12:10<08:41,  2.10s/it]

[314] P34995 → 1 domain: PF00001


Memproses:  56%|██████████████████████████████████████▌                              | 314/561 [12:12<08:37,  2.09s/it]

[315] Q06124 → 2 domain: PF00017, PF00102


Memproses:  56%|██████████████████████████████████████▋                              | 315/561 [12:14<08:39,  2.11s/it]

[316] Q92753 → 2 domain: PF00104, PF00105


Memproses:  56%|██████████████████████████████████████▊                              | 316/561 [12:16<08:36,  2.11s/it]

[317] Q9Y345 → 1 domain: PF00209


Memproses:  57%|██████████████████████████████████████▉                              | 317/561 [12:18<08:30,  2.09s/it]

[318] P19838 → 5 domain: PF00023, PF00531, PF00554, PF12796, PF16179


Memproses:  57%|███████████████████████████████████████                              | 318/561 [12:21<08:34,  2.12s/it]

[319] P30926 → 2 domain: PF02931, PF02932


Memproses:  57%|███████████████████████████████████████▏                             | 319/561 [12:23<08:29,  2.10s/it]

[320] P27695 → 1 domain: PF03372


Memproses:  57%|███████████████████████████████████████▎                             | 320/561 [12:26<09:32,  2.38s/it]

[321] P24557 → 1 domain: PF00067


Memproses:  57%|███████████████████████████████████████▍                             | 321/561 [12:28<09:12,  2.30s/it]

[322] P11166 → 1 domain: PF00083


Memproses:  57%|███████████████████████████████████████▌                             | 322/561 [12:30<08:52,  2.23s/it]

[323] O15151 → 2 domain: PF00641, PF13920


Memproses:  58%|███████████████████████████████████████▋                             | 323/561 [12:32<08:38,  2.18s/it]

[324] P42224 → 6 domain: PF00017, PF01017, PF02864, PF02865, PF12162, PF21354


Memproses:  58%|███████████████████████████████████████▊                             | 324/561 [12:34<08:47,  2.23s/it]

[325] P02708 → 2 domain: PF02931, PF02932


Memproses:  58%|███████████████████████████████████████▉                             | 325/561 [12:36<08:35,  2.19s/it]

[326] P23415 → 2 domain: PF02931, PF02932


Memproses:  58%|████████████████████████████████████████                             | 326/561 [12:38<08:26,  2.15s/it]

[327] P30305 → 2 domain: PF00581, PF06617


Memproses:  58%|████████████████████████████████████████▏                            | 327/561 [12:41<08:17,  2.12s/it]

[328] O14672 → 3 domain: PF00200, PF13574, PF21299


Memproses:  58%|████████████████████████████████████████▎                            | 328/561 [12:43<08:30,  2.19s/it]

[329] P07384 → 3 domain: PF00648, PF01067, PF13833


Memproses:  59%|████████████████████████████████████████▍                            | 329/561 [12:45<08:19,  2.15s/it]

[330] P09211 → 2 domain: PF02798, PF14497


Memproses:  59%|████████████████████████████████████████▌                            | 330/561 [12:47<08:11,  2.13s/it]

[331] P21462 → 1 domain: PF00001


Memproses:  59%|████████████████████████████████████████▋                            | 331/561 [12:49<08:06,  2.11s/it]

[332] Q9HC97 → 1 domain: PF00001


Memproses:  59%|████████████████████████████████████████▊                            | 332/561 [12:51<08:02,  2.11s/it]

[333] P08684 → 1 domain: PF00067


Memproses:  59%|████████████████████████████████████████▉                            | 333/561 [12:53<07:57,  2.09s/it]

[334] O00257 → 2 domain: PF00385, PF17218


Memproses:  60%|█████████████████████████████████████████                            | 334/561 [12:55<07:56,  2.10s/it]

[335] Q16236 → 1 domain: PF03131


Memproses:  60%|█████████████████████████████████████████▏                           | 335/561 [12:58<08:09,  2.17s/it]

[336] Q6V1X1 → 3 domain: PF00326, PF00930, PF19520


Memproses:  60%|█████████████████████████████████████████▎                           | 336/561 [13:00<08:02,  2.15s/it]

[337] P25090 → 1 domain: PF00001


Memproses:  60%|█████████████████████████████████████████▍                           | 337/561 [13:02<07:55,  2.12s/it]

[338] Q16288 → 6 domain: PF00047, PF01462, PF07679, PF07714, PF13855, PF16920


Memproses:  60%|█████████████████████████████████████████▌                           | 338/561 [13:04<07:51,  2.12s/it]

[339] P67870 → 1 domain: PF01214


Memproses:  60%|█████████████████████████████████████████▋                           | 339/561 [13:06<07:47,  2.11s/it]

[340] P51955 → 1 domain: PF00069


Memproses:  61%|█████████████████████████████████████████▊                           | 340/561 [13:09<08:09,  2.22s/it]

[341] P08173 → 1 domain: PF00001


Memproses:  61%|█████████████████████████████████████████▉                           | 341/561 [13:11<08:00,  2.18s/it]

[342] O75469 → 2 domain: PF00104, PF00105


Memproses:  61%|██████████████████████████████████████████                           | 342/561 [13:13<08:13,  2.26s/it]

[343] P17706 → 1 domain: PF00102


Memproses:  61%|██████████████████████████████████████████▏                          | 343/561 [13:15<08:02,  2.21s/it]

[344] P11387 → 3 domain: PF01028, PF02919, PF14370


Memproses:  61%|██████████████████████████████████████████▎                          | 344/561 [13:17<07:52,  2.18s/it]

[345] P43116 → 1 domain: PF00001


Memproses:  61%|██████████████████████████████████████████▍                          | 345/561 [13:19<07:43,  2.15s/it]

[346] P22736 → 2 domain: PF00104, PF00105


Memproses:  62%|██████████████████████████████████████████▌                          | 346/561 [13:21<07:37,  2.13s/it]

[347] P08912 → 1 domain: PF00001


Memproses:  62%|██████████████████████████████████████████▋                          | 347/561 [13:23<07:31,  2.11s/it]

[348] P07477 → 1 domain: PF00089


Memproses:  62%|██████████████████████████████████████████▊                          | 348/561 [13:26<08:16,  2.33s/it]

[349] P42680 → 5 domain: PF00017, PF00018, PF00169, PF00779, PF07714


Memproses:  62%|██████████████████████████████████████████▉                          | 349/561 [13:28<07:56,  2.25s/it]

[350] Q00535 → 1 domain: PF00069


Memproses:  62%|███████████████████████████████████████████                          | 350/561 [13:31<08:14,  2.34s/it]

[351] O60341 → 2 domain: PF01593, PF04433


Memproses:  63%|███████████████████████████████████████████▏                         | 351/561 [13:33<07:55,  2.26s/it]

[352] Q96RR4 → 1 domain: PF00069


Memproses:  63%|███████████████████████████████████████████▎                         | 352/561 [13:35<07:40,  2.20s/it]

[353] P49862 → 1 domain: PF00089


Memproses:  63%|███████████████████████████████████████████▍                         | 353/561 [13:37<07:30,  2.16s/it]

[354] P16083 → 1 domain: PF02525


Memproses:  63%|███████████████████████████████████████████▌                         | 354/561 [13:39<07:24,  2.15s/it]

[355] P42785 → 1 domain: PF05577


Memproses:  63%|███████████████████████████████████████████▋                         | 355/561 [13:41<07:20,  2.14s/it]

[356] P54132 → 8 domain: PF00270, PF00271, PF00570, PF08072, PF09382, PF16124, PF16202, PF16204


Memproses:  63%|███████████████████████████████████████████▊                         | 356/561 [13:43<07:13,  2.12s/it]

[357] Q14680 → 3 domain: PF00069, PF02149, PF21594


Memproses:  64%|███████████████████████████████████████████▉                         | 357/561 [13:46<07:08,  2.10s/it]

[358] Q16513 → 3 domain: PF00069, PF00433, PF02185


Memproses:  64%|████████████████████████████████████████████                         | 358/561 [13:48<07:06,  2.10s/it]

[359] Q86TI2 → 3 domain: PF00326, PF00930, PF19520


Memproses:  64%|████████████████████████████████████████████▏                        | 359/561 [13:50<07:05,  2.11s/it]

[360] Q9Y5N1 → 1 domain: PF00001


Memproses:  64%|████████████████████████████████████████████▎                        | 360/561 [13:52<07:02,  2.10s/it]

[361] P07339 → 2 domain: PF00026, PF07966


Memproses:  64%|████████████████████████████████████████████▍                        | 361/561 [13:54<07:02,  2.11s/it]

[362] Q16512 → 3 domain: PF00069, PF00433, PF02185


Memproses:  65%|████████████████████████████████████████████▌                        | 362/561 [13:59<09:27,  2.85s/it]

[363] Q96L34 → 3 domain: PF00069, PF00627, PF02149


Memproses:  65%|████████████████████████████████████████████▋                        | 363/561 [14:01<08:38,  2.62s/it]

[364] P27986 → 3 domain: PF00017, PF00620, PF16454


Memproses:  65%|████████████████████████████████████████████▊                        | 364/561 [14:03<08:04,  2.46s/it]

[365] P21452 → 1 domain: PF00001


Memproses:  65%|████████████████████████████████████████████▉                        | 365/561 [14:05<07:38,  2.34s/it]

[366] Q9NY46 → 4 domain: PF00520, PF06512, PF11933, PF24609


Memproses:  65%|█████████████████████████████████████████████                        | 366/561 [14:07<07:22,  2.27s/it]

[367] P11388 → 6 domain: PF00204, PF00521, PF01751, PF02518, PF08070, PF16898


Memproses:  65%|█████████████████████████████████████████████▏                       | 367/561 [14:12<10:11,  3.15s/it]

[368] Q6PHR2 → 2 domain: PF00069, PF04212


Memproses:  66%|█████████████████████████████████████████████▎                       | 368/561 [14:14<09:05,  2.83s/it]

[369] Q9Y4P1 → 2 domain: PF03416, PF20166


Memproses:  66%|█████████████████████████████████████████████▍                       | 369/561 [14:16<08:24,  2.63s/it]

[370] P35236 → 1 domain: PF00102


Memproses:  66%|█████████████████████████████████████████████▌                       | 370/561 [14:18<07:50,  2.46s/it]

[371] P08172 → 1 domain: PF00001


Memproses:  66%|█████████████████████████████████████████████▋                       | 371/561 [14:21<07:44,  2.45s/it]

[372] Q9H228 → 1 domain: PF00001


Memproses:  66%|█████████████████████████████████████████████▊                       | 372/561 [14:23<07:20,  2.33s/it]

[373] Q96P65 → 1 domain: PF00001


Memproses:  66%|█████████████████████████████████████████████▉                       | 373/561 [14:25<07:08,  2.28s/it]

[374] P00488 → 3 domain: PF00868, PF00927, PF01841


Memproses:  67%|██████████████████████████████████████████████                       | 374/561 [14:28<07:37,  2.45s/it]

[375] O95665 → 1 domain: PF00001


Memproses:  67%|██████████████████████████████████████████████                       | 375/561 [14:30<07:18,  2.36s/it]

[376] P55212 → 1 domain: PF00656


Memproses:  67%|██████████████████████████████████████████████▏                      | 376/561 [14:32<07:00,  2.27s/it]

[377] Q9HCG7 → 2 domain: PF04685, PF12215


Memproses:  67%|██████████████████████████████████████████████▎                      | 377/561 [14:34<06:46,  2.21s/it]

[378] Q9NZ08 → 3 domain: PF01433, PF11838, PF17900


Memproses:  67%|██████████████████████████████████████████████▍                      | 378/561 [14:36<06:37,  2.17s/it]

[379] P35414 → 1 domain: PF00001


Memproses:  68%|██████████████████████████████████████████████▌                      | 379/561 [14:38<06:31,  2.15s/it]

[380] P42345 → 6 domain: PF00454, PF02259, PF02260, PF08771, PF11865, PF23593


Memproses:  68%|██████████████████████████████████████████████▋                      | 380/561 [14:40<06:25,  2.13s/it]

[381] O14744 → 3 domain: PF05185, PF17285, PF17286


Memproses:  68%|██████████████████████████████████████████████▊                      | 381/561 [14:42<06:19,  2.11s/it]

[382] P36896 → 3 domain: PF00069, PF01064, PF08515


Memproses:  68%|██████████████████████████████████████████████▉                      | 382/561 [14:45<06:16,  2.10s/it]

[383] P19634 → 2 domain: PF00999, PF16644


Memproses:  68%|███████████████████████████████████████████████                      | 383/561 [14:47<06:13,  2.10s/it]

[384] Q13085 → 7 domain: PF00289, PF00364, PF01039, PF02785, PF02786, PF08326, PF21385


Memproses:  68%|███████████████████████████████████████████████▏                     | 384/561 [14:49<06:10,  2.09s/it]

[385] P17931 → 1 domain: PF00337


Memproses:  69%|███████████████████████████████████████████████▎                     | 385/561 [14:51<06:06,  2.08s/it]

[386] P30307 → 2 domain: PF00581, PF06617


Memproses:  69%|███████████████████████████████████████████████▍                     | 386/561 [14:53<06:04,  2.08s/it]

[387] P18054 → 2 domain: PF00305, PF01477


Memproses:  69%|███████████████████████████████████████████████▌                     | 387/561 [14:55<06:21,  2.19s/it]

[388] P51452 → 1 domain: PF00782


Memproses:  69%|███████████████████████████████████████████████▋                     | 388/561 [14:57<06:14,  2.17s/it]

[389] O00763 → 7 domain: PF00289, PF00364, PF01039, PF02785, PF02786, PF08326, PF21385


Memproses:  69%|███████████████████████████████████████████████▊                     | 389/561 [15:00<06:09,  2.15s/it]

[390] P14174 → 1 domain: PF01187


Memproses:  70%|███████████████████████████████████████████████▉                     | 390/561 [15:02<06:06,  2.14s/it]

[391] P42262 → 3 domain: PF00060, PF01094, PF10613


Memproses:  70%|████████████████████████████████████████████████                     | 391/561 [15:04<06:01,  2.13s/it]

[392] P15428 → 1 domain: PF00106


Memproses:  70%|████████████████████████████████████████████████▏                    | 392/561 [15:06<05:56,  2.11s/it]

[393] Q9UHL4 → 1 domain: PF05577


Memproses:  70%|████████████████████████████████████████████████▎                    | 393/561 [15:08<05:52,  2.10s/it]

[394] P05556 → 7 domain: PF00362, PF07965, PF07974, PF08725, PF17205, PF18372, PF23105


Memproses:  70%|████████████████████████████████████████████████▍                    | 394/561 [15:10<05:49,  2.09s/it]

[395] P22413 → 3 domain: PF01033, PF01223, PF01663


Memproses:  70%|████████████████████████████████████████████████▌                    | 395/561 [15:12<05:46,  2.09s/it]

[396] P28335 → 1 domain: PF00001


Memproses:  71%|████████████████████████████████████████████████▋                    | 396/561 [15:14<05:45,  2.10s/it]

[397] Q00975 → 3 domain: PF00520, PF08763, PF16905


Memproses:  71%|████████████████████████████████████████████████▊                    | 397/561 [15:16<05:41,  2.08s/it]

[398] Q9HAZ1 → 1 domain: PF00069


Memproses:  71%|████████████████████████████████████████████████▉                    | 398/561 [15:18<05:39,  2.08s/it]

[399] P16234 → 3 domain: PF07679, PF07714, PF25305


Memproses:  71%|█████████████████████████████████████████████████                    | 399/561 [15:20<05:39,  2.09s/it]

[400] P24864 → 2 domain: PF00134, PF02984


Memproses:  71%|█████████████████████████████████████████████████▏                   | 400/561 [15:23<05:52,  2.19s/it]

[401] Q92769 → 1 domain: PF00850


Memproses:  71%|█████████████████████████████████████████████████▎                   | 401/561 [15:25<05:48,  2.18s/it]

[402] P21730 → 1 domain: PF00001


Memproses:  72%|█████████████████████████████████████████████████▍                   | 402/561 [15:27<05:40,  2.14s/it]

[403] P09619 → 4 domain: PF00047, PF07714, PF13927, PF25305


Memproses:  72%|█████████████████████████████████████████████████▌                   | 403/561 [15:30<06:09,  2.34s/it]

[404] Q9HCR9 → 2 domain: PF00233, PF01590


Memproses:  72%|█████████████████████████████████████████████████▋                   | 404/561 [15:32<05:53,  2.25s/it]

[405] P15144 → 3 domain: PF01433, PF11838, PF17900


Memproses:  72%|█████████████████████████████████████████████████▊                   | 405/561 [15:34<05:41,  2.19s/it]

[406] P55899 → 2 domain: PF00129, PF07654


Memproses:  72%|█████████████████████████████████████████████████▉                   | 406/561 [15:36<05:38,  2.18s/it]

[407] Q96GG9 → 2 domain: PF03556, PF14555


Memproses:  73%|██████████████████████████████████████████████████                   | 407/561 [15:38<05:30,  2.15s/it]

[408] P47901 → 1 domain: PF00001


Memproses:  73%|██████████████████████████████████████████████████▏                  | 408/561 [15:40<05:24,  2.12s/it]

[409] Q16665 → 5 domain: PF00989, PF08447, PF08778, PF11413, PF23171


Memproses:  73%|██████████████████████████████████████████████████▎                  | 409/561 [15:42<05:19,  2.10s/it]

[410] Q5VWK5 → Tidak ditemukan domain Pfam.


Memproses:  73%|██████████████████████████████████████████████████▍                  | 410/561 [15:44<05:15,  2.09s/it]

[411] P43003 → 1 domain: PF00375


Memproses:  73%|██████████████████████████████████████████████████▌                  | 411/561 [15:46<05:12,  2.08s/it]

[412] P07195 → 2 domain: PF00056, PF02866


Memproses:  73%|██████████████████████████████████████████████████▋                  | 412/561 [15:48<05:10,  2.08s/it]

[413] P17948 → 8 domain: PF00047, PF07679, PF07714, PF13927, PF17988, PF21339, PF22854, PF22971


Memproses:  74%|██████████████████████████████████████████████████▊                  | 413/561 [15:51<05:08,  2.09s/it]

[414] P05121 → 1 domain: PF00079


Memproses:  74%|██████████████████████████████████████████████████▉                  | 414/561 [15:53<05:09,  2.11s/it]

[415] O00767 → Tidak ditemukan domain Pfam.


Memproses:  74%|███████████████████████████████████████████████████                  | 415/561 [15:55<05:06,  2.10s/it]

[416] P30530 → 3 domain: PF00041, PF07714, PF13927


Memproses:  74%|███████████████████████████████████████████████████▏                 | 416/561 [15:57<05:06,  2.11s/it]

[417] P55201 → 5 domain: PF00439, PF00855, PF10513, PF13831, PF13832


Memproses:  74%|███████████████████████████████████████████████████▎                 | 417/561 [15:59<05:03,  2.10s/it]

[418] Q9HBX9 → 3 domain: PF00001, PF00057, PF13855


Memproses:  75%|███████████████████████████████████████████████████▍                 | 418/561 [16:01<04:59,  2.09s/it]

[419] P06493 → 1 domain: PF00069


Memproses:  75%|███████████████████████████████████████████████████▌                 | 419/561 [16:03<04:55,  2.08s/it]

[420] P35790 → 1 domain: PF01633


Memproses:  75%|███████████████████████████████████████████████████▋                 | 420/561 [16:05<04:52,  2.08s/it]

[421] P56524 → 2 domain: PF00850, PF12203


Memproses:  75%|███████████████████████████████████████████████████▊                 | 421/561 [16:07<04:51,  2.08s/it]

[422] P43490 → 2 domain: PF04095, PF18127


Memproses:  75%|███████████████████████████████████████████████████▉                 | 422/561 [16:09<04:48,  2.08s/it]

[423] P21728 → 1 domain: PF00001


Memproses:  75%|████████████████████████████████████████████████████                 | 423/561 [16:11<04:47,  2.08s/it]

[424] P32246 → 1 domain: PF00001


Memproses:  76%|████████████████████████████████████████████████████▏                | 424/561 [16:14<04:48,  2.11s/it]

[425] Q08881 → 5 domain: PF00017, PF00018, PF00169, PF00779, PF07714


Memproses:  76%|████████████████████████████████████████████████████▎                | 425/561 [16:16<04:45,  2.10s/it]

[426] Q14432 → 1 domain: PF00233


Memproses:  76%|████████████████████████████████████████████████████▍                | 426/561 [16:18<04:42,  2.09s/it]

[427] Q5NUL3 → 1 domain: PF00001


Memproses:  76%|████████████████████████████████████████████████████▌                | 427/561 [16:20<04:43,  2.11s/it]

[428] Q92793 → 8 domain: PF00439, PF00569, PF02135, PF02172, PF06001, PF08214, PF09030, PF23570


Memproses:  76%|████████████████████████████████████████████████████▋                | 428/561 [16:22<04:40,  2.11s/it]

[429] P13569 → 3 domain: PF00005, PF00664, PF14396


Memproses:  76%|████████████████████████████████████████████████████▊                | 429/561 [16:24<04:37,  2.10s/it]

[430] Q99558 → 1 domain: PF00069


Memproses:  77%|████████████████████████████████████████████████████▉                | 430/561 [16:26<04:37,  2.12s/it]

[431] P43403 → 2 domain: PF00017, PF07714


Memproses:  77%|█████████████████████████████████████████████████████                | 431/561 [16:28<04:33,  2.10s/it]

[432] P52732 → 2 domain: PF00225, PF13931


Memproses:  77%|█████████████████████████████████████████████████████▏               | 432/561 [16:31<04:48,  2.23s/it]

[433] P49759 → 1 domain: PF00069


Memproses:  77%|█████████████████████████████████████████████████████▎               | 433/561 [16:33<04:38,  2.18s/it]

[434] Q9NUW8 → 1 domain: PF06087


Memproses:  77%|█████████████████████████████████████████████████████▍               | 434/561 [16:35<04:31,  2.14s/it]

[435] P22460 → 2 domain: PF00520, PF02214


Memproses:  78%|█████████████████████████████████████████████████████▌               | 435/561 [16:37<04:28,  2.13s/it]

[436] P21589 → 2 domain: PF00149, PF02872


Memproses:  78%|█████████████████████████████████████████████████████▋               | 436/561 [16:39<04:24,  2.12s/it]

[437] Q02127 → 1 domain: PF01180


Memproses:  78%|█████████████████████████████████████████████████████▋               | 437/561 [16:41<04:20,  2.10s/it]

[438] O75460 → 2 domain: PF00069, PF06479


Memproses:  78%|█████████████████████████████████████████████████████▊               | 438/561 [16:43<04:16,  2.09s/it]

[439] Q9UQL6 → 2 domain: PF00850, PF12203


Memproses:  78%|█████████████████████████████████████████████████████▉               | 439/561 [16:45<04:14,  2.08s/it]

[440] Q8WUI4 → 1 domain: PF00850


Memproses:  78%|██████████████████████████████████████████████████████               | 440/561 [16:48<04:13,  2.09s/it]

[441] P34947 → 2 domain: PF00069, PF00615


Memproses:  79%|██████████████████████████████████████████████████████▏              | 441/561 [16:50<04:11,  2.10s/it]

[442] O94925 → 3 domain: PF04960, PF12796, PF17959


Memproses:  79%|██████████████████████████████████████████████████████▎              | 442/561 [16:52<04:10,  2.10s/it]

[443] Q9UKV0 → 2 domain: PF00850, PF12203


Memproses:  79%|██████████████████████████████████████████████████████▍              | 443/561 [16:54<04:06,  2.09s/it]

[444] P00374 → 1 domain: PF00186


Memproses:  79%|██████████████████████████████████████████████████████▌              | 444/561 [16:56<04:04,  2.09s/it]

[445] P29275 → 1 domain: PF00001


Memproses:  79%|██████████████████████████████████████████████████████▋              | 445/561 [16:58<04:03,  2.10s/it]

[446] Q9Y5X4 → 2 domain: PF00104, PF00105


Memproses:  80%|██████████████████████████████████████████████████████▊              | 446/561 [17:00<04:01,  2.10s/it]

[447] P48736 → 5 domain: PF00454, PF00613, PF00792, PF00794, PF19710


Memproses:  80%|██████████████████████████████████████████████████████▉              | 447/561 [17:02<03:59,  2.10s/it]

[448] P08588 → 1 domain: PF00001


Memproses:  80%|███████████████████████████████████████████████████████              | 448/561 [17:04<03:57,  2.11s/it]

[449] P36507 → 1 domain: PF00069


Memproses:  80%|███████████████████████████████████████████████████████▏             | 449/561 [17:06<03:55,  2.10s/it]

[450] P49840 → 1 domain: PF00069


Memproses:  80%|███████████████████████████████████████████████████████▎             | 450/561 [17:09<03:53,  2.10s/it]

[451] Q9H244 → 1 domain: PF00001


Memproses:  80%|███████████████████████████████████████████████████████▍             | 451/561 [17:11<03:50,  2.09s/it]

[452] Q96GD4 → 1 domain: PF00069


Memproses:  81%|███████████████████████████████████████████████████████▌             | 452/561 [17:13<03:47,  2.08s/it]

[453] Q9Y2T6 → 1 domain: PF00001


Memproses:  81%|███████████████████████████████████████████████████████▋             | 453/561 [17:15<03:44,  2.08s/it]

[454] Q13627 → 1 domain: PF00069


Memproses:  81%|███████████████████████████████████████████████████████▊             | 454/561 [17:17<03:41,  2.07s/it]

[455] P19784 → 1 domain: PF00069


Memproses:  81%|███████████████████████████████████████████████████████▉             | 455/561 [17:19<03:39,  2.07s/it]

[456] Q05586 → 3 domain: PF00060, PF01094, PF10613


Memproses:  81%|████████████████████████████████████████████████████████             | 456/561 [17:21<03:36,  2.07s/it]

[457] Q9Y337 → 1 domain: PF00089


Memproses:  81%|████████████████████████████████████████████████████████▏            | 457/561 [17:23<03:35,  2.07s/it]

[458] P34972 → 1 domain: PF00001


Memproses:  82%|████████████████████████████████████████████████████████▎            | 458/561 [17:25<03:34,  2.08s/it]

[459] P00519 → 4 domain: PF00017, PF00018, PF07714, PF08919


Memproses:  82%|████████████████████████████████████████████████████████▍            | 459/561 [17:27<03:32,  2.09s/it]

[460] P08236 → 3 domain: PF00703, PF02836, PF02837


Memproses:  82%|████████████████████████████████████████████████████████▌            | 460/561 [17:29<03:31,  2.09s/it]

[461] P80365 → 1 domain: PF00106


Memproses:  82%|████████████████████████████████████████████████████████▋            | 461/561 [17:32<03:44,  2.25s/it]

[462] P50579 → 1 domain: PF00557


Memproses:  82%|████████████████████████████████████████████████████████▊            | 462/561 [17:34<03:36,  2.18s/it]

[463] Q14145 → 4 domain: PF00651, PF01344, PF07707, PF24681


Memproses:  83%|████████████████████████████████████████████████████████▉            | 463/561 [17:36<03:30,  2.14s/it]

[464] P10827 → 2 domain: PF00104, PF00105


Memproses:  83%|█████████████████████████████████████████████████████████            | 464/561 [17:38<03:26,  2.13s/it]

[465] Q99250 → 4 domain: PF00520, PF06512, PF11933, PF24609


Memproses:  83%|█████████████████████████████████████████████████████████▏           | 465/561 [17:40<03:22,  2.11s/it]

[466] Q58F21 → 3 domain: PF00439, PF17035, PF17105


Memproses:  83%|█████████████████████████████████████████████████████████▎           | 466/561 [17:42<03:19,  2.10s/it]

[467] Q9NS75 → 1 domain: PF00001


Memproses:  83%|█████████████████████████████████████████████████████████▍           | 467/561 [17:44<03:17,  2.10s/it]

[468] P35557 → 2 domain: PF00349, PF03727


Memproses:  83%|█████████████████████████████████████████████████████████▌           | 468/561 [17:46<03:14,  2.09s/it]

[469] O00206 → 4 domain: PF01582, PF12799, PF13516, PF13855


Memproses:  84%|█████████████████████████████████████████████████████████▋           | 469/561 [17:48<03:11,  2.09s/it]

[470] O00329 → 5 domain: PF00454, PF00613, PF00792, PF00794, PF02192


Memproses:  84%|█████████████████████████████████████████████████████████▊           | 470/561 [17:51<03:10,  2.09s/it]

[471] P24941 → 1 domain: PF00069


Memproses:  84%|█████████████████████████████████████████████████████████▉           | 471/561 [17:53<03:08,  2.09s/it]

[472] Q9UBE0 → 1 domain: PF00899


Memproses:  84%|██████████████████████████████████████████████████████████           | 472/561 [17:55<03:06,  2.10s/it]

[473] P33527 → 3 domain: PF00005, PF00664, PF24357


Memproses:  84%|██████████████████████████████████████████████████████████▏          | 473/561 [17:57<03:04,  2.10s/it]

[474] P28223 → 1 domain: PF00001


Memproses:  84%|██████████████████████████████████████████████████████████▎          | 474/561 [17:59<03:02,  2.10s/it]

[475] P11597 → 2 domain: PF01273, PF02886


Memproses:  85%|██████████████████████████████████████████████████████████▍          | 475/561 [18:01<03:00,  2.09s/it]

[476] O00519 → 1 domain: PF01425


Memproses:  85%|██████████████████████████████████████████████████████████▌          | 476/561 [18:03<02:59,  2.11s/it]

[477] Q14790 → 2 domain: PF00656, PF01335


Memproses:  85%|██████████████████████████████████████████████████████████▋          | 477/561 [18:05<02:58,  2.12s/it]

[478] P41597 → 1 domain: PF00001


Memproses:  85%|██████████████████████████████████████████████████████████▊          | 478/561 [18:07<02:54,  2.11s/it]

[479] O15054 → 3 domain: PF02373, PF21322, PF21326


Memproses:  85%|██████████████████████████████████████████████████████████▉          | 479/561 [18:10<02:52,  2.10s/it]

[480] P42338 → 5 domain: PF00454, PF00613, PF00792, PF00794, PF02192


Memproses:  86%|███████████████████████████████████████████████████████████          | 480/561 [18:12<02:48,  2.09s/it]

[481] Q9NP59 → 1 domain: PF06963


Memproses:  86%|███████████████████████████████████████████████████████████▏         | 481/561 [18:15<03:17,  2.47s/it]

[482] Q99683 → 5 domain: PF00069, PF13281, PF19039, PF20302, PF20309


Memproses:  86%|███████████████████████████████████████████████████████████▎         | 482/561 [18:17<03:05,  2.35s/it]

[483] P78540 → 1 domain: PF00491


Memproses:  86%|███████████████████████████████████████████████████████████▍         | 483/561 [18:19<03:03,  2.36s/it]

[484] P14416 → 1 domain: PF00001


Memproses:  86%|███████████████████████████████████████████████████████████▌         | 484/561 [18:21<02:54,  2.27s/it]

[485] Q92523 → 2 domain: PF00755, PF16484


Memproses:  86%|███████████████████████████████████████████████████████████▋         | 485/561 [18:24<02:48,  2.22s/it]

[486] P52895 → 1 domain: PF00248


Memproses:  87%|███████████████████████████████████████████████████████████▊         | 486/561 [18:26<02:44,  2.20s/it]

[487] P28065 → 1 domain: PF00227


Memproses:  87%|███████████████████████████████████████████████████████████▉         | 487/561 [18:28<02:41,  2.18s/it]

[488] Q09472 → 8 domain: PF00439, PF00569, PF02135, PF02172, PF06001, PF08214, PF09030, PF23570


Memproses:  87%|████████████████████████████████████████████████████████████         | 488/561 [18:30<02:37,  2.16s/it]

[489] Q12851 → 2 domain: PF00069, PF00780


Memproses:  87%|████████████████████████████████████████████████████████████▏        | 489/561 [18:32<02:35,  2.15s/it]

[490] P61964 → 1 domain: PF25175


Memproses:  87%|████████████████████████████████████████████████████████████▎        | 490/561 [18:34<02:32,  2.15s/it]

[491] P46095 → 1 domain: PF00001


Memproses:  88%|████████████████████████████████████████████████████████████▍        | 491/561 [18:36<02:28,  2.13s/it]

[492] Q16853 → 3 domain: PF01179, PF02727, PF02728


Memproses:  88%|████████████████████████████████████████████████████████████▌        | 492/561 [18:38<02:26,  2.13s/it]

[493] Q13133 → 2 domain: PF00104, PF00105


Memproses:  88%|████████████████████████████████████████████████████████████▋        | 493/561 [18:41<02:38,  2.33s/it]

[494] P04629 → 4 domain: PF07714, PF13855, PF16920, PF18613


Memproses:  88%|████████████████████████████████████████████████████████████▊        | 494/561 [18:43<02:30,  2.25s/it]

[495] O95977 → 1 domain: PF00001


Memproses:  88%|████████████████████████████████████████████████████████████▉        | 495/561 [18:45<02:25,  2.20s/it]

[496] P11509 → 1 domain: PF00067


Memproses:  88%|█████████████████████████████████████████████████████████████        | 496/561 [18:48<02:21,  2.18s/it]

[497] P51681 → 1 domain: PF00001


Memproses:  89%|█████████████████████████████████████████████████████████████▏       | 497/561 [18:50<02:18,  2.17s/it]

[498] Q9UK32 → 2 domain: PF00069, PF00433


Memproses:  89%|█████████████████████████████████████████████████████████████▎       | 498/561 [18:52<02:14,  2.13s/it]

[499] Q6L5J4 → 1 domain: PF00001


Memproses:  89%|█████████████████████████████████████████████████████████████▎       | 499/561 [18:55<02:28,  2.40s/it]

[500] P08908 → 1 domain: PF00001


Memproses:  89%|█████████████████████████████████████████████████████████████▍       | 500/561 [18:57<02:19,  2.29s/it]

[501] P62993 → 2 domain: PF00017, PF00018


Memproses:  89%|█████████████████████████████████████████████████████████████▌       | 501/561 [18:59<02:13,  2.23s/it]

[502] Q13526 → 2 domain: PF00397, PF00639


Memproses:  89%|█████████████████████████████████████████████████████████████▋       | 502/561 [19:01<02:08,  2.18s/it]

[503] Q8IU80 → 3 domain: PF00057, PF00089, PF01390


Memproses:  90%|█████████████████████████████████████████████████████████████▊       | 503/561 [19:03<02:05,  2.16s/it]

[504] P11229 → 1 domain: PF00001


Memproses:  90%|█████████████████████████████████████████████████████████████▉       | 504/561 [19:05<02:01,  2.13s/it]

[505] Q86WV6 → 2 domain: PF15009, PF23417


Memproses:  90%|██████████████████████████████████████████████████████████████       | 505/561 [19:07<01:57,  2.11s/it]

[506] Q2M2I8 → 1 domain: PF00069


Memproses:  90%|██████████████████████████████████████████████████████████████▏      | 506/561 [19:09<01:55,  2.11s/it]

[507] Q9NPC2 → 1 domain: PF07885


Memproses:  90%|██████████████████████████████████████████████████████████████▎      | 507/561 [19:11<01:53,  2.09s/it]

[508] P08473 → 2 domain: PF01431, PF05649


Memproses:  91%|██████████████████████████████████████████████████████████████▍      | 508/561 [19:13<01:50,  2.08s/it]

[509] Q9NYA1 → 1 domain: PF00781


Memproses:  91%|██████████████████████████████████████████████████████████████▌      | 509/561 [19:15<01:48,  2.08s/it]

[510] P17612 → 1 domain: PF00069


Memproses:  91%|██████████████████████████████████████████████████████████████▋      | 510/561 [19:18<01:45,  2.08s/it]

[511] O75385 → 3 domain: PF00069, PF12063, PF21127


Memproses:  91%|██████████████████████████████████████████████████████████████▊      | 511/561 [19:20<01:44,  2.09s/it]

[512] P43004 → 1 domain: PF00375


Memproses:  91%|██████████████████████████████████████████████████████████████▉      | 512/561 [19:22<01:42,  2.09s/it]

[513] Q9H2K8 → 1 domain: PF00069


Memproses:  91%|███████████████████████████████████████████████████████████████      | 513/561 [19:24<01:39,  2.08s/it]

[514] Q9NR97 → 2 domain: PF01582, PF13855


Memproses:  92%|███████████████████████████████████████████████████████████████▏     | 514/561 [19:26<01:37,  2.08s/it]

[515] P37173 → 2 domain: PF07714, PF08917


Memproses:  92%|███████████████████████████████████████████████████████████████▎     | 515/561 [19:28<01:35,  2.09s/it]

[516] Q9UIQ6 → 3 domain: PF01433, PF11838, PF17900


Memproses:  92%|███████████████████████████████████████████████████████████████▍     | 516/561 [19:30<01:33,  2.07s/it]

[517] Q9NSY1 → 2 domain: PF00069, PF15282


Memproses:  92%|███████████████████████████████████████████████████████████████▌     | 517/561 [19:32<01:31,  2.07s/it]

[518] P14867 → 2 domain: PF02931, PF02932


Memproses:  92%|███████████████████████████████████████████████████████████████▋     | 518/561 [19:35<01:38,  2.29s/it]

[519] Q9UKE5 → 2 domain: PF00069, PF00780


Memproses:  93%|███████████████████████████████████████████████████████████████▊     | 519/561 [19:37<01:33,  2.22s/it]

[520] Q16584 → 2 domain: PF07714, PF14604


Memproses:  93%|███████████████████████████████████████████████████████████████▉     | 520/561 [19:39<01:32,  2.25s/it]

[521] P12268 → 2 domain: PF00478, PF00571


Memproses:  93%|████████████████████████████████████████████████████████████████     | 521/561 [19:41<01:27,  2.20s/it]

[522] Q96DB2 → 1 domain: PF00850


Memproses:  93%|████████████████████████████████████████████████████████████████▏    | 522/561 [19:43<01:24,  2.16s/it]

[523] P34969 → 1 domain: PF00001


Memproses:  93%|████████████████████████████████████████████████████████████████▎    | 523/561 [19:45<01:21,  2.14s/it]

[524] O95819 → 2 domain: PF00069, PF00780


Memproses:  93%|████████████████████████████████████████████████████████████████▍    | 524/561 [19:48<01:18,  2.12s/it]

[525] P46098 → 2 domain: PF02931, PF02932


Memproses:  94%|████████████████████████████████████████████████████████████████▌    | 525/561 [19:50<01:16,  2.12s/it]

[526] Q13188 → 2 domain: PF00069, PF11629


Memproses:  94%|████████████████████████████████████████████████████████████████▋    | 526/561 [19:52<01:14,  2.12s/it]

[527] Q9BY41 → 1 domain: PF00850


Memproses:  94%|████████████████████████████████████████████████████████████████▊    | 527/561 [19:54<01:11,  2.11s/it]

[528] O00311 → 1 domain: PF00069


Memproses:  94%|████████████████████████████████████████████████████████████████▉    | 528/561 [19:56<01:09,  2.11s/it]

[529] P49760 → 1 domain: PF00069


Memproses:  94%|█████████████████████████████████████████████████████████████████    | 529/561 [19:58<01:07,  2.10s/it]

[530] P27361 → 1 domain: PF00069


Memproses:  94%|█████████████████████████████████████████████████████████████████▏   | 530/561 [20:00<01:07,  2.19s/it]

[531] P23975 → 1 domain: PF00209


Memproses:  95%|█████████████████████████████████████████████████████████████████▎   | 531/561 [20:03<01:04,  2.16s/it]

[532] P02545 → 2 domain: PF00038, PF00932


Memproses:  95%|█████████████████████████████████████████████████████████████████▍   | 532/561 [20:05<01:01,  2.14s/it]

[533] P31751 → 3 domain: PF00069, PF00169, PF00433


Memproses:  95%|█████████████████████████████████████████████████████████████████▌   | 533/561 [20:07<00:59,  2.12s/it]

[534] Q9Y243 → 3 domain: PF00069, PF00169, PF00433


Memproses:  95%|█████████████████████████████████████████████████████████████████▋   | 534/561 [20:09<00:56,  2.10s/it]

[535] Q969S8 → 1 domain: PF00850


Memproses:  95%|█████████████████████████████████████████████████████████████████▊   | 535/561 [20:11<00:54,  2.10s/it]

[536] P12821 → 1 domain: PF01401


Memproses:  96%|█████████████████████████████████████████████████████████████████▉   | 536/561 [20:13<00:54,  2.16s/it]

[537] P62937 → 1 domain: PF00160


Memproses:  96%|██████████████████████████████████████████████████████████████████   | 537/561 [20:15<00:51,  2.14s/it]

[538] P21397 → 1 domain: PF01593


Memproses:  96%|██████████████████████████████████████████████████████████████████▏  | 538/561 [20:17<00:48,  2.13s/it]

[539] P00492 → 1 domain: PF00156


Memproses:  96%|██████████████████████████████████████████████████████████████████▎  | 539/561 [20:19<00:46,  2.11s/it]

[540] P14902 → 1 domain: PF01231


Memproses:  96%|██████████████████████████████████████████████████████████████████▍  | 540/561 [20:22<00:44,  2.10s/it]

[541] P40763 → 5 domain: PF00017, PF01017, PF02864, PF02865, PF21354


Memproses:  96%|██████████████████████████████████████████████████████████████████▌  | 541/561 [20:24<00:42,  2.10s/it]

[542] O75762 → 3 domain: PF00023, PF00520, PF12796


Memproses:  97%|██████████████████████████████████████████████████████████████████▋  | 542/561 [20:26<00:39,  2.09s/it]

[543] Q9GZT9 → 2 domain: PF01753, PF13640


Memproses:  97%|██████████████████████████████████████████████████████████████████▊  | 543/561 [20:28<00:37,  2.10s/it]

[544] O15111 → 3 domain: PF00069, PF12179, PF18397


Memproses:  97%|██████████████████████████████████████████████████████████████████▉  | 544/561 [20:30<00:35,  2.09s/it]

[545] O95551 → 2 domain: PF03372, PF14555


Memproses:  97%|███████████████████████████████████████████████████████████████████  | 545/561 [20:32<00:35,  2.19s/it]

[546] P04070 → 3 domain: PF00089, PF00594, PF14670


Memproses:  97%|███████████████████████████████████████████████████████████████████▏ | 546/561 [20:34<00:32,  2.16s/it]

[547] Q15418 → 2 domain: PF00069, PF00433


Memproses:  98%|███████████████████████████████████████████████████████████████████▎ | 547/561 [20:37<00:33,  2.37s/it]

[548] O95180 → 1 domain: PF00520


Memproses:  98%|███████████████████████████████████████████████████████████████████▍ | 548/561 [20:39<00:29,  2.29s/it]

[549] P08238 → 2 domain: PF00183, PF13589


Memproses:  98%|███████████████████████████████████████████████████████████████████▌ | 549/561 [20:41<00:26,  2.21s/it]

[550] O15552 → 1 domain: PF00001


Memproses:  98%|███████████████████████████████████████████████████████████████████▋ | 550/561 [20:43<00:23,  2.18s/it]

[551] P48067 → 1 domain: PF00209


Memproses:  98%|███████████████████████████████████████████████████████████████████▊ | 551/561 [20:46<00:21,  2.14s/it]

[552] P27338 → 1 domain: PF01593


Memproses:  98%|███████████████████████████████████████████████████████████████████▉ | 552/561 [20:48<00:19,  2.13s/it]

[553] O00748 → 1 domain: PF00135


Memproses:  99%|████████████████████████████████████████████████████████████████████ | 553/561 [20:50<00:16,  2.10s/it]

[554] Q92847 → 1 domain: PF00001


Memproses:  99%|████████████████████████████████████████████████████████████████████▏| 554/561 [20:52<00:15,  2.19s/it]

[555] O60725 → 1 domain: PF04140


Memproses:  99%|████████████████████████████████████████████████████████████████████▎| 555/561 [20:54<00:13,  2.18s/it]

[556] P32245 → 1 domain: PF00001


Memproses:  99%|████████████████████████████████████████████████████████████████████▍| 556/561 [20:56<00:10,  2.15s/it]

[557] Q6P988 → 1 domain: PF03283


Memproses:  99%|████████████████████████████████████████████████████████████████████▌| 557/561 [20:58<00:08,  2.13s/it]

[558] P17252 → 4 domain: PF00069, PF00130, PF00168, PF00433


Memproses:  99%|████████████████████████████████████████████████████████████████████▋| 558/561 [21:00<00:06,  2.12s/it]

[559] P09467 → 2 domain: PF00316, PF18913


Memproses: 100%|████████████████████████████████████████████████████████████████████▊| 559/561 [21:03<00:04,  2.11s/it]

[560] P00746 → 1 domain: PF00089


Memproses: 100%|████████████████████████████████████████████████████████████████████▉| 560/561 [21:05<00:02,  2.11s/it]

[561] P29375 → 7 domain: PF00628, PF01388, PF02373, PF02375, PF02928, PF08429, PF21323


Memproses: 100%|█████████████████████████████████████████████████████████████████████| 561/561 [21:07<00:00,  2.26s/it]



✅ Pemetaan selesai! Hasil disimpan di: D:/semhas/projek/TargetProtein_PFam.xlsx
✅ Daftar semua UniProt ID disimpan di: D:/semhas/projek/All_UniProt_IDs.xlsx


In [6]:
# periksa duplikat UniPOrtId --> PfamId

In [22]:
import pandas as pd

# 1. Baca file Excel
file_path = 'D:/semhas/projek/TargetProtein_PFam.xlsx'
df = pd.read_excel(file_path)

# 2. Tampilkan nama kolom untuk memastikan struktur
print("Kolom tersedia:", df.columns.tolist())

# Misalnya kolom bernama 'UniProt_ID' dan 'Pfam_ID' (ubah sesuai nama aktual kolom)
uniprot_col = 'UniProt_ID'
pfam_col = 'Pfam_ID'

# 3. Simpan kolom Pfam_ID asli ke kolom baru
df['Pfam_ID_Original'] = df[pfam_col]

# 4. Bersihkan kolom Pfam_ID → Hapus versi (contoh: PF00069.23 → PF00069)
df[pfam_col] = df[pfam_col].astype(str).str.extract(r'^(PF\d{5})')

# 5. Pindahkan 'Pfam_ID_Original' sebelum 'Pfam_ID'
cols = df.columns.tolist()
pfam_idx = cols.index(pfam_col)
# Hapus kolom dulu agar bisa disisipkan ulang
cols.remove('Pfam_ID_Original')
# Sisipkan di posisi sebelum 'Pfam_ID'
cols.insert(pfam_idx, 'Pfam_ID_Original')
df = df[cols]

# 6. Tampilkan jumlah total baris
print("Jumlah total relasi (termasuk duplikat):", len(df))

# 7. Cek jumlah relasi unik
unique_pairs = df[[uniprot_col, pfam_col]].drop_duplicates()
print("Jumlah relasi unik UniProt–Pfam:", len(unique_pairs))

# 8. Tampilkan duplikat (jika ingin dicek lebih lanjut)
duplicates = df[df.duplicated(subset=[uniprot_col, pfam_col], keep=False)]
print("Jumlah relasi duplikat:", len(duplicates))
print("\nContoh relasi duplikat:")
print(duplicates.head())

# 9. (Opsional) Simpan relasi unik ke file baru
unique_output = df.drop_duplicates(subset=[uniprot_col, pfam_col])
unique_output.to_excel('D:/semhas/projek/TargetProtein_PFam_Unique.xlsx', index=False)


Kolom tersedia: ['UniProt_ID', 'Pfam_ID']
Jumlah total relasi (termasuk duplikat): 1056
Jumlah relasi unik UniProt–Pfam: 1056
Jumlah relasi duplikat: 0

Contoh relasi duplikat:
Empty DataFrame
Columns: [UniProt_ID, Pfam_ID_Original, Pfam_ID]
Index: []


In [None]:
# Mapping PFAM --> DDI

In [5]:
import pandas as pd
import re

# === 1. BACA DAN BERSIHKAN DOMAIN ===
file_domain = "D:/semhas/projek/TargetProtein_PFam.xlsx"
df_pfam = pd.read_excel(file_domain)

# Ambil dan bersihkan ID domain (PFxxxxx)
df_pfam['Pfam_ID_Clean'] = df_pfam['Pfam_ID'].astype(str).str.extract(r'(PF\d{5})')
domain_unik = set(df_pfam['Pfam_ID_Clean'].dropna().unique())

print(f"Jumlah domain unik dari protein target: {len(domain_unik)}")

# === 2. PARSE FILE 3DID ===
file_3did = "D:/semhas/projek/3did_flat_Mar_3_2025.dat"
ddi_all = []

with open(file_3did, "r") as f:
    for line in f:
        if line.startswith("#=ID"):
            match = re.findall(r'PF\d{5}', line)
            if len(match) == 2:
                d1, d2 = match
                ddi_all.append((d1, d2))

print(f"Jumlah total pasangan DDI di 3DID: {len(ddi_all)}")

# === 3. COCOKKAN DENGAN DOMAIN UNIK ===
ddi_relevan = [(d1, d2) for d1, d2 in ddi_all if d1 in domain_unik and d2 in domain_unik]

print(f"Jumlah pasangan DDI yang relevan: {len(ddi_relevan)}")

# === 4. SIMPAN KE EXCEL ===
df_ddi_relevan = pd.DataFrame(ddi_relevan, columns=["Domain_1", "Domain_2"])
output_path = "D:/semhas/projek/DDI_Relevan.xlsx"
df_ddi_relevan.to_excel(output_path, index=False)

print(f"✅ DDI relevan disimpan di: {output_path}")


Jumlah domain unik dari protein target: 478
Jumlah total pasangan DDI di 3DID: 20644
Jumlah pasangan DDI yang relevan: 827
✅ DDI relevan disimpan di: D:/semhas/projek/DDI_Relevan.xlsx


In [4]:
import pandas as pd
import re

# === 1. BACA DAN BERSIHKAN DOMAIN ===
file_domain = "D:/semhas/projek/TargetProtein_PFam.xlsx"
df_pfam = pd.read_excel(file_domain)

# Ambil dan bersihkan ID domain (PFxxxxx)
df_pfam['Pfam_ID_Clean'] = df_pfam['Pfam_ID'].astype(str).str.extract(r'(PF\d{5})')
domain_unik = df_pfam['Pfam_ID_Clean'].dropna().unique()

print(f"Jumlah domain unik dari protein target: {len(domain_unik)}")

# === Simpan domain unik ke Excel ===
df_domain_unik = pd.DataFrame(domain_unik, columns=['Domain_Unik'])
output_domain = "D:/semhas/projek/Domain_Unik_Protein.xlsx"
df_domain_unik.to_excel(output_domain, index=False)
print(f"✅ Domain unik disimpan di: {output_domain}")

# === 2. PARSE FILE 3DID ===
file_3did = "D:/semhas/projek/3did_flat_Mar_3_2025.dat"
ddi_all = []

with open(file_3did, "r") as f:
    for line in f:
        if line.startswith("#=ID"):
            match = re.findall(r'PF\d{5}', line)
            if len(match) == 2:
                d1, d2 = match
                ddi_all.append((d1, d2))

print(f"Jumlah total pasangan DDI di 3DID: {len(ddi_all)}")

# === 3. COCOKKAN DENGAN DOMAIN UNIK ===
set_domain_unik = set(domain_unik)
ddi_relevan = [(d1, d2) for d1, d2 in ddi_all if d1 in set_domain_unik and d2 in set_domain_unik]

print(f"Jumlah pasangan DDI yang relevan: {len(ddi_relevan)}")

# === 4. SIMPAN DDI RELEVAN KE EXCEL ===
df_ddi_relevan = pd.DataFrame(ddi_relevan, columns=["Domain_1", "Domain_2"])
output_ddi = "D:/semhas/projek/DDI_Relevan.xlsx"
df_ddi_relevan.to_excel(output_ddi, index=False)

print(f"✅ DDI relevan disimpan di: {output_ddi}")


Jumlah domain unik dari protein target: 478
✅ Domain unik disimpan di: D:/semhas/projek/Domain_Unik_Protein.xlsx
Jumlah total pasangan DDI di 3DID: 20644
Jumlah pasangan DDI yang relevan: 827
✅ DDI relevan disimpan di: D:/semhas/projek/DDI_Relevan.xlsx


In [None]:
# Menentukan PDA

In [19]:
import pandas as pd
from itertools import product

# 1. Load data
ppi_df = pd.read_csv("D:/semhas/projek/string_interactions_short.tsv", sep="\t")
pfam_df = pd.read_excel("D:/semhas/projek/TargetProtein_PFam.xlsx")
ddi_df = pd.read_excel("D:/semhas/projek/DDI_Pasangan_Relevan.xlsx")
mapping_df = pd.read_csv("D:/semhas/projek/9606.protein.aliases.v12.0.txt", sep='\t', header=None, names=["string_id", "alias", "source"])

# 2. Filter mapping dari UniProt
mapping_df = mapping_df[mapping_df['source'] == 'Ensembl_UniProt']
enspid_to_uniprot = dict(zip(mapping_df["string_id"], mapping_df["alias"]))

# 3. Mapping UniProt ke PPI
ppi_df["Protein_1"] = ppi_df["node1_string_id"].map(enspid_to_uniprot)
ppi_df["Protein_2"] = ppi_df["node2_string_id"].map(enspid_to_uniprot)
ppi_df = ppi_df.dropna(subset=["Protein_1", "Protein_2"])

# 4. Preprocessing domain dan DDI
pfam_df['Pfam_ID'] = pfam_df['Pfam_ID'].str.split('.').str[0]
ddi_df['Domain_1'] = ddi_df['Domain_1'].str.split('.').str[0]
ddi_df['Domain_2'] = ddi_df['Domain_2'].str.split('.').str[0]
ddi_pairs = set(tuple(sorted([a, b])) for a, b in zip(ddi_df['Domain_1'], ddi_df['Domain_2']))

# 5. Mapping protein ke daftar domain
pfam_dict = pfam_df.groupby("UniProt_ID")["Pfam_ID"].apply(list).to_dict()

# 6. Gabungkan semua PPI + cek interaksi domain
rows = []
for _, row in ppi_df.iterrows():
    p1, p2 = row["Protein_1"], row["Protein_2"]
    domains1 = pfam_dict.get(p1, [])
    domains2 = pfam_dict.get(p2, [])
    
    # Cek apakah minimal satu pasangan domain saling berinteraksi
    interaksi_ditemukan = any(tuple(sorted([d1, d2])) in ddi_pairs for d1, d2 in product(domains1, domains2))
    
    rows.append({
        "Protein_1": p1,
        "Protein_2": p2,
        "Domains_Protein_1": ", ".join(domains1) if domains1 else "-",
        "Domains_Protein_2": ", ".join(domains2) if domains2 else "-",
        "Interaksi_Domain": "Yes" if interaksi_ditemukan else "No"
    })

# 7. Simpan hasil
hasil_df = pd.DataFrame(rows)
output_file = "D:/semhas/projek/PPI_DDI_Merged.xlsx"
hasil_df.to_excel(output_file, index=False)

print(f"Data gabungan disimpan di: {output_file}")
print("Jumlah total pasangan PPI:", len(hasil_df))
print("Jumlah pasangan dengan interaksi domain:", (hasil_df["Interaksi_Domain"] == "Yes").sum())


Data gabungan disimpan di: D:/semhas/projek/PPI_DDI_Merged.xlsx
Jumlah total pasangan PPI: 3880
Jumlah pasangan dengan interaksi domain: 340


In [18]:
import pandas as pd
from itertools import product

# 1. Load data
ppi_df = pd.read_csv("D:/semhas/projek/string_interactions_short.tsv", sep="\t")
pfam_df = pd.read_excel("D:/semhas/projek/TargetProtein_PFam.xlsx")
ddi_df = pd.read_excel("D:/semhas/projek/DDI_Pasangan_Relevan.xlsx")
mapping_df = pd.read_csv("D:/semhas/projek/9606.protein.aliases.v12.0.txt", sep='\t', header=None, names=["string_id", "alias", "source"])

# 2. Filter mapping hanya dari UniProt
mapping_df = mapping_df[mapping_df['source'] == 'Ensembl_UniProt']

# 3. Buat mapping ENSP → UniProt
enspid_to_uniprot = dict(zip(mapping_df["string_id"], mapping_df["alias"]))

# 4. Tambahkan UniProt ID ke PPI
ppi_df["UniProt_1"] = ppi_df["node1_string_id"].map(enspid_to_uniprot)
ppi_df["UniProt_2"] = ppi_df["node2_string_id"].map(enspid_to_uniprot)

# 5. Preprocessing domain
ddi_df['Domain_1'] = ddi_df['Domain_1'].str.split('.').str[0]
ddi_df['Domain_2'] = ddi_df['Domain_2'].str.split('.').str[0]
pfam_df['Pfam_ID'] = pfam_df['Pfam_ID'].str.split('.').str[0]

# 6. Buat set DDI
ddi_pairs = set(tuple(sorted([a, b])) for a, b in zip(ddi_df['Domain_1'], ddi_df['Domain_2']))

# 7. Buat dict protein → domain
pfam_dict = pfam_df.groupby("UniProt_ID")["Pfam_ID"].apply(list).to_dict()

# 8. Cek PDA
pda_rows = []
for _, row in ppi_df.iterrows():
    p1, p2 = row["UniProt_1"], row["UniProt_2"]
    domains1 = pfam_dict.get(p1, [])
    domains2 = pfam_dict.get(p2, [])
    for d1, d2 in product(domains1, domains2):
        if tuple(sorted([d1, d2])) in ddi_pairs:
            pda_rows.append({
                "Protein_1": p1,
                "Protein_2": p2,
                "Domain_1": d1,
                "Domain_2": d2
            })
            break

# 9. Hasil
pda_df = pd.DataFrame(pda_rows)

# Hitung jumlah pasangan protein unik
jumlah_pasangan_protein = len(pda_df[['Protein_1', 'Protein_2']].drop_duplicates())

# Hitung jumlah pasangan domain unik
jumlah_pasangan_domain = len(pda_df[['Domain_1', 'Domain_2']].drop_duplicates())

print("Jumlah pasangan PDA yang valid (protein-domain):", len(pda_df))
print("Jumlah pasangan protein–protein unik:", jumlah_pasangan_protein)
print("Jumlah pasangan domain–domain unik:", jumlah_pasangan_domain)
print(pda_df.head())

# 10. Simpan ke file Excel
output_path = "D:/semhas/projek/Hasil_PDA.xlsx"
pda_df.to_excel(output_path, index=False)
print(f"Hasil PDA disimpan di: {output_path}")



Jumlah pasangan PDA yang valid (protein-domain): 340
Jumlah pasangan protein–protein unik: 340
Jumlah pasangan domain–domain unik: 79
  Protein_1 Protein_2 Domain_1 Domain_2
0    Q2M2I8    P08172  PF00069  PF00001
1    Q9UKU7    P28330  PF00441  PF00441
2    Q8WXI4    Q9NPJ3  PF03061  PF03061
3    P36896    P36894  PF00069  PF00069
4    P36896    P18075  PF01064  PF00019
Hasil PDA disimpan di: D:/semhas/projek/Hasil_PDA.xlsx
