In [1]:
import pandas as pd
import requests
import re

In [2]:
# Load the gene-name -> UniProt entry-name mapping and preview its first rows.
entry_names_csv = 'uniprot-entry-names.csv'
mapped_ids = pd.read_csv(entry_names_csv)
mapped_ids.head()

Unnamed: 0,gene_name,entry_name
0,PLA2G10,PA2GX_HUMAN
1,PLA2G10,H3BRW4_HUMAN
2,PLA2G10,L8E7V2_HUMAN
3,FDFT1,FDFT_HUMAN
4,FDFT1,E9PNM1_HUMAN


#### Selecting only UniProt IDs that are also Swiss-Prot entries, i.e. those that have been manually reviewed

In [3]:
def filter_dataframe(df, timeout=30, fetch=None):
    """Keep only the rows whose ``entry_name`` resolves to a Swiss-Prot entry.

    Each entry name is looked up at ``https://www.genome.jp/entry/sp:<name>``.
    A row is kept when the returned page does not contain the text
    "No such data was found.".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``entry_name`` column. Rows with a missing value in
        any column are dropped before any lookup is made. The caller's frame
        is not modified.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up. Only used by
        the default fetcher.
    fetch : callable, optional
        ``fetch(url) -> str`` returning the page text for ``url``. Defaults to
        an HTTP GET through a shared ``requests.Session``. Supplying a custom
        callable allows cached or offline lookups.

    Returns
    -------
    pandas.DataFrame
        The kept rows, in their original order, with a fresh 0..n-1 index.

    Raises
    ------
    requests.exceptions.RequestException
        From the default fetcher, on connection failures, timeouts, or an
        HTTP error status on a page that lacks the "not found" marker.
    """
    base_url = "https://www.genome.jp/entry/sp:"
    not_found_marker = "No such data was found."
    df = df.dropna()

    if fetch is None:
        # A single session reuses the connection across the many lookups.
        session = requests.Session()

        def fetch_page(url):
            response = session.get(url, timeout=timeout)
            # An error page (rate limit, server fault, ...) does not carry the
            # "not found" marker and would otherwise be counted as a hit, so
            # surface it instead of silently keeping the row.
            if not_found_marker not in response.text:
                response.raise_for_status()
            return response.text
    else:
        session = None
        fetch_page = fetch

    # Track row positions rather than index labels, so a frame with repeated
    # index labels cannot pull in rows that were never matched.
    kept_positions = []
    try:
        for position, uniprot_id in enumerate(df['entry_name']):
            page_text = fetch_page(f"{base_url}{uniprot_id}")
            if not_found_marker not in page_text:
                kept_positions.append(position)
                print("SwissProt exists for: ", uniprot_id)
    finally:
        if session is not None:
            session.close()

    filtered_df = df.iloc[kept_positions].reset_index(drop=True)
    return filtered_df

In [4]:
# Work on an independent (deep) copy so the loaded frame stays available as-is.
mapped_ids_copy = mapped_ids.copy(deep=True)

In [5]:
# Issues one web lookup per non-null row, so this cell is slow to re-run.
mapped_ids_swissprot = filter_dataframe(df=mapped_ids_copy)

SwissProt exists for:  PA2GX_HUMAN
SwissProt exists for:  FDFT_HUMAN
SwissProt exists for:  CEGT_HUMAN
SwissProt exists for:  CP1A2_HUMAN
SwissProt exists for:  SYRM_HUMAN
SwissProt exists for:  GBA3_HUMAN
SwissProt exists for:  APT_HUMAN
SwissProt exists for:  METH_HUMAN
SwissProt exists for:  UDB11_HUMAN
SwissProt exists for:  HMOX1_HUMAN
SwissProt exists for:  CPT1A_HUMAN
SwissProt exists for:  GSH0_HUMAN
SwissProt exists for:  SIA7A_HUMAN
SwissProt exists for:  SNAT_HUMAN
SwissProt exists for:  PCCA_HUMAN
SwissProt exists for:  PPA6_HUMAN
SwissProt exists for:  SYTM_HUMAN
SwissProt exists for:  CP1B1_HUMAN
SwissProt exists for:  DOPP1_HUMAN
SwissProt exists for:  GSTA2_HUMAN
SwissProt exists for:  EBP_HUMAN
SwissProt exists for:  UDB28_HUMAN
SwissProt exists for:  KHK_HUMAN
SwissProt exists for:  MVD1_HUMAN
SwissProt exists for:  PRI2_HUMAN
SwissProt exists for:  PTSS1_HUMAN
SwissProt exists for:  PPT1_HUMAN
SwissProt exists for:  RIFK_HUMAN
SwissProt exists for:  KITH_HUMAN
SwissP

SwissProt exists for:  PA24B_HUMAN
SwissProt exists for:  CPT1C_HUMAN
SwissProt exists for:  PDE8B_HUMAN
SwissProt exists for:  ST1A3_HUMAN
SwissProt exists for:  HEXA_HUMAN
SwissProt exists for:  ST2A1_HUMAN
SwissProt exists for:  OAT_HUMAN
SwissProt exists for:  KBL_HUMAN
SwissProt exists for:  ADHX_HUMAN
SwissProt exists for:  HEM0_HUMAN
SwissProt exists for:  MOGS_HUMAN
SwissProt exists for:  ACOX2_HUMAN
SwissProt exists for:  ALDOC_HUMAN
SwissProt exists for:  LDH6A_HUMAN
SwissProt exists for:  DPOE3_HUMAN
SwissProt exists for:  NSDHL_HUMAN
SwissProt exists for:  ADH1A_HUMAN
SwissProt exists for:  S27A5_HUMAN
SwissProt exists for:  5NT1A_HUMAN
SwissProt exists for:  ACER3_HUMAN
SwissProt exists for:  FTCD_HUMAN
SwissProt exists for:  ADPPT_HUMAN
SwissProt exists for:  KAT1_HUMAN
SwissProt exists for:  THTPA_HUMAN
SwissProt exists for:  INMT_HUMAN
SwissProt exists for:  MAON_HUMAN
SwissProt exists for:  ALG2_HUMAN
SwissProt exists for:  EST5A_HUMAN
SwissProt exists for:  CLAT_HUMAN

SwissProt exists for:  ODBA_HUMAN
SwissProt exists for:  DLDH_HUMAN
SwissProt exists for:  MGT5B_HUMAN
SwissProt exists for:  CP7A1_HUMAN
SwissProt exists for:  GLSK_HUMAN
SwissProt exists for:  ACADM_HUMAN
SwissProt exists for:  GSTK1_HUMAN
SwissProt exists for:  AL3A1_HUMAN
SwissProt exists for:  TRXR2_HUMAN
SwissProt exists for:  PGM2L_HUMAN
SwissProt exists for:  CP27A_HUMAN
SwissProt exists for:  PI42B_HUMAN
SwissProt exists for:  BODG_HUMAN
SwissProt exists for:  MAOX_HUMAN
SwissProt exists for:  DPOE1_HUMAN
SwissProt exists for:  UD15_HUMAN
SwissProt exists for:  IMDH2_HUMAN
SwissProt exists for:  B4GT2_HUMAN
SwissProt exists for:  PANK1_HUMAN
SwissProt exists for:  PGM1_HUMAN
SwissProt exists for:  RPIA_HUMAN
SwissProt exists for:  ENTP3_HUMAN
SwissProt exists for:  MA1B1_HUMAN
SwissProt exists for:  UXS1_HUMAN
SwissProt exists for:  CP17A_HUMAN
SwissProt exists for:  B4GN1_HUMAN
SwissProt exists for:  GALC_HUMAN
SwissProt exists for:  PIGC_HUMAN
SwissProt exists for:  SIA10_HU

SwissProt exists for:  EXT1_HUMAN
SwissProt exists for:  GMDS_HUMAN
SwissProt exists for:  HGNAT_HUMAN
SwissProt exists for:  AMPL_HUMAN
SwissProt exists for:  CGAT2_HUMAN
SwissProt exists for:  PA2GC_HUMAN
SwissProt exists for:  TYRO_HUMAN
SwissProt exists for:  OXLA_HUMAN
SwissProt exists for:  CDIPT_HUMAN
SwissProt exists for:  DHB12_HUMAN
SwissProt exists for:  ADH1B_HUMAN
SwissProt exists for:  SAST_HUMAN
SwissProt exists for:  ITPA_HUMAN
SwissProt exists for:  PLPP3_HUMAN
SwissProt exists for:  DDC_HUMAN
SwissProt exists for:  SIA7C_HUMAN
SwissProt exists for:  PDE4B_HUMAN
SwissProt exists for:  SYIM_HUMAN
SwissProt exists for:  SRR_HUMAN
SwissProt exists for:  PI4KB_HUMAN
SwissProt exists for:  GPD1L_HUMAN
SwissProt exists for:  MGT5A_HUMAN
SwissProt exists for:  CPT2_HUMAN
SwissProt exists for:  HUTH_HUMAN
SwissProt exists for:  NEUR3_HUMAN
SwissProt exists for:  KYNU_HUMAN
SwissProt exists for:  B4GT1_HUMAN
SwissProt exists for:  DR4L2_HUMAN
SwissProt exists for:  STT3B_HUMAN


In [8]:
# Row counts: reviewed (Swiss-Prot) subset first, then the full mapping.
print(len(mapped_ids_swissprot), len(mapped_ids), sep='\n')

871
6413


In [9]:
# Preview the first five reviewed rows.
mapped_ids_swissprot.head(5)

Unnamed: 0,gene_name,entry_name
0,PLA2G10,PA2GX_HUMAN
1,FDFT1,FDFT_HUMAN
2,UGCG,CEGT_HUMAN
3,CYP1A2,CP1A2_HUMAN
4,RARS2,SYRM_HUMAN


In [11]:
# NOTE(review): the file is written tab-separated even though it is named
# '.csv'; readers must pass sep='\t' when loading it back.
mapped_ids_swissprot.to_csv(
    'mapped_ids_reviewed.csv',
    sep='\t',
    index=False,
)