In [1]:
import pandas as pd
from collections import defaultdict
import os
from tqdm import tqdm

In [2]:
directory = "miTargets/"
if not os.path.exists(directory):
    os.makedirs(directory)

In [3]:
IDs = list(set(open('Input_list.txt').read().splitlines()))
IDs.append('hsa-miR-1275')
IDs

['hsa-let-7a-2', 'hsa-let-7b', 'hsa-let-7a-1', 'hsa-let-7a-3', 'hsa-miR-1275']

In [4]:
# ID = "hsa-let-7a-1"


def Fetch_miRNA_Target(ID):

    URL = f"https://mirbase.org/cgi-bin/mirna_entry.pl?acc={ID}"
    ##################3

    dfs = pd.read_html(URL)
    
    # print(dfs)

    #########################


    # ID = {'ID':''}
    Mat = defaultdict(dict)

    Mat_temp = ''

    for i in range(len(dfs)):
        # print(">", i)

        M3 = ''
        M5 = ''

        df = dfs[i]
        Data = df.values.tolist()
        for J in range(len(Data)):
            j = Data[J]

            if len(j)==2:
                if "Stem-loop sequence" in j[0] and "Stem-loop sequence" in j[1]:
                    Mat['ID'] = j[0].replace('Stem-loop sequence ', '')
                if "Mature sequence" in j[0] and "Mature sequence" in j[1]:
                    Mat_temp = j[0].replace('Mature sequence ', '')
                    # print(Mat_temp)
                    Acc = Data[J+1][1]
                    # print(Acc)
                    Mat[Mat_temp] = Acc

                    if Mat_temp.endswith('5p'):
                        Mat['5p'] = Acc
                    if Mat_temp.endswith('3p'):
                        Mat['3p'] = Acc
    ########################

    df = pd.DataFrame.from_dict(Mat, orient='index')
    
    if '5p' in Mat and '3p' in Mat:

        URL_5 = f"https://mirdb.org/cgi-bin/search.cgi?searchType=miRNA&full=mirbase&searchBox={Mat['5p']}"
        URL_3 = f"https://mirdb.org/cgi-bin/search.cgi?searchType=miRNA&full=mirbase&searchBox={Mat['3p']}"


        df_5 = pd.read_html(URL_5)[0]
        df_3 = pd.read_html(URL_3)[0]

        df_3.columns = df_3.iloc[0]
        df_3 = df_3[1:]
        # df_3

        df_5.columns = df_5.iloc[0]
        df_5 = df_5[1:]
        # df_5

        # df_5 = df_5.columns = df.iloc[0]
        # df_3 = df_3.columns = df.iloc[0]

        return df, df_3, df_5, Mat
    elif len(Mat) == 2:
        Key = [i for i in Mat if i != 'ID'][0]
        MirID = Mat[Key]
        URL = f"https://mirdb.org/cgi-bin/search.cgi?searchType=miRNA&full=mirbase&searchBox={MirID}"
        df_m = pd.read_html(URL)[0]
        df_m.columns = df_m.iloc[0]
        df_m = df_m[1:]
        
        return df, df_m, Mat

# _ = Fetch_miRNA_Target(ID)

# df, df_3, df_5, Mat = Fetch_miRNA_Target('hsa-miR-1275')

# df, df_3, df_5, Mat = Fetch_miRNA_Target('hsa-miR-1275')

In [5]:
Dic = defaultdict(list)

def To_dic(df):
    LOL = df[['Target Score', 'miRNA Name', 'Gene Symbol']].values
    for S,M,G  in LOL:
        Dic[G].append((M,S))
    return Dic

In [6]:
def main(ID):
    print(ID)
    try:
        Data = Fetch_miRNA_Target(ID)
        if len(Data) == 4:
            df, df_3, df_5, Mat = Data
            if len(df):
                Info_F = f"{directory}{Mat['ID']}_info.csv"
                df.to_csv(Info_F, index=True, header=False)

                p3_F = f"{directory}{Mat['ID']}_3p_{Mat['3p']}_targets.tsv"
                df_3.to_csv(p3_F, index=False, sep="\t")
                To_dic(df_3)


                p5_F = f"{directory}{Mat['ID']}_5p_{Mat['5p']}_targets.tsv"
                df_5.to_csv(p5_F, index=False, sep="\t")
                To_dic(df_5)
                
            else:
                print(f"{ID} Error")
                
        if len(Data) == 3:
            df, df_M, Mat = Data
            Key = [i for i in Mat if i != 'ID'][0]
            MirID = Mat[Key]
            if len(df):
                Info_F = f"{directory}{Mat['ID']}_info.csv"
                df.to_csv(Info_F, index=True, header=False)

                pm_F = f"{directory}{Mat['ID']}_{MirID}_targets.tsv"
                df_M.to_csv(pm_F, index=False, sep="\t")
                To_dic(df_M)

            else:
                print(f"{ID} Error")
                
    except Exception as e: # work on python 2.x
        print(e)
    # except:
    #     print(f"Error {ID}")
    
# main('hsa-miR-1275')

In [7]:
# df, df_3, df_5, Mat = Fetch_miRNA_Target('hsa-let-7a-3')
# list(df.index.tolist())

In [8]:
# df, df_m, Mat = Fetch_miRNA_Target('hsa-miR-1275')

In [9]:
for i in tqdm(range(len(IDs))):
    ID = IDs[i]
    main(ID)   

  0%|                                                     | 0/5 [00:00<?, ?it/s]

hsa-let-7a-2


 20%|█████████                                    | 1/5 [00:03<00:14,  3.54s/it]

hsa-let-7b


 40%|██████████████████                           | 2/5 [00:05<00:08,  2.82s/it]

hsa-let-7a-1


 60%|███████████████████████████                  | 3/5 [00:08<00:05,  2.61s/it]

hsa-let-7a-3


 80%|████████████████████████████████████         | 4/5 [00:10<00:02,  2.52s/it]

hsa-miR-1275


100%|█████████████████████████████████████████████| 5/5 [00:12<00:00,  2.40s/it]


In [10]:
!ls {directory}

hsa-let-7a-1_3p_MIMAT0004481_targets.tsv
hsa-let-7a-1_5p_MIMAT0000062_targets.tsv
hsa-let-7a-1_info.csv
hsa-let-7a-2_3p_MIMAT0010195_targets.tsv
hsa-let-7a-2_5p_MIMAT0000062_targets.tsv
hsa-let-7a-2_info.csv
hsa-let-7a-3_3p_MIMAT0004481_targets.tsv
hsa-let-7a-3_5p_MIMAT0000062_targets.tsv
hsa-let-7a-3_info.csv
hsa-let-7b_3p_MIMAT0004482_targets.tsv
hsa-let-7b_5p_MIMAT0000063_targets.tsv
hsa-let-7b_info.csv
hsa-mir-1275_info.csv
hsa-mir-1275_MIMAT0005929_targets.tsv


In [13]:
import json

with open("miRNA_map.json", "w") as outfile:
    json.dump(Dic, outfile)

In [14]:
print(len(Dic))

4246


In [15]:
Dic['ARID4B']

[('hsa-let-7a-2-3p', '80'),
 ('hsa-let-7b-3p', '100'),
 ('hsa-let-7a-3p', '100'),
 ('hsa-let-7a-3p', '100')]

In [None]:
## Network

In [21]:
fh = open('miRNA_Target_network.csv', 'w')
print("Symbol", 'miRNA', 'Score', sep = ",", file=fh)


for Gene in Dic:
    for MI,S in Dic[Gene]:
        # print(Gene, MI, S)
        print(Gene, MI, S, sep = ",", file=fh)
fh.close()

# END