In [None]:
import json
import requests
import pandas as pd

In [None]:
# AlphaFind search endpoint (staging deployment)
api = 'https://api.stage.alphafind-ted.dyn.cloud.e-infra.cz/search'

# TED API root and the files endpoint derived from it
tedapi = 'https://ted.cathdb.info/api/v1'
tedfiles = f'{tedapi}/files'

In [None]:
# value sent as the `limit` parameter of every AlphaFind search request
domain_limit = 20

In [None]:
# sample query structure; its second '-'-separated field is what gets
# looked up in TED as the UniProt accession
query = 'AF-A0A7L0KP91-F1-model_v4'

In [None]:
# look into TED database how many domains there are for qry
def count_domains(qry, timeout=60):
    """Return how many entries TED lists for the structure identified by `qry`.

    Parameters
    ----------
    qry : str
        Structure identifier such as 'AF-A0A7L0KP91-F1-model_v4'. The second
        '-'-separated field is used as the UniProt accession in the TED
        `uniprot/summary` request.
    timeout : float or tuple, optional
        Timeout in seconds handed to `requests.get`, so that an unresponsive
        server cannot block the notebook forever.

    Returns
    -------
    int
        Length of the 'data' list in the JSON response.

    Raises
    ------
    IndexError
        If `qry` contains no '-' separator.
    requests.HTTPError
        If TED answers with a 4xx/5xx status.
    requests.Timeout
        If TED does not answer within `timeout`.
    """
    accession = qry.split('-')[1]
    response = requests.get(f'{tedapi}/uniprot/summary/{accession}', timeout=timeout)
    # Fail loudly on an HTTP error instead of tripping over the error body below.
    response.raise_for_status()
    return len(response.json()['data'])

In [None]:
# number of TED domains of the query structure; drives how many AlphaFind
# searches are issued and normalises the query-side score
domains = count_domains(query)

In [None]:
# query AlphaFind, one search per TED domain of the query structure
# run repeatedly until all results are returned
def _alphafind_search(object_id, limit, timeout=300):
    """Run one AlphaFind search and return the 'results' list of its JSON reply.

    `object_id` and `limit` are sent as the `query` and `limit` URL parameters.
    Raises `requests.HTTPError` on a 4xx/5xx reply and `requests.Timeout` when
    the service does not answer within `timeout` seconds.
    """
    response = requests.get(
        api,
        # let requests build and URL-encode the query string
        params={'query': object_id, 'limit': limit},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()['results']


# bag[k] holds the hits for query domain TED{k+1:02d}
bag = [
    _alphafind_search(f'{query}_TED{domain:02d}', domain_limit)
    for domain in range(1, domains + 1)
]
# number of hits obtained for each query domain
[len(b) for b in bag]

In [None]:
# Group the hits by target structure:
#   tdom[target][target_domain] = {query_domain, tm_score, rmsd}
# A later hit on the same target domain overwrites the earlier one.
tdom = {}
for query_no, hits in enumerate(bag, start=1):
    query_label = f'TED{query_no:02d}'
    for hit in hits:
        # object_id is '<target>_<domain>': cut at the last underscore
        target, domain = hit['object_id'].rpartition('_')[::2]
        per_target = tdom.setdefault(target, {})
        per_target[domain] = {
            'query_domain': query_label,
            'tm_score': hit['tm_score'],
            'rmsd': hit['rmsd'],
        }
    
#tdom

In [None]:
# total number of TED domains of every target structure (one TED request per
# target); used to normalise the target-side score
tdomains = {target_id: count_domains(target_id) for target_id in tdom}
#tdomains

In [None]:
# Target-side score (common vs. target size): for every target, sum
# 1/(1+rmsd^2) over its matched domains and scale by 1/(target domain count).
# NOTE(review): the stored 'tm_score' is not used here, only 'rmsd'.
bag_tm_t = {
    target: (1. / tdomains[target]) * sum(
        1. / (1. + match['rmsd'] ** 2) for match in matches.values()
    )
    for target, matches in tdom.items()
}
#bag_tm

In [None]:
# Query-side score (common vs. query size): same per-domain sum of
# 1/(1+rmsd^2), scaled by 1/(number of query domains) instead.
bag_tm_q = {
    target: (1. / domains) * sum(
        1. / (1. + match['rmsd'] ** 2) for match in matches.values()
    )
    for target, matches in tdom.items()
}

In [None]:
# Build the summary table, one row per target structure, best query-side
# score first.
targets = list(tdom)
result = pd.DataFrame({
    'target' : targets,
    'target tm score' : [bag_tm_t[t] for t in targets],
    'query tm score' : [bag_tm_q[t] for t in targets],
    'target domains #' : [tdomains[t] for t in targets],
    'common domains #' : [len(tdom[t]) for t in targets],
    # "<query domain>-<target domain>" pairs, comma separated.
    # The f-string is double-quoted so the single-quoted key inside it also
    # parses on Python < 3.12.
    'domain pairs' : [
        ','.join(f"{v['query_domain']}-{k}" for k, v in tdom[t].items())
        for t in targets
    ],
}).set_index('target').sort_values('query tm score', ascending=False)
result