In [86]:
import requests
import simplejson as json
import pandas as pd
import numpy as np
import os
import json
import math

In [87]:
# Locate the configuration file next to the notebook. abspath() resolves the bare file
# name against the current working directory, so this assumes the kernel's working
# directory is the folder that contains "OLS matching.ipynb" and "Data/".
notebook_path = os.path.abspath("OLS matching.ipynb")
config_path = os.path.join(os.path.dirname(notebook_path), "Data/config0.json")

In [88]:
# Load the run configuration. Later cells read config["asctb_sid"] and iterate over
# config["references"]. The trailing bare `config` echoes the parsed dict as cell output.
with open(config_path) as config_file:
    config= json.load(config_file)
config

{'asctb_sid': '1tK916JyG5ZSXW_cXfsyZnzXfjyoN-8B2GXLbYD6_vF0',
 'references': [{'name': 'lung',
   'url': 'https://hubmapconsortium.github.io/asctb-azimuth-data-comparison/lung.csv',
   'organ_name': 'lung',
   'asctb_sheet_name': 'Lung_v1.1'},
  {'name': 'pancreas',
   'url': 'https://hubmapconsortium.github.io/asctb-azimuth-data-comparison/pancreas.csv',
   'organ_name': 'pancreas',
   'asctb_sheet_name': 'Pancreas_v1.0'},
  {'name': 'kidney',
   'url': 'https://hubmapconsortium.github.io/asctb-azimuth-data-comparison/kidney.csv',
   'organ_name': 'kidney',
   'asctb_sheet_name': 'Kidney_v1.1'},
  {'name': 'brain',
   'url': 'https://hubmapconsortium.github.io/asctb-azimuth-data-comparison/motor_cortex.csv',
   'organ_name': 'brain',
   'asctb_sheet_name': 'Brain_v1.1'},
  {'name': 'bone_marrow',
   'url': 'https://hubmapconsortium.github.io/asctb-azimuth-data-comparison/bone_marrow.csv',
   'organ_name': 'bone_marrow',
   'asctb_sheet_name': 'Bone_Marrow_v1.1'},
  {'name': 'blood_pmb

In [89]:
# Spreadsheet ID that fetch_asctb() interpolates into the Google Sheets CSV export URL;
# the same ID is used for every entry in config["references"].
asctb_sheet_id = config["asctb_sid"]

In [90]:
# Fetch Azimuth Data
def fetch_azimuth(az_url):
    """Read an Azimuth CSV and flatten it into ("CT/ID", "CT/LABEL") pairs.

    The first 10 rows of the CSV are skipped before the header is read. Every column
    whose name contains "ID" contributes to the ID list and every column whose name
    matches "AS/<digit>/LABEL" contributes to the label list; the columns are stacked
    end to end, in file order, and the two stacks are placed side by side.

    Parameters
    ----------
    az_url : anything ``pd.read_csv`` accepts (URL, path or file-like object).

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The full stacked table, and the same table with duplicate rows dropped and the
        index renumbered from 0.
    """
    azimuth_table = pd.read_csv(az_url, skiprows=10)

    id_columns = azimuth_table.filter(regex="ID")
    label_columns = azimuth_table.filter(regex="AS/[0-9]/LABEL$")

    # Stack the selected columns one after another (column-major order).
    stacked_ids = [value for name in id_columns for value in id_columns[name].tolist()]
    stacked_labels = [value for name in label_columns for value in label_columns[name].tolist()]

    id_frame = pd.DataFrame(stacked_ids).rename(columns={0: "CT/ID"})
    label_frame = pd.DataFrame(stacked_labels).rename(columns={0: "CT/LABEL"})

    paired = pd.concat([id_frame, label_frame], axis=1)
    paired_unique = paired.drop_duplicates().reset_index(drop=True)

    return paired, paired_unique
    

In [91]:
# Fetch Asctb Data
def fetch_asctb(sheet_id,asctb_sheet_name):
    """Read one sheet of a Google spreadsheet as CSV and flatten it into ID/label pairs.

    The sheet is downloaded through the spreadsheet's ``gviz`` CSV export URL and its
    first 3 rows are skipped before the header is read. Columns matching "^CT.*ID$" are
    stacked into a single "CT/ID" column and columns matching "CT/<digit>/LABEL" into a
    single "CT/LABEL" column; the two stacks are placed side by side.

    Parameters
    ----------
    sheet_id : str
        Spreadsheet ID inserted into the export URL.
    asctb_sheet_name : str
        Sheet (tab) name inserted into the export URL as-is (it is not URL-encoded).

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The full stacked table, and the same table with duplicate rows dropped and the
        index renumbered from 0.
    """
    sheet_csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={asctb_sheet_name}"
    asctb_table = pd.read_csv(sheet_csv_url, skiprows=3)

    id_columns = asctb_table.filter(regex="^CT.*ID$")
    label_columns = asctb_table.filter(regex="CT/[0-9]/LABEL$")

    # Stack the selected columns one after another (column-major order).
    stacked_ids = [value for name in id_columns for value in id_columns[name].tolist()]
    stacked_labels = [value for name in label_columns for value in label_columns[name].tolist()]

    id_frame = pd.DataFrame(stacked_ids).rename(columns={0: "CT/ID"})
    label_frame = pd.DataFrame(stacked_labels).rename(columns={0: "CT/LABEL"})

    paired = pd.concat([id_frame, label_frame], axis=1)
    paired_unique = paired.drop_duplicates().reset_index(drop=True)

    return paired, paired_unique

In [92]:
# Find incorrect azimuth CT
def incorrect_ct(azimuth_all_cts_label):
    """Return the rows whose cell-type ID does not start with "CL:".

    The ID is taken from the first column matching "^CT.*ID$". Missing IDs (NaN) are
    reported as well, because their string form does not start with "CL:".

    Parameters
    ----------
    azimuth_all_cts_label : DataFrame
        Table with an ID column; rows are selected with ``.loc`` using their position,
        so the frame is expected to carry the default 0..n-1 index.

    Returns
    -------
    DataFrame
        The offending rows, de-duplicated, with the index renumbered from 0.
    """
    id_rows = azimuth_all_cts_label.filter(regex="^CT.*ID$").values.tolist()
    first_ids = [row[0] for row in id_rows]

    bad_positions = [
        position
        for position, ct_id in enumerate(first_ids)
        if not str(ct_id).startswith("CL:")  # and not UBERON was considered and left disabled
    ]

    flagged = azimuth_all_cts_label.loc[bad_positions].drop_duplicates()
    return flagged.reset_index(drop=True)

In [93]:
def check_in_asctb(cl_az,i,asctb_kidney_all_cts_label_unique,az_row_all,asctb_row_all,not_matching_all):    
    """Record every row of the ASCT+B table whose "CT/ID" equals ``cl_az``.

    The three list arguments are mutated in place; nothing is returned.

    Parameters
    ----------
    cl_az : the ID to look for.
    i : row number of ``cl_az`` in the caller's table.
    asctb_kidney_all_cts_label_unique : table with a "CT/ID" column indexed 0..n-1.
    az_row_all, asctb_row_all : lists that receive ``i`` and the matching row number,
        once per match (all matches are kept, not just the first).
    not_matching_all : list that receives ``i`` when there is no match at all.
    """
    asctb_ids = asctb_kidney_all_cts_label_unique['CT/ID']
    matched_rows = [j for j in range(len(asctb_ids)) if cl_az == asctb_ids[j]]

    if not matched_rows:
        not_matching_all.append(i)
        return

    az_row_all.extend([i] * len(matched_rows))
    asctb_row_all.extend(matched_rows)

In [94]:
def check_in_az(cl_asctb,i,az_kidney_all_cts_label_unique,az_row,asctb_row,not_matching):    
    """Record the first row of the Azimuth table whose "CT/ID" equals ``cl_asctb``.

    The three list arguments are mutated in place; nothing is returned.

    Parameters
    ----------
    cl_asctb : the ID to look for.
    i : row number of ``cl_asctb`` in the caller's table.
    az_kidney_all_cts_label_unique : table with a "CT/ID" column indexed 0..n-1.
    az_row, asctb_row : lists that receive the first matching row number and ``i``.
    not_matching : list that receives ``i`` when there is no match.
    """
    az_ids = az_kidney_all_cts_label_unique['CT/ID']
    # Only the first hit counts; the scan stops there.
    first_hit = next((j for j in range(len(az_ids)) if cl_asctb == az_ids[j]), None)

    if first_hit is None:
        not_matching.append(i)
    else:
        az_row.append(first_hit)
        asctb_row.append(i)

In [95]:
def perfect_match_for_azimuthct_in_asctb(azimuth_all_cts_label_unique,asctb_all_cts_label_unique):
    """Pair every Azimuth cell type with the ASCT+B rows that carry the same "CT/ID".

    Only string IDs starting with "CL:" are looked up. Anything else (NaN, None, a
    number, another ontology prefix) is reported as a mismatch instead of raising.
    An Azimuth row is paired with *every* ASCT+B row that has the same ID.

    Parameters
    ----------
    azimuth_all_cts_label_unique : DataFrame with "CT/ID" and "CT/LABEL" columns.
    asctb_all_cts_label_unique : DataFrame with "CT/ID" and "CT/LABEL" columns.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``perfect_matches_all`` with columns "AZ.CT/ID", "AZ.CT/LABEL", "ASCTB.CT/ID",
        "ASCTB.CT/LABEL" (de-duplicated), and ``az_mismatches_all``: the Azimuth rows
        with no match, keeping their original column names (de-duplicated). Neither
        input frame is modified.
    """
    # Index the ASCT+B row positions by ID once, so each Azimuth ID is a dictionary
    # lookup instead of a scan over the whole ASCT+B table.
    asctb_positions_by_id = {}
    for position, ct_id in enumerate(asctb_all_cts_label_unique['CT/ID'].tolist()):
        if isinstance(ct_id, str):
            asctb_positions_by_id.setdefault(ct_id, []).append(position)

    az_row_all = []
    asctb_row_all = []
    not_matching_all = []
    for position, ct_id in enumerate(azimuth_all_cts_label_unique['CT/ID'].tolist()):
        hits = []
        if isinstance(ct_id, str) and ct_id.startswith("CL:"):
            hits = asctb_positions_by_id.get(ct_id, [])
        if hits:
            # Repeat the Azimuth row once per matching ASCT+B row so the two
            # selections line up row by row when concatenated below.
            az_row_all.extend([position] * len(hits))
            asctb_row_all.extend(hits)
        else:
            not_matching_all.append(position)

    az_matches_all = (
        azimuth_all_cts_label_unique.iloc[az_row_all]
        .reset_index(drop=True)
        .rename(columns={"CT/ID": "AZ.CT/ID", "CT/LABEL": "AZ.CT/LABEL"})
    )
    asctb_matches_all = (
        asctb_all_cts_label_unique.iloc[asctb_row_all]
        .reset_index(drop=True)
        .rename(columns={"CT/ID": "ASCTB.CT/ID", "CT/LABEL": "ASCTB.CT/LABEL"})
    )

    perfect_matches_all = (
        pd.concat([az_matches_all, asctb_matches_all], axis=1)
        .drop_duplicates()
        .reset_index(drop=True)
    )

    az_mismatches_all = (
        azimuth_all_cts_label_unique.iloc[not_matching_all]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    return perfect_matches_all,az_mismatches_all

In [96]:
def perfect_match_for_asctbct_in_azimuth(azimuth_all_cts_label_unique,asctb_kidney_all_cts_label_unique):
    """Return the ASCT+B rows whose "CT/ID" has no exact match in the Azimuth table.

    A row counts as matched only when its ID is a string starting with "CL:" that also
    appears in the Azimuth "CT/ID" column. Every other row (missing ID, non-string ID,
    other prefix, or a CL ID absent from Azimuth) is returned as a mismatch; non-string
    IDs no longer raise.

    The side-by-side table of matches that used to be assembled here was never
    returned, so it is no longer built.

    Parameters
    ----------
    azimuth_all_cts_label_unique : DataFrame with a "CT/ID" column.
    asctb_kidney_all_cts_label_unique : DataFrame with a "CT/ID" column.

    Returns
    -------
    DataFrame
        The unmatched ASCT+B rows in their original order, with the original column
        names and the index renumbered from 0. The input frames are not modified.
    """
    # Set membership replaces the per-row scan over the Azimuth table.
    azimuth_ids = set(azimuth_all_cts_label_unique['CT/ID'].tolist())

    not_matching = [
        position
        for position, ct_id in enumerate(asctb_kidney_all_cts_label_unique['CT/ID'].tolist())
        if not (isinstance(ct_id, str) and ct_id.startswith("CL:") and ct_id in azimuth_ids)
    ]

    asctb_mismatches = asctb_kidney_all_cts_label_unique.iloc[not_matching].reset_index(drop=True)

    return asctb_mismatches
    

In [97]:
def incorrect_cts_ebi(mismatches):
    """Split rows by whether their "CT/ID" resolves in the EBI OLS Cell Ontology lookup.

    Each string ID is looked up with a GET request (":" replaced by "_" and appended to
    the OLS terms URL). A 200 response puts the row in the "found" table; any other
    status puts it in the "not found" table. Rows whose ID is not a string (NaN, None,
    numbers) are reported as not found without issuing a request.

    Parameters
    ----------
    mismatches : DataFrame with a "CT/ID" column.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(not_found_in_ols, found_in_ols)``, each with the index renumbered from 0.

    Raises
    ------
    requests.exceptions.RequestException
        If a lookup cannot be completed (connection failure or timeout).
    """
    # Loop-invariant request settings.
    url = "http://www.ebi.ac.uk/ols/api/ontologies/cl/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F"
    headers = {'Accept': 'application/json'}
    # Without a timeout an unresponsive server would block the notebook indefinitely.
    timeout_seconds = 30

    found_in_ols=[]
    not_found_in_ols=[]

    for position, ct_id in enumerate(mismatches['CT/ID'].tolist()):
        if not isinstance(ct_id, str):
            not_found_in_ols.append(position)
            continue

        response = requests.get(url + ct_id.replace(":","_"), headers=headers, timeout=timeout_seconds)
        if response.status_code == 200:
            found_in_ols.append(position)
        else:
            not_found_in_ols.append(position)

    az_not_found_in_ols = mismatches.iloc[not_found_in_ols].reset_index(drop=True)
    az_mismatch_asctb_all = mismatches.iloc[found_in_ols].reset_index(drop=True)

    return az_not_found_in_ols,az_mismatch_asctb_all

In [98]:
def add_hier(azimuth_matches_tree,hierarchy_list_all): 
    """Attach the recorded ontology walk of each Azimuth cell type to its row.

    Parameters
    ----------
    azimuth_matches_tree : DataFrame
        Cell types that were walked up the ontology, with "CT/ID" / "CT/LABEL" columns.
        It is not modified; the renamed columns only appear in the returned frame.
    hierarchy_list_all : list
        One entry per walk. An entry is ``[[hierarchy], [i]]`` (no match) or
        ``[[hierarchy], [i], [j]]`` (match), where ``hierarchy`` is a dict mapping each
        visited term ID to its label in visiting order. The flat ``[hierarchy, i]``
        form is accepted too and treated as "no match".

    Returns
    -------
    DataFrame
        The input columns renamed to "AZ.CT/ID" / "AZ.CT/LABEL", followed by
        "Match Found" ("Yes"/"No"), "ASCTB.CT/ID", "ASCTB.CT/LABEL" (the last term of
        the walk, or "Not found"), "Hierarchy Length" and "Hierarchy" (the walk
        rendered as "ID (label) >> ID (label) ..."). Rows are combined by index.
    """
    rows = []
    for entry in hierarchy_list_all:
        # The hierarchy dict is normally wrapped in a one-element list; unwrap it, but
        # also accept an entry that stores the dict directly.
        hierarchy = entry[0] if isinstance(entry[0], dict) else entry[0][0]

        if len(entry) == 3:
            # A third element means the walk ended on a term present in the other
            # table; that term is the last one added to the hierarchy.
            matched_id, matched_label = list(hierarchy.items())[-1]
            found = "Yes"
        else:
            matched_id = matched_label = "Not found"
            found = "No"

        # f-string formatting keeps this working when a label is missing (NaN).
        trail = " >> ".join(f"{ct_id} ({label})" for ct_id, label in hierarchy.items())
        rows.append((found, matched_id, matched_label, len(hierarchy), trail))

    details = pd.DataFrame(
        rows,
        columns=["Match Found", "ASCTB.CT/ID", "ASCTB.CT/LABEL", "Hierarchy Length", "Hierarchy"],
    )

    renamed = azimuth_matches_tree.rename(columns = {"CT/ID":"AZ.CT/ID","CT/LABEL":"AZ.CT/LABEL"})
    df_hier=pd.concat([renamed,details],axis=1)
    return df_hier
    

In [126]:
def add_hier_1(azimuth_matches_tree,hierarchy_list_all): 
    """Attach the recorded ontology walk of each ASCT+B cell type to its row.

    Mirror image of ``add_hier``: the input rows are labelled as ASCT+B and the matched
    term as Azimuth.

    Parameters
    ----------
    azimuth_matches_tree : DataFrame
        Cell types that were walked up the ontology, with "CT/ID" / "CT/LABEL" columns.
        It is not modified; the renamed columns only appear in the returned frame.
    hierarchy_list_all : list
        One entry per walk. An entry is ``[[hierarchy], [i]]`` (no match) or
        ``[[hierarchy], [i], [j]]`` (match), where ``hierarchy`` is a dict mapping each
        visited term ID to its label in visiting order. The flat ``[hierarchy, i]``
        form is accepted too and treated as "no match".

    Returns
    -------
    DataFrame
        The input columns renamed to "ASCTB.CT/ID" / "ASCTB.CT/LABEL", followed by
        "Match Found" ("Yes"/"No"), "AZ.CT/ID", "AZ.CT/LABEL" (the last term of the
        walk, or "Not found"), "Hierarchy Length" and "Hierarchy" (the walk rendered
        as "ID (label) >> ID (label) ..."). Rows are combined by index.
    """
    rows = []
    for entry in hierarchy_list_all:
        # The hierarchy dict is normally wrapped in a one-element list; unwrap it, but
        # also accept an entry that stores the dict directly.
        hierarchy = entry[0] if isinstance(entry[0], dict) else entry[0][0]

        if len(entry) == 3:
            # A third element means the walk ended on a term present in the other
            # table; that term is the last one added to the hierarchy.
            matched_id, matched_label = list(hierarchy.items())[-1]
            found = "Yes"
        else:
            matched_id = matched_label = "Not found"
            found = "No"

        # f-string formatting keeps this working when a label is missing (NaN).
        trail = " >> ".join(f"{ct_id} ({label})" for ct_id, label in hierarchy.items())
        rows.append((found, matched_id, matched_label, len(hierarchy), trail))

    details = pd.DataFrame(
        rows,
        columns=["Match Found", "AZ.CT/ID", "AZ.CT/LABEL", "Hierarchy Length", "Hierarchy"],
    )

    renamed = azimuth_matches_tree.rename(columns = {"CT/ID":"ASCTB.CT/ID","CT/LABEL":"ASCTB.CT/LABEL"})
    df_hier=pd.concat([renamed,details],axis=1)
    return df_hier
    

############

In [99]:
def check_in_asctb_2(asctb_kidney_all_cts_label_unique,cl_az,i,all_links_az,hierarchy_all):    
    """Look for ``cl_az`` in the ASCT+B table; on a miss, continue the walk upwards.

    Results are written to the module-level lists ``tree_match_asctb_all``,
    ``tree_match_az_all`` and ``hierarchy_list_all``; nothing is returned.

    Parameters
    ----------
    asctb_kidney_all_cts_label_unique : table with a "CT/ID" column indexed 0..n-1.
    cl_az : term ID currently being tested.
    i : row number of the cell type the walk started from.
    all_links_az : link dictionary of the current term, handed on to ``ols_call_1``.
    hierarchy_all : dict of the terms visited so far (ID -> label).
    """
    asctb_ids = asctb_kidney_all_cts_label_unique['CT/ID']
    matched_rows = [j for j in range(len(asctb_ids)) if cl_az == asctb_ids[j]]

    if not matched_rows:
        # Not present at this level: report the term and try its parent.
        print(cl_az)
        ols_call_1(asctb_kidney_all_cts_label_unique,cl_az,i,all_links_az,hierarchy_all)
        return

    # NOTE(review): every matching row is recorded (there is no early exit), so a
    # single `i` can contribute several entries to hierarchy_list_all.
    for j in matched_rows:
        tree_match_asctb_all.append(j)
        tree_match_az_all.append(i)
        hierarchy_list_all.append([[hierarchy_all],[i],[j]])
        print(cl_az,asctb_ids[j],"Match found")

In [100]:
def ols_call_1(asctb_kidney_all_cts_label_unique,cl_az,i,all_links_az,hierarchy_all):
    """Fetch the parent of the current term and test it against the ASCT+B table.

    The parent is requested from ``all_links_az['parents']['href']``. Only the first
    term of the response is followed. The parent is added to ``hierarchy_all`` and then
    either ends the walk (its ID ends in "0000000") or is passed to
    ``check_in_asctb_2``, which may call back into this function for the next level.

    Walks that end without a match are recorded in the module-level lists
    ``tree_not_match_all`` and ``hierarchy_list_all``; nothing is returned.

    Parameters
    ----------
    asctb_kidney_all_cts_label_unique : table passed through to ``check_in_asctb_2``.
    cl_az : ID of the current term (not used here; kept for the call signature).
    i : row number of the cell type the walk started from.
    all_links_az : link dictionary of the current term.
    hierarchy_all : dict of the terms visited so far (ID -> label); updated in place.
    """
    headers = {
      'Accept': 'application/json'
    }

    def _record_no_match():
        # Every dead end is stored in the same [[hierarchy], [i]] shape.
        tree_not_match_all.append(i)
        hierarchy_list_all.append([[hierarchy_all],[i]])

    #Azimuth
    try:
        parents_href = all_links_az['parents']['href']
    except (KeyError, TypeError):
        # The current term exposes no parents link, so the walk cannot continue.
        print("No parent")
        _record_no_match()
        return

    try:
        # Timeout so an unresponsive server cannot stall the whole traversal.
        response = requests.request("GET", parents_href, headers=headers, data={}, timeout=30)
    except requests.exceptions.RequestException as error:
        # Keep the best-effort behaviour (the term is recorded as unmatched) but say
        # what actually happened; KeyboardInterrupt and programming errors propagate.
        print("Parent request failed:", error)
        _record_no_match()
        return

    if response.status_code!=200:
        print("Status !=200")
        _record_no_match()
        return

    parent_term = json.loads(response.text)['_embedded']['terms'][0]
    all_links_az = parent_term['_links']
    ct_id_az = parent_term['obo_id']
    hierarchy_all[ct_id_az] = parent_term['label']

    if ct_id_az.endswith('0000000'):
        # An ID ending in seven zeros (e.g. CL:0000000) is treated as the top of the
        # walk: nothing matched on the way up.
        _record_no_match()
        print(ct_id_az, "No match")
    else:
        check_in_asctb_2(asctb_kidney_all_cts_label_unique,ct_id_az,i,all_links_az,hierarchy_all)

In [101]:
def tree_traversal_azimuth(az_mismatch_asctb_all,asctb_kidney_all_cts_label_unique):
    """Walk each unmatched Azimuth cell type up the ontology looking for an ASCT+B match.

    For every row the term is looked up in OLS and its parents are followed through
    ``ols_call_1`` / ``check_in_asctb_2``.

    The walk reads and appends to the module-level lists ``tree_match_az_all``,
    ``tree_match_asctb_all``, ``tree_not_match_all`` and ``hierarchy_list_all``; the
    caller must reset them to empty lists before each call.

    Parameters
    ----------
    az_mismatch_asctb_all : DataFrame
        Azimuth rows to walk, with string "CT/ID" and "CT/LABEL" columns and a 0..n-1
        index.
    asctb_kidney_all_cts_label_unique : DataFrame
        ASCT+B table to match against, with "CT/ID" / "CT/LABEL" and a 0..n-1 index.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The matched pairs side by side ("AZ.CT/ID", "AZ.CT/LABEL", "ASCTB.CT/ID",
        "ASCTB.CT/LABEL"), and the Azimuth rows that never matched ("AZ.CT/ID",
        "AZ.CT/LABEL").
    """
    url = "http://www.ebi.ac.uk/ols/api/ontologies/cl/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F"
    payload={}

    headers = {
      'Accept': 'application/json'
    }
    for i in range(len(az_mismatch_asctb_all['CT/ID'])):
        cl_az=az_mismatch_asctb_all['CT/ID'][i]
        print(cl_az,"Original")
        # The walk starts from the cell type itself.
        hierarchy_all={cl_az: az_mismatch_asctb_all['CT/LABEL'][i]}
        cl_az=cl_az.replace(":","_")
        # Timeout so an unresponsive server cannot stall the whole traversal.
        response = requests.request("GET", url+cl_az, headers=headers, data=payload, timeout=30)
        if response.status_code!=200:
            tree_not_match_all.append(i)
            # Same [[hierarchy], [i]] shape as every other "no match" entry; add_hier
            # reads the hierarchy as entry[0][0].
            hierarchy_list_all.append([[hierarchy_all],[i]])
        else:
            result_az= json.loads(response.text)
            all_links_az=result_az['_embedded']['terms'][0]['_links']
            ols_call_1(asctb_kidney_all_cts_label_unique,cl_az,i,all_links_az,hierarchy_all)

    az_matches_tree_all = (
        az_mismatch_asctb_all.loc[tree_match_az_all]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"AZ.CT/ID","CT/LABEL":"AZ.CT/LABEL"})
    )
    asctb_matches_tree_all = (
        asctb_kidney_all_cts_label_unique.loc[tree_match_asctb_all]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"ASCTB.CT/ID","CT/LABEL":"ASCTB.CT/LABEL"})
    )

    az_final_matches =pd.concat([az_matches_tree_all,asctb_matches_tree_all],axis=1)  

    az_mismatches_final_all = (
        az_mismatch_asctb_all.loc[tree_not_match_all]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"AZ.CT/ID","CT/LABEL":"AZ.CT/LABEL"})
    )

    return az_final_matches,az_mismatches_final_all

In [102]:
def check_in_az_1(az_kidney_all_cts_label_unique,cl_asctb,i,all_links_asctb,hierarchy):    
    """Look for ``cl_asctb`` in the Azimuth table; on a miss, continue the walk upwards.

    Only the first matching row is recorded. Results are written to the module-level
    lists ``tree_match_asctb_1``, ``tree_match_az_1`` and ``hierarchy_list_1``; nothing
    is returned.

    Parameters
    ----------
    az_kidney_all_cts_label_unique : table with a "CT/ID" column indexed 0..n-1.
    cl_asctb : term ID currently being tested.
    i : row number of the cell type the walk started from.
    all_links_asctb : link dictionary of the current term, handed on to ``ols_call``.
    hierarchy : dict of the terms visited so far (ID -> label).
    """
    az_ids = az_kidney_all_cts_label_unique['CT/ID']
    first_hit = next((j for j in range(len(az_ids)) if cl_asctb == az_ids[j]), None)

    if first_hit is None:
        # Not present at this level: report the term and try its parent.
        print(cl_asctb)
        ols_call(az_kidney_all_cts_label_unique,cl_asctb,i,all_links_asctb,hierarchy)
        return

    tree_match_asctb_1.append(i)
    tree_match_az_1.append(first_hit)
    hierarchy_list_1.append([[hierarchy],[i],[first_hit]])
    print(cl_asctb,az_ids[first_hit],"Match found")

In [103]:
def ols_call(azimuth_all_cts_label_unique,cl_asctb,i,all_links_asctb,hierarchy):
    """Fetch the parent of the current term and test it against the Azimuth table.

    The parent is requested from ``all_links_asctb['parents']['href']``. Only the first
    term of the response is followed. The parent is added to ``hierarchy`` and then
    either ends the walk (its ID ends in "0000000") or is passed to ``check_in_az_1``,
    which may call back into this function for the next level.

    Walks that end without a match are recorded in the module-level lists
    ``tree_not_match_1`` and ``hierarchy_list_1``; nothing is returned.

    Parameters
    ----------
    azimuth_all_cts_label_unique : table passed through to ``check_in_az_1``.
    cl_asctb : ID of the current term (not used here; kept for the call signature).
    i : row number of the cell type the walk started from.
    all_links_asctb : link dictionary of the current term.
    hierarchy : dict of the terms visited so far (ID -> label); updated in place.
    """
    headers = {
      'Accept': 'application/json'
    }

    def _record_no_match():
        # Every dead end is stored in the same [[hierarchy], [i]] shape.
        tree_not_match_1.append(i)
        hierarchy_list_1.append([[hierarchy],[i]])

    #ASCTB
    try:
        parents_href = all_links_asctb['parents']['href']
    except (KeyError, TypeError):
        # The current term exposes no parents link, so the walk cannot continue.
        print("No parent")
        _record_no_match()
        return

    try:
        # Timeout so an unresponsive server cannot stall the whole traversal.
        response = requests.request("GET", parents_href, headers=headers, data={}, timeout=30)
    except requests.exceptions.RequestException as error:
        # Keep the best-effort behaviour (the term is recorded as unmatched) but say
        # what actually happened; KeyboardInterrupt and programming errors propagate.
        print("Parent request failed:", error)
        _record_no_match()
        return

    if response.status_code!=200:
        print("Status !=200")
        _record_no_match()
        return

    parent_term = json.loads(response.text)['_embedded']['terms'][0]
    all_links_asctb = parent_term['_links']
    ct_id_asctb = parent_term['obo_id']
    hierarchy[ct_id_asctb] = parent_term['label']

    if ct_id_asctb.endswith('0000000'):
        # An ID ending in seven zeros (e.g. CL:0000000) is treated as the top of the
        # walk: nothing matched on the way up.
        _record_no_match()
        print(ct_id_asctb, "No match")
    else:
        check_in_az_1(azimuth_all_cts_label_unique,ct_id_asctb,i,all_links_asctb,hierarchy)

In [104]:
def tree_traversal_asctb(asctb_mismatch_az,azimuth_all_cts_label_unique):
    """Walk each unmatched ASCT+B cell type up the ontology looking for an Azimuth match.

    For every row the term is looked up in OLS and its parents are followed through
    ``ols_call`` / ``check_in_az_1``.

    The walk reads and appends to the module-level lists ``tree_match_asctb_1``,
    ``tree_match_az_1``, ``tree_not_match_1`` and ``hierarchy_list_1``; the caller must
    reset them to empty lists before each call.

    Parameters
    ----------
    asctb_mismatch_az : DataFrame
        ASCT+B rows to walk, with string "CT/ID" and "CT/LABEL" columns and a 0..n-1
        index.
    azimuth_all_cts_label_unique : DataFrame
        Azimuth table to match against, with "CT/ID" / "CT/LABEL" and a 0..n-1 index.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The matched pairs side by side ("ASCTB.CT/ID", "ASCTB.CT/LABEL", "AZ.CT/ID",
        "AZ.CT/LABEL"), and the ASCT+B rows that never matched ("ASCTB.CT/ID",
        "ASCTB.CT/LABEL").
    """
    url = "http://www.ebi.ac.uk/ols/api/ontologies/cl/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F"
    payload={}
    headers = {
      'Accept': 'application/json'
    }
    for i in range(len(asctb_mismatch_az['CT/ID'])):
        cl_asctb=asctb_mismatch_az['CT/ID'][i]
        print(cl_asctb,"Original")
        # The walk starts from the cell type itself.
        hierarchy={cl_asctb: asctb_mismatch_az['CT/LABEL'][i]}
        cl_asctb=cl_asctb.replace(":","_")
        # Timeout so an unresponsive server cannot stall the whole traversal.
        response = requests.request("GET", url+cl_asctb, headers=headers, data=payload, timeout=30)
        if response.status_code!=200:
            tree_not_match_1.append(i)
            # Same [[hierarchy], [i]] shape as every other "no match" entry; add_hier_1
            # reads the hierarchy as entry[0][0].
            hierarchy_list_1.append([[hierarchy],[i]])
        else:
            result_asctb= json.loads(response.text)
            all_links_asctb=result_asctb['_embedded']['terms'][0]['_links']
            ols_call(azimuth_all_cts_label_unique,cl_asctb,i,all_links_asctb,hierarchy) 

    asctb_matches_tree = (
        asctb_mismatch_az.loc[tree_match_asctb_1]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"ASCTB.CT/ID","CT/LABEL":"ASCTB.CT/LABEL"})
    )
    az_matches_tree = (
        azimuth_all_cts_label_unique.loc[tree_match_az_1]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"AZ.CT/ID","CT/LABEL":"AZ.CT/LABEL"})
    )

    az_final_matches =pd.concat([asctb_matches_tree,az_matches_tree],axis=1)

    asctb_mismatches_final = (
        asctb_mismatch_az.loc[tree_not_match_1]
        .reset_index(drop=True)
        .rename(columns = {"CT/ID":"ASCTB.CT/ID","CT/LABEL":"ASCTB.CT/LABEL"})
    )

    return az_final_matches,asctb_mismatches_final

In [124]:
# parse config file
# Driver: for each reference in the config, compare the Azimuth and ASCT+B cell types
# and write every intermediate table to one Excel workbook named after the reference.
# `ct` counts processed references and is only used by the early-exit guard at the end.
ct=0
for ref in config['references']:
    name= ref['name']
    asctb_sheet_name = ref['asctb_sheet_name']
    az_url= ref['url']
    
    # Fetch Azimuth data
    azimuth_all_cts_label,azimuth_all_cts_label_unique = fetch_azimuth(az_url)
    
    # Fetch ASCTB data
    asctb_all_cts_label,asctb_all_cts_label_unique = fetch_asctb(asctb_sheet_id,asctb_sheet_name)

    # Number of Azimuth cts without IDs
    # (rows that have a label but no ID)
    azimuth_missing_cts=azimuth_all_cts_label_unique[azimuth_all_cts_label_unique['CT/ID'].isna() & ~azimuth_all_cts_label_unique['CT/LABEL'].isna()].reset_index(drop=True)
    #azimuth_missing_cts.to_csv("./Data/Final/"+name+ ".csv",index=False)
 
    # Number of ASCTB cts without IDs
    # (rows that have a label but no ID)
    asctb_missing_cts=asctb_all_cts_label_unique[asctb_all_cts_label_unique['CT/ID'].isna() & ~asctb_all_cts_label_unique['CT/LABEL'].isna()].reset_index(drop=True)
    #asctb_missing_cts.to_csv("./Data/Final/"+name+ ".csv",index=False)

       
    # Incorrect CT ID in Azimuth
    # (computed on the full, non-deduplicated table)
    azimuth_incorrect_ct_ids=incorrect_ct(azimuth_all_cts_label)
    
    # Incorrect CT ID in Asctb
    asctb_incorrect_ct_ids=incorrect_ct(asctb_all_cts_label)
    

    # Perfect Match and Mismatch for Azimuth CT in ASCTB (AZ - ASCTB)
    azimuth_perfect_matches,azimuth_mismatches=perfect_match_for_azimuthct_in_asctb(azimuth_all_cts_label_unique,asctb_all_cts_label_unique)

        
#     print("Matches",len(azimuth_perfect_matches))
#     print("Mismatches",len(azimuth_mismatches))
#     print("Total",len(azimuth_all_cts_label_unique))
    
    # Perfect Match and Mismatch for ASCTB CT in Azimuth (ASCTB - Azimuth)
    # Only the mismatches are returned for this direction.
    asctb_mismatches=perfect_match_for_asctbct_in_azimuth(azimuth_all_cts_label_unique,asctb_all_cts_label_unique)
    
    print("Matches",len(azimuth_perfect_matches))
    print("ASCTB Mismatches",len(asctb_mismatches))
    print("Azimuth Mismatches",len(azimuth_mismatches))
    print("ASCTB Total",len(asctb_all_cts_label_unique))
    print("Azimuth Total",len(azimuth_all_cts_label_unique))

    # Incorrect CT ID in Azimuth (EBI)
    # First result: IDs the OLS lookup did not resolve; second: IDs it did resolve,
    # which go on to the tree traversal below.
    incorrect_ct_azimuth_ebi, azimuth_mismatches_filtered=incorrect_cts_ebi(azimuth_mismatches)
      
    # Incorrect CT ID in Asctb (EBI)
    incorrect_ct_asctb_ebi, asctb_mismatches_filtered=incorrect_cts_ebi(asctb_mismatches)
  
    # Tree traversal for matching Az to Asctb. Traversing up Azimuth
    # These four module-level lists are the accumulators that tree_traversal_azimuth,
    # ols_call_1 and check_in_asctb_2 append to; they must be reset for every reference.
    tree_match_asctb_all=[]
    tree_match_az_all=[]
    tree_not_match_all=[]
    hierarchy_list_all=[]
    
    azimuth_matches_tree,azimuth_mismatches_tree  = tree_traversal_azimuth(azimuth_mismatches_filtered,asctb_all_cts_label_unique)
    
    azimuth_matches_tree_hier = add_hier(azimuth_mismatches_filtered,hierarchy_list_all)
        
    
    # Tree traversal for matching Asctb to Az. Traversing up Asctb
    # Same pattern in the other direction: accumulators for tree_traversal_asctb,
    # ols_call and check_in_az_1.
    
    tree_match_asctb_1=[]
    tree_match_az_1=[]
    tree_not_match_1=[]
    hierarchy_list_1=[]
    asctb_matches_tree_all,asctb_mismatch_tree = tree_traversal_asctb(asctb_mismatches_filtered,azimuth_all_cts_label_unique)
    
    asctb_matches_tree_hier = add_hier_1(asctb_mismatches_filtered,hierarchy_list_1)
    
    # One workbook per reference; the "./Data/Final/" directory is not created here.
    with pd.ExcelWriter("./Data/Final/"+name+ ".xlsx") as writer:
   
    # use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
        azimuth_missing_cts.to_excel(writer, sheet_name="AZ_missing_cts", index=False)
        asctb_missing_cts.to_excel(writer, sheet_name="Asctb_missing_cts", index=False)
        azimuth_incorrect_ct_ids.to_excel(writer, sheet_name="AZ_incorrect_cts", index=False)
        asctb_incorrect_ct_ids.to_excel(writer, sheet_name="Asctb_incorrect_cts", index=False)
        azimuth_perfect_matches.to_excel(writer, sheet_name="AZ_ASCTB_Cts_perfect_matches", index=False)
        incorrect_ct_azimuth_ebi.to_excel(writer, sheet_name="AZ_cts_not_found_in_ebi", index=False)
        incorrect_ct_asctb_ebi.to_excel(writer, sheet_name="Asctb_cts_not_found_in_ebi", index=False)
        azimuth_matches_tree_hier.to_excel(writer, sheet_name="AZ_cts_tree_traversal", index=False)
        asctb_matches_tree_hier.to_excel(writer, sheet_name="Asctb_cts_tree_traversal", index=False)
        
        # NOTE(review): the next two sheet names are 39 and 36 characters long, which
        # is over Excel's documented 31-character sheet-name limit -- confirm that the
        # writer engine in use accepts them and that the file opens cleanly in Excel.
        asctb_mismatch_tree.to_excel(writer, sheet_name="Asctb_cts_mismatch_after_tree_traversal", index=False)
        azimuth_mismatches_tree.to_excel(writer, sheet_name="AZ_cts_mismatch_after_tree_traversal", index=False)
    
    ct+=1
    print(name)
    # NOTE(review): this guard stops the loop after the first reference, so only one
    # entry of config['references'] is processed per run -- presumably left in from
    # debugging; remove it to process every reference.
    if ct==1:
        break
    

Matches 14
ASCTB Mismatches 82
Azimuth Mismatches 29
ASCTB Total 96
Azimuth Total 43
CL:1000348 Original
CL:0000307
CL:0002202
CL:0002632
CL:0002368
CL:0002076
CL:0002371
CL:0000003
CL:0000000 No match
CL:1000329 Original
CL:0000307
CL:0002202
CL:0002632
CL:0002368
CL:0002076
CL:0002371
CL:0000003
CL:0000000 No match
CL:0002145 Original
CL:0005012
CL:0000067
CL:0000064 CL:0000064 Match found
CL:1000377 Original
CL:0000307
CL:0002202
CL:0002632
CL:0002368
CL:0002076
CL:0002371
CL:0000003
CL:0000000 No match
CL:1000330 Original
CL:0019001
CL:0000313 CL:0000313 Match found
CL:0002600 Original
CL:0019019
CL:0000192 CL:0000192 Match found
CL:0000185 Original
CL:0000183
CL:0000003
CL:0000000 No match
CL:0019002 Original
CL:0000138
CL:0000667
CL:0000327
CL:0000499
CARO:0000000 No match
CL:1000413 Original
CL:0000071
CL:0002139
CL:0000115
CL:0002078
CL:0002371
CL:0000003
CL:0000000 No match
CL:0019018 Original
CL:0000359 CL:0000359 Match found
CL:0002332 Original
CL:0002328 CL:0002328 Match fo

In [125]:
asctb_mismatch_tree

Unnamed: 0,ASCTB.CT/ID,ASCTB.CT/LABEL
0,CL:1000348,basal cell of epithelium of trachea
1,CL:1000329,goblet cell of epithelium of trachea
2,CL:1000377,dense-core granule cell of epithelium of trachea
3,CL:0000185,myoepithelial cells of glands
4,CL:0019002,tracheobronchial chondrocyte
5,CL:1000413,arterial endothelial cell
6,CL:0002329,basal epithelial cell of tracheobronchial tree
7,CL:1000143,lung goblet cell
8,CL:1000223,lung neuroendocrine cell
9,CL:0002075,brush cell of tracheobronchial tree
