Python script to convert a MIP indel FASTA file into ASR JSON

In [9]:
import json
from pysam import FastaFile,FastxFile
from ete3 import Tree



In [18]:
def fasta_to_json(indel_fasta_solution_file,nwk_file,asr_json_file):
    """Rebuild the ancestor graphs of an ASR JSON document from MIP indel calls.

    Loads the JSON document at ``asr_json_file``, reads one 0/1 string per
    ancestor from ``indel_fasta_solution_file`` and, for each entry of
    ``asr_json_data["Ancestors"]``, overwrites the ``Directed``,
    ``Terminated``, ``Indices``, ``Nodes``, ``Edgeindices``, ``Adjacent`` and
    ``Edges`` keys so that they describe a single path through the positions
    marked ``'1'``.

    Args:
        indel_fasta_solution_file: path of the FASTA file holding the indel
            strings; record names are looked up as ``"N" + a["Name"]``.
        nwk_file: accepted for interface compatibility; not read by this
            function.
        asr_json_file: path of the ASR JSON file to load.

    Returns:
        None. The rewritten ancestor is printed as JSON to stdout.

    Raises:
        KeyError: if an ancestor has no matching record in the FASTA file.
    """
    # read the asr json file; the context manager guarantees the handle is
    # closed (the file used to be left open)
    with open(asr_json_file) as f:
        asr_json_data = json.load(f)

    # template for a single edge record; copied for every edge further down
    edges_repeat = {"Recip": True,"Backward": True,"Forward": True,"Weight": 0}

    # read the indel inference from mip: record name -> 0/1 string
    ancestor_indel_info = {}
    with FastxFile(indel_fasta_solution_file) as fh:
        for entry in fh:
            ancestor_indel_info[entry.name] = entry.sequence

    # read each ancestor and change it
    for a in asr_json_data["Ancestors"]:
        a["Directed"] = True
        a["Terminated"] = True
        ancestor_name = "N" + a["Name"]
        ancestor_indel = ancestor_indel_info[ancestor_name]

        # Positions flagged '1', shifted by one so that a flag in the very
        # first column (the mip start node) becomes -1.
        indices = [e_idx - 1 for e_idx, e_val in enumerate(ancestor_indel) if e_val == '1']
        # do not include first and last node as mip has start/end node
        inner_nodes = indices[1:-1]
        a["Indices"] = inner_nodes
        # one (empty) node record per index
        a["Nodes"] = [{} for _ in inner_nodes]

        # An indel string with fewer than three '1' flags has no inner node;
        # guard the lookup instead of failing with IndexError.
        last_node = inner_nodes[-1] if inner_nodes else None

        # change the Edgeindices and adjacent structure
        all_edges = []
        adjacent = []
        for src, dst in zip(indices, indices[1:]):
            all_edges.append([src, dst])
            if src == -1 or last_node is None:
                # the start node (or a graph without inner nodes) gets no
                # adjacency row
                continue
            # the last inner node has no successor among the inner nodes
            adjacent.append([dst] if src != last_node else [])

        a["Edgeindices"] = all_edges
        a["Adjacent"] = adjacent

        # One independent edge record per edge index pair. Copying the
        # template avoids every entry aliasing the same dict object.
        a["Edges"] = [dict(edges_repeat) for _ in all_edges]

        # NOTE(review): debugging leftovers - only the first ancestor is
        # converted and printed; drop both lines once the dump below is enabled.
        print(json.dumps(a))
        break

    # Dump the output in the file
    # TODO: disabled in the original; relies on a module-level
    # `folder_location` rather than on an argument of this function.
#     with open(folder_location + 'ASR_MIP.json', 'w') as outfile:
#         json.dump(asr_json_data, outfile)

In [13]:
# Input locations for the "st1" data set. Every file name below is later
# appended to `folder_location` by plain string concatenation, which is why
# the folder keeps its trailing slash.
folder_location = '/Users/sanjanatule/Documents/uq/Projects/Indels/indelmip/data/st1/'
asr_json_file = 'ASR.json'
nwk_file = 'input_extants_ancestors.nwk'
file_name = 'mip_ancestor_indel.fasta'
# Disabled call, kept for reference; no `main` is defined in the visible cells.
#main(folder_location + file_name,folder_location + nwk_file,folder_location + asr_json_file)

In [16]:
# Input locations for the "CYP2U_165" data set. These assignments rebind the
# same four module-level names, so whichever configuration cell ran last is
# the one the conversion call picks up.
folder_location = '/Users/sanjanatule/Documents/uq/Projects/Indels/indelmip/data/CYP2U_165/'
asr_json_file = 'ASR_GRP.json'
nwk_file = 'CYP2U_165.nwk'
file_name = 'mip_ancestor_indel.fasta'

In [19]:
# Run the conversion on the currently configured data set; each path is the
# folder prefix concatenated with the corresponding file name.
fasta_to_json(
    indel_fasta_solution_file=folder_location + file_name,
    nwk_file=folder_location + nwk_file,
    asr_json_file=folder_location + asr_json_file,
)

{"Adjacent": [[11], [12], [13], [14], [28], [29], [30], [31], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [51], [52], [57], [58], [59], [63], [64], [65], [66], [67], [68], [69], [70], [71], [72], [73], [74], [78], [79], [80], [82], [83], [84], [87], [88], [89], [90], [91], [92], [93], [94], [95], [96], [97], [98], [99], [100], [101], [107], [108], [109], [110], [111], [112], [113], [114], [115], [116], [125], [143], [145], [146], [147], [150], [151], [152], [153], [154], [155], [156], [157], [158], [159], [160], [161], [162], [163], [164], [165], [166], [167], [168], [169], [170], [171], [172], [173], [174], [175], [176], [177], [178], [179], [180], [181], [182], [183], [184], [185], [186], [187], [188], [189], [190], [191], [192], [193], [194], [195], [196], [197], [198], [199], [200], [201], [202], [203], [204], [205], [206], [207], [208], [209], [210], [211], [213], [214], [215], [216], [217], [218], [219], [220], [221], [222], [223], [224], [