In [41]:

import json
from Bio import AlignIO
from Bio.AlignIO.PhylipIO import RelaxedPhylipWriter
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

def convert_json(path, data_set_id):
    with open(path, 'r') as f:
        data = json.load(f)
    language_dict = {}
    matrix = {}
    for language_id in data['Languages']:
        language_dict[int(language_id)] = data['Languages'][language_id]['Name'].replace(" ", "")
        matrix[int(language_id)] = []
    data_sets = data['TypologicalDataSets']

    grids = data_sets[data_set_id]['TypologicalGrids']
    for grid_id in grids:
        features = grids[grid_id]['TypologicalFeatures']
        for feature_id in features:
            variants = features[feature_id]['TypologicalFeatureVariants']
            for variant_id in variants:
                d = {}
                if not 'RecordedValues' in variants[variant_id]:
                    continue
                values = variants[variant_id]['RecordedValues']
                for value_id in values:
                    entry = values[value_id]
                    d[entry['FkLanguageId']] = entry['Value']
                for language_id in language_dict:
                    if language_id in d:
                        if d[language_id]:
                            matrix[language_id].append('1')
                        else:
                            matrix[language_id].append('0')
                    else:
                        matrix[language_id].append('-')
    records = [SeqRecord(matrix[language_id], id=language_dict[language_id]) for language_id in matrix]
    align = MultipleSeqAlignment(records, annotations={}, column_annotations={})
    print(align)
    file_name = path.split("/")[-1].split(".")[0] + ".phy"
    with open("conversion/phy/" + file_name,"w+") as f:
        writer = RelaxedPhylipWriter(f)
        writer.write_alignment(align)
    #align = AlignIO.read("conversion/phy/" + file_name, "phylip-relaxed")
    
convert_json('../database/datasets/raw/DiACL/typodataset-550.json', '550')
    

Alignment with 151 rows and 129 columns
['0', '1', '1', '0', '0', '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1']...['-', '-', '-'] Basque(East)
['0', '1', '1', '0', '0', '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1', '1', '1', '0', '1', '1', '1']...['-', '-', '-'] Basque(West)
['1', '0', '1', '0', '1', '0', '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '-', '-', '0', '1', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '0', '0', '1', '0', '0', '0', '1', '1', '0', '1', '0', '0']...['0', '0', '-'] Albanian(Tosk)
['0', '1', '0', '1', '0', '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '0', '0', '1', '0', '0', '1', '0', '0', '1', '1', '1', '1', '1', '1', '1', '1', '0', '0', '1', '0', '