In [None]:
#Shuffling of simulated alignments for testing the mixture model

In [6]:
import numpy as np
import sys
np.set_printoptions(threshold=sys.maxsize)

from Bio import AlignIO
from Bio.AlignIO.PhylipIO import RelaxedPhylipWriter
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

alignment_dir = "data/language_alignments/"
geo_ratio = 75
seed = geo_ratio #purpose is to use not always the same sites of the alignments
np.random.seed = geo_ratio



sim_geo_align = AlignIO.read(alignment_dir +  "sim_geo_duration.phy", "phylip-relaxed")
sim_cognate_align = AlignIO.read(alignment_dir +  "sim_cognate_ie_compatible.phy", "phylip-relaxed")

geo_columns = np.array([sim_geo_align[:, i] for i in range(sim_geo_align.get_alignment_length())])
cognate_columns = np.array([sim_cognate_align[:, i] for i in range(sim_cognate_align.get_alignment_length())])

num_geo = int(len(geo_columns) * (geo_ratio / 100))
num_cognate = int(len(geo_columns) * (1 - (geo_ratio / 100)))
rand_vector = np.array([0] * num_geo + [1] * num_cognate)
np.random.shuffle(rand_vector)
print(rand_vector)

np.random.shuffle(geo_columns)
np.random.shuffle(cognate_columns)

final_columns = []
geo_pointer = 0
cognate_pointer = 0

for i in range(len(rand_vector)):
    if rand_vector[i]:
        final_columns.append(geo_columns[geo_pointer])
        geo_pointer += 1
    else:
        final_columns.append(cognate_columns[cognate_pointer])
        cognate_pointer += 1
        
sequences = ["" for i in range(len(sim_geo_align))]
for column in final_columns:
    for i,el in enumerate(column):
        sequences[i] +=  el
        
        
ids = []
for row in sim_geo_align:
    ids.append(row.id)

records = [SeqRecord(sequences[i], id=sim_geo_align[i].id) for i in range(len(sim_geo_align))]
align = MultipleSeqAlignment(records, annotations={}, column_annotations={})
file_name = alignment_dir + "sim_" + str(geo_ratio) + ".phy"
with open(file_name,"w+") as f:
    writer = RelaxedPhylipWriter(f)
    writer.write_alignment(align)

[0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 1
 0 0 1 0 0 1 0 1 0 1 1 1 0 1 0 1 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1 0 0 0
 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0
 1 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1
 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 1
 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 1 1 0
 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 1
 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 