# Bacteria-phage co-evolution animation

This notebook generates frames to make a gif or movie visualizing a simulation of bacteria interacting with phages. 

In [None]:
import numpy as np
from matplotlib import pyplot as plt
import os, cmath, glob
import pandas as pd
import matplotlib.cm as cm
from matplotlib import rcParams
from matplotlib.colors import rgb2hex
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec
from tqdm import tqdm
from scipy import sparse

Check: https://matplotlib.org/2.1.2/gallery/animation/dynamic_image2.html

Fun names:
`bubblEvolve`, `bubble race`, `bubbly`

In [None]:
def load_simulation(folder, timestamp):
    """
    Load the output files of one simulation run.

    Inputs:
    folder : directory that contains the simulation output files
    timestamp : timestamp string embedded in the file names of the run to load

    Output: the tuple
    (f, c0, g, B, R, eta, pv, alpha, e, L, mu, m_init, pop_array, max_m,
     mutation_times, parent_list, all_phages)
    where the first twelve entries are floats read from parameters_<timestamp>.txt,
    pop_array is the scipy sparse matrix stored in pop_array_<timestamp>.txt.npz,
    max_m is (number of pop_array columns - 3) / 2 as an int, and the last three
    entries are the parsed first lines of the mutation_times, parents and
    all_phages files.

    Raises KeyError if one of the twelve parameters is missing from the
    parameters file.
    """
    # load parameters: a two-column, tab-separated file of (name, value) rows
    parameters = pd.read_csv(folder + "/parameters_%s.txt" %timestamp, delimiter = '\t', header=None)
    parameters.columns = ['parameter', 'value']
    # set_index returns a new frame, so keep the result and look values up by name
    values = parameters.set_index('parameter')['value']

    parameter_names = ['f', 'c0', 'g', 'B', 'R', 'eta', 'pv', 'alpha', 'e', 'L', 'mu', 'm_init']
    f, c0, g, B, R, eta, pv, alpha, e, L, mu, m_init = (
        float(values.loc[name]) for name in parameter_names)

    # load list of all phages that ever existed (stored on the first line of the file)
    with open(folder + "/all_phages_%s.txt" %timestamp, "rb") as all_phages_file:
        all_phages = all_phages_file.readlines()

    all_phages = recreate_phage(all_phages[0])

    # load simulation population data
    pop_array = sparse.load_npz(folder + "/pop_array_%s.txt.npz" %timestamp)
    max_m = int((pop_array.shape[1] -3)/2)

    # load list of mutation times (same bracketed layout as the parent list)
    with open("%s/mutation_times_%s.txt" %(folder,timestamp), "rb") as mut_f:
        mutation_t = mut_f.readlines()

    mutation_times = recreate_parent_list(mutation_t[0])

    # load parent list
    with open("%s/parents_%s.txt" %(folder,timestamp), "rb") as par:
        parents = par.readlines()
    parent_list = recreate_parent_list(parents[0])

    return f, c0, g, B, R, eta, pv, alpha, e, L, mu, m_init, pop_array, max_m, mutation_times, parent_list, all_phages


In [None]:
def recreate_phage(phage_row):
    """
    Input: one raw line (bytes) holding a bracketed list of phage sequences,
    e.g. b"[[1,0,1],[0,1,1]]"
    Output: the same line parsed into a list of lists of integers
    """
    # splitting on '[' yields two leading pieces that precede the first sequence; drop them
    chunks = phage_row.decode("utf-8").split('[')[2:]

    # each chunk is "<comma separated ints>]..."; keep what comes before the closing bracket
    return [
        list(np.array(chunk.split(']')[0].split(','), dtype=int))
        for chunk in chunks
    ]

In [None]:
def recreate_parent_list(parent_row):
    """
    Input: one raw line (bytes) holding a bracketed list of lists, e.g. the parent
    ids (or mutation times) of every phage: b"[[''],[0],[1,2]]"
    Output: the same line parsed into a list of lists of float32 values.
    An entry written as [''] (an original phage with no parent) becomes an empty
    list; any blank item inside a longer entry becomes nan.
    """
    parent_list_row = []
    # splitting on '[' yields two leading pieces that precede the first entry; drop them
    parents_string = parent_row.decode("utf-8").split('[')[2:]
    for parent in parents_string:
        parent = parent.split(']')[0]
        if parent == "''": # this is one of the original phages with no back mutations
            parent_list_row.append([])
            continue

        # has at some point arisen by mutation: replace every blank item by nan
        # (not just the first one) so the numeric conversion cannot fail on them
        values = [
            np.nan if item.strip() in ("", "''") else float(item)
            for item in parent.split(',')
        ]
        parent_list_row.append(list(np.array(values, dtype='float32')))

    return parent_list_row

In [None]:
def find_nearest(array, value):
    """
    Input: array-like of numbers and a target value
    Output: index of the element of array closest to value (first one on ties)
    """
    gaps = np.abs(np.asarray(array) - value)
    return gaps.argmin()

In [None]:
# directory holding the simulation output, and the timestamp that identifies
# which run's files to load (both are passed to load_simulation)
folder = "data"
timestamp = "2019-02-11T14:17:12.365722"

In [None]:
# read the parameters, population array and phage lineage data for the chosen run
(f, c0, g, B, R, eta, pv, alpha, e, L, mu, m_init, pop_array,
 max_m, mutation_times, parent_list, all_phages) = load_simulation(folder, timestamp)

In [None]:
# pairwise distance between all the phages that ever lived:
# entry [a, b] is the sum of absolute differences between sequences a and b
all_phages = np.array(all_phages)

distance_matrix = np.zeros((len(all_phages), len(all_phages)))

for row, sequence in enumerate(all_phages):
    distance_matrix[row] = np.abs(all_phages - sequence).sum(axis = 1)

In [None]:
# create phages_dict
# the initial phages start at position 0 in the phage list and are skipped,
# so phages_dict only has entries for phages with index >= m_init
#
# phages_dict[i] holds, for phage i:
#   "sequence"          : row i of all_phages
#   "parents"           : parent ids (collapsed to the unique id when there is only one distinct parent)
#   "mutation_times"    : mutation times (only the first one when there is only one distinct parent)
#   "parent_distance"   : distance_matrix[i, parent] for each listed parent
#   "mutation_position" : sequence positions where phage i differs from each parent by exactly 1

spread = 50.
m_init = int(m_init)

phages_dict = {}

for i, phage in enumerate(all_phages):
    if i < m_init:
        continue

    parent_ids = parent_list[i]
    mutation_ts = mutation_times[i]

    if len(np.unique(parent_ids)) > 1:
        loop_list = parent_ids
    else:
        # a single distinct parent: keep one entry and its first mutation time
        loop_list = np.unique(parent_ids)
        mutation_ts = [mutation_ts[0]]

    parent_distances = []
    mutation_positions = []

    for pid in loop_list:
        pid = int(pid) # parent ids were parsed as floats
        parent_distances.append(distance_matrix[i, pid])

        mutation_pos = np.where(np.abs(phage - np.array(all_phages[pid])) == 1)[0]
        mutation_positions.append(mutation_pos)

    phages_dict[i] = {"sequence": phage, "parents": loop_list, "mutation_times": mutation_ts, "parent_distance": parent_distances,
                    "mutation_position": mutation_positions}

## Make frames for bubble plot

In [None]:
# delete previous frames
# done with os/glob rather than a shell escape so that it also works outside
# POSIX shells, and so that the frames directory exists before frames are saved
os.makedirs("frames", exist_ok=True)
for old_frame in glob.glob("frames/*"):
    if os.path.isfile(old_frame):
        os.remove(old_frame)

In [None]:
# convert the sparse population matrix to a dense array for indexing below;
# the check keeps this cell safe to re-run once pop_array is already dense
if sparse.issparse(pop_array):
    pop_array = pop_array.toarray()

In [None]:

# Render the animation frames: one two-panel bubble plot (bacteria on the left,
# phages on the right) is saved to frames/frame_XXXX.png for every `skip` units of time.

# plot appearance
scale = 80. # multiplies log(population + 1) to give the marker area
length = 1. # multiplies a clone's total distance to give its radial position
box_size = 10.1 # both axes span [-box_size, box_size]
transparency = 0.1 # alpha of the lines connecting a clone to its parent
colours1 = cm.gist_rainbow(np.linspace(0,1,30)) # 30 colours, indexed by mutation position below

legend_alignment = 'bottom left'
legend_spacing = 0.13

# legend offsets, as fractions of box_size
if legend_alignment == 'bottom left':
    y_offset = 0.3
    x_offset = 0.1
    
#if legend_alignment == 'bottom right': #  this will not currently work
#    y_offset = 0.3
#    x_offset = 0.6

skip = 100 # time between consecutive frames
end_time = 8000 # no frames are made for times beyond this

# where the dot for column offset 0 is drawn, and the origin for all other clones
center_position_x = 0
center_position_y = 0
        
shift = 1 # added inside the log for legend markers, matching the +1 used for data markers
legend_sizes = np.logspace(0,4,5) # population sizes shown in the legend: 1, 10, ..., 10^4

# the last column of pop_array is compared against time/(g*c0), so it presumably holds
# simulation time, with g*c0 converting to generations (the frame title labels `time`
# as generations) - TODO confirm
for i in range(int((pop_array[-1,-1]*g*c0)/skip)):
    time = i*skip
    
    if time > end_time:
        break
        
    # row of pop_array closest in time to this frame
    t_ind = find_nearest(pop_array[:,-1], time/(g*c0))

    fig, axs = plt.subplots(1,2, figsize = (15, 8))

    # ax0 (right panel) shows phages, ax1 (left panel) shows bacteria
    ax0 = axs[1]
    ax1 = axs[0]

    ax0.set_xlim(-box_size,box_size)
    ax0.set_ylim(-box_size,box_size)
    ax0.set_xticks([])
    ax0.set_yticks([])

    ax1.set_xlim(-box_size,box_size)
    ax1.set_ylim(-box_size,box_size)

    ax1.set_xticks([])
    ax1.set_yticks([])
    

    # central black dots: phage column max_m+1 and bacteria column 1, i.e. the columns
    # that the loop below would use for key 0
    # NOTE(review): phages_dict has no entries below m_init, so if m_init > 1 the other
    # initial clones are never drawn - confirm this is intended
    ax0.scatter(center_position_x, center_position_y, s = scale*np.log(pop_array[t_ind, max_m+1]+1), 
                c = 'k', edgecolors = 'k')
    ax1.scatter(center_position_x, center_position_y, s = scale*np.log(pop_array[t_ind, 1]+1), 
                c = 'k', edgecolors = 'k')

    # legend: hollow circles sized like the data markers, drawn on the bacteria panel
    ax1.annotate("Population size:", (-box_size + x_offset*box_size - 0.5, 
                                0-box_size*(y_offset - 0.1)), fontsize = 14)
    for l in range(len(legend_sizes)):
        ax1.scatter(-box_size + x_offset*box_size, 0-box_size*y_offset - box_size*legend_spacing*l, c = 'none', 
                    s = scale*np.log(shift+legend_sizes[l]), edgecolors = 'k')
        ax1.annotate(str(round(legend_sizes[l],3)), (-box_size + (x_offset + 0.1)*box_size, 
                        0-box_size*(y_offset + 0.02) - box_size*legend_spacing*l), fontsize = 14)
    
    # plot circles: one per mutant clone that is present at this time
    for key, value in phages_dict.items():

        # population of this clone in the phage block and in the bacteria block of pop_array
        size_phage = pop_array[t_ind, max_m + 1 + key]
        size_bac = pop_array[t_ind, 1 + key]

        if size_phage > 0 or size_bac > 0:

            shortest_distance = np.min(distance_matrix[key, :int(m_init)])
            angle_chain = [] # mutation positions along the lineage, most recent step first
            total_distance = 0 # summed parent distances along the lineage

            phage = key
            parent_phage = phage
            time2 = time

            # walk back through the parents until one of the initial phages (index < m_init) is reached
            while parent_phage > m_init-1: # continue following back
                mutation_ts = phages_dict[phage]["mutation_times"] # get all the mutations that have happened
                possible_parent_inds = np.where(np.array(mutation_ts) < time2/(g*c0))[0] # get all the indices for mutations before time time2
                possible_parent_phages, sortinds = np.unique(np.array(phages_dict[phage]["parents"])[possible_parent_inds], return_index = True) # unique parent phages

                possible_parent_phages = np.array(possible_parent_phages, dtype = 'int')[np.argsort(sortinds)] # get possible parents in the order in which they happened
                #parent_pop_sizes = pop_array[t_ind, max_m +1 : 2*max_m + 1][possible_parent_phages] # get phage populations for parents
                #parent_phage = possible_parent_phages[np.nonzero(parent_pop_sizes)[0][0]]
                # follow the earliest-listed candidate parent
                parent_phage = possible_parent_phages[0]
                parent_ind = np.sort(sortinds)[0]
                distance_from_parent = phages_dict[phage]["parent_distance"][parent_ind]
                mutation_pos = phages_dict[phage]["mutation_position"][parent_ind]
                mutation_pos = mutation_pos[0] # in case of double mutant - this is crude, FIX THIS
                angle_chain.append(mutation_pos)
                total_distance += distance_from_parent
                phage = parent_phage
                # the next step back only considers mutations before this time
                # NOTE(review): this takes the time of the LAST candidate index although the
                # FIRST candidate parent was followed - confirm this is intended
                time2 = mutation_ts[int(possible_parent_inds[-1])]*g*c0
                initial_phage_parent = parent_phage # NOTE(review): never read afterwards in this cell

            # recompute the most recent mutation position of this clone (used to pick its colour)
            mutation_ts = phages_dict[key]["mutation_times"]
            possible_parent_inds = np.where(np.array(mutation_ts) < time/(g*c0))[0] # get all the indices for mutations before time time2
            possible_parent_phages, sortinds = np.unique(np.array(phages_dict[key]["parents"])[possible_parent_inds], return_index = True) # unique parent phages
            possible_parent_phages = np.array(possible_parent_phages, dtype = 'int')[np.argsort(sortinds)] # get possible parents in the order in which they happened

            parent_phage = possible_parent_phages[0]
            parent_ind = np.sort(sortinds)[0]
            mutation_pos = phages_dict[key]["mutation_position"][parent_ind]
            mutation_pos = mutation_pos[0] # in case of double mutant - this is crude, FIX THIS

            if len(angle_chain) < 2:
                # one step from an initial phage: the angle is the mutation position mapped
                # onto the full circle (position * 360/L degrees, in radians)
                parent_angle = 0
                angle = angle_chain[-1] * (360/L) * np.pi / 180

            else:
                # calculate angle from angle_chain
                # the oldest step sets the base direction; each later step moves the angle
                # to somewhere within a `spread`-degree window around its parent's angle
                # NOTE(review): the 30 here and in colours1 is presumably the sequence length L - confirm
                parent_angle = angle_chain[-1] * (360/L) * np.pi / 180
                angle = parent_angle - (spread/2 )* np.pi / 180 + angle_chain[-2]*(spread/30)*np.pi / 180 

                for a in angle_chain[::-1][2:]:
                    parent_angle = angle
                    angle = parent_angle - (spread/2 )* np.pi / 180 + a*(spread/30)*np.pi / 180 
                    #print(parent_angle, angle)

                    
            # colour by mutation position: the full rainbow when parent_angle is 0, otherwise
            # a narrower band centred on the parent's angle that shrinks with total_distance
            # NOTE(review): parent_angle is also 0 for deeper lineages whose oldest mutation
            # is at position 0 - confirm those should use the full rainbow too
            if parent_angle == 0:
                colour = colours1[mutation_pos]
            else:
                colour_centre = parent_angle/(2*np.pi)
                colours2 = cm.gist_rainbow(np.linspace(colour_centre-0.5/total_distance,colour_centre+0.5/total_distance,30))
                colour = colours2[mutation_pos]
            
            initial_phage_pos_x = center_position_x
            initial_phage_pos_y = center_position_y

            # draw the clone at polar position (length*total_distance, angle) on both panels
            ax0.scatter(length*total_distance*np.cos(angle) + initial_phage_pos_x, 
                        length*total_distance*np.sin(angle) + initial_phage_pos_y, s = scale*np.log(size_phage+1),
                           c = colour, edgecolors='k')

            ax1.scatter(length*total_distance*np.cos(angle) + initial_phage_pos_x, 
                        length*total_distance*np.sin(angle) + initial_phage_pos_y, s = scale*np.log(size_bac+1),
                           c = colour, edgecolors='k')

            # connect with lines: from radius total_distance-1 at the parent's angle to the clone itself
            ax0.plot([length*(total_distance-1)*np.cos(parent_angle) + initial_phage_pos_x, length*total_distance*np.cos(angle) + initial_phage_pos_x], 
                 [length*(total_distance-1)*np.sin(parent_angle) + initial_phage_pos_y, 
                  length*total_distance*np.sin(angle) + initial_phage_pos_y], 'k-', alpha = transparency)

            # on the bacteria panel the line is only drawn when the bacterial clone is present
            if size_bac > 0:        
                ax1.plot([length*(total_distance-1)*np.cos(parent_angle) + initial_phage_pos_x, 
                      length*total_distance*np.cos(angle) + initial_phage_pos_x], 
                     [length*(total_distance-1)*np.sin(parent_angle)+ initial_phage_pos_y, 
                      length*total_distance*np.sin(angle)+ initial_phage_pos_y],
                         'k-', alpha = transparency)
    
    ax0.set_title("Phages", fontsize = 20)
    ax1.set_title("Bacteria", fontsize = 20)
    fig.suptitle('Time = %s generations' %time, fontsize=16, x = 0.415, y = 0.05, ha = 'left')
    plt.tight_layout(rect = [0,0.06, 1, 0.97])
    # frames are numbered by loop index so they can be sorted when assembling the gif
    plt.savefig("frames/frame_%04d.png" % (i,), dpi = 80)
    plt.close()

In [None]:
# assemble the saved frames into a gif with ImageMagick's `convert`

gif_name = 'mutations_gif_%s' %(timestamp)

# collect every png in the frames directory, ordered by the number between
# the first '_' and '.png' in its path (this may need tweaking for other naming schemes)
file_list = sorted(glob.glob('frames/*.png'),
                   key=lambda path: int(path.split('_')[1].split('.png')[0]))

# ImageMagick reads the ordered frame paths from this listing file (one path per line)
with open('image_list.txt', 'w') as listing:
    listing.writelines("%s\n" % path for path in file_list)

os.system('convert @image_list.txt {}.gif'.format(gif_name))

In [None]:
# optional: turn the gif into an mp4 movie with ffmpeg

movie_command = 'ffmpeg -f gif -i %s.gif %s.mp4' %(gif_name, gif_name)
os.system(movie_command)