In [17]:
import numpy as np
import pandas as pd

import random
from random import randrange

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Silence pandas' SettingWithCopyWarning (chained-assignment check)
pd.options.mode.chained_assignment = None 

In [8]:
def makeancestor(Nchr = 22, Ngenes = 100):
    '''
    Build an ancestral genome as a two-column DataFrame.

    inputs:
    Nchr : number of chromosomes; they are labelled 1..Nchr in column 'Chr'
    Ngenes : number of genes placed on each chromosome

    returns:
    DataFrame with Nchr * Ngenes rows and a default 0..n-1 index.
    'Chr' holds the chromosome number of each gene, and 'Genes' holds a
    running gene id 1..Nchr * Ngenes, so the genes of a chromosome are
    consecutive integers.
    '''
    # Build both columns in one vectorised step instead of appending one row
    # at a time: row-wise DataFrame.append is quadratic and no longer exists
    # in pandas >= 2.0.
    chromosomes = np.repeat(np.arange(1, Nchr + 1), Ngenes)
    genes = np.arange(1, Nchr * Ngenes + 1)

    return pd.DataFrame({'Chr' : chromosomes, 'Genes' : genes})

# Build the ancestral genome with the default size (22 chromosomes x 100 genes each)
ancestor = makeancestor()

**TODO:** fix the gene-mixing step (`mix`) in `fusion`.

In [28]:
def fusion(ancestor, mixing = 0):
    '''
    Fuse two distinct, randomly chosen chromosomes into a single chromosome.

    inputs: 
    ancestor : df with chromosome name | gene name (columns 'Chr' and 'Genes')
    mixing : float between 0 and 1, where 1 implies extreme mixing and 0 implies no mixing

    returns:
    A new DataFrame in which the rows of the two chosen chromosomes carry the
    label '<chr1>+<chr2>' and are placed after all untouched rows. Every gene
    appears exactly once. The input DataFrame is not modified.

    raises:
    ValueError if the genome has fewer than two distinct chromosomes.
    '''

    def mix(chromosome, mixing = 0): # Takes a chromosome and mixes the genes
        # Perform int(mixing * n) random pairwise swaps of row positions.
        # Swapping positions (not labels) keeps each gene attached to its row.
        n = len(chromosome)
        order = list(range(n))
        for _ in range(int(mixing * n)):
            g1, g2 = randrange(n), randrange(n)
            order[g1], order[g2] = order[g2], order[g1]

        return chromosome.iloc[order]

    # Randomly select two chromosomes to fuse. Sample from the labels that are
    # actually present (not from positions 0..N-1) so that labels starting at 1
    # and already-fused labels such as '3+7' are handled, and sample without
    # replacement so a chromosome is never fused with itself.
    labels = ancestor['Chr'].unique().tolist()
    if len(labels) < 2:
        raise ValueError('fusion needs at least two distinct chromosomes')
    fuse1, fuse2 = random.sample(labels, 2)

    # Fuse the chromosomes; assign() returns a new frame, so the caller's data
    # is left untouched and no chained assignment happens.
    selected = ancestor['Chr'].isin([fuse1, fuse2])
    fused = ancestor.loc[selected].assign(Chr = f'{fuse1}+{fuse2}')
    fused = mix(fused, mixing)

    # Remove the unfused chromosomes and add the fused chromosome back into the
    # genome. Only the remaining rows and the fused rows are concatenated, so
    # no row is duplicated.
    return pd.concat([ancestor.loc[~selected], fused])