# Add neutral mutations with a rate map


[Check these notes from Aaron](https://github.com/santiago1234/TheSciJournal/blob/main/journal-2022/Aaron-Notes/220721-DFE-expected-SFS-scaling.pdf)

In [1]:
import tskit
import fwdpy11
import msprime
import sys
import pandas as pd
import pybedtools

sys.path.append('../')
from simutils.utils import simuldata

In [2]:
# Load the genomic region to simulate.
# The same 'test-data/' directory is used both as the data directory and as
# the location of the genetic maps.
# NOTE(review): sample_id=23 presumably selects one region from the test
# data -- confirm against simutils.utils.simuldata.
simdat = simuldata('test-data/', sample_id=23, path_to_genetic_maps='test-data/')

In [3]:
# load fwdpy11 simulation output
# (binary population file written by the seed-1 simulation run)
pop = fwdpy11.DiploidPopulation.load_from_file('results/simulations/sim-seed-1-pop.bin')
# Export the population's tables to tskit so neutral mutations can be added
# to the simulated genealogy afterwards.
ts = pop.dump_tables_to_tskit()

## Setting the mutation rate map

For the mutation rate map of, say, synonymous mutations, we set the rate of the coding intervals to the synonymous mutation rate $u$,
and for the complement region (intergenic and intronic) we set the rate to zero.

In [4]:
# Inputs for the synonymous rate map: the synonymous rate `u` and the coding
# intervals that will carry it (everything else in the region gets rate 0).

u = simdat.m_synonymous
intervals = simdat.coding_intervals

In [5]:
def make_rate_map_table(region, intervals_with_positve_rate, rate):
    """
    Build a table of intervals tiling ``region``, each tagged with a rate.

    The supplied intervals receive ``rate``; whatever is left of ``region``
    after subtracting them receives a rate of 0.

    Args:
        region: bedtools object holding the whole genomic region
            (chrom, start, end).
        intervals_with_positve_rate: bedtools intervals inside the region
            that get the positive rate.
        rate: the rate assigned to ``intervals_with_positve_rate``.
    Return:
        pd.DataFrame with the interval columns plus a 'rate' column,
        sorted by 'start'.
    """
    # The complement must be computed while the input is still a bedtools
    # object, i.e. before anything is converted to a data frame.
    complement = region.subtract(intervals_with_positve_rate)

    positive_table = intervals_with_positve_rate.to_dataframe().assign(rate=rate)
    zero_table = complement.to_dataframe().assign(rate=0)

    # Interleave both sets of intervals in genomic order.
    combined = pd.concat([positive_table, zero_table])
    return combined.sort_values(by='start')


def to_msprime_rate_map(rmt):
    """
    Convert a rate map table into an msprime rate map.

    Args:
        rmt: rate map table with 'start', 'end' and 'rate' columns, one row
            per interval, ordered by position.
    Return:
        msprime.RateMap
    """
    # n intervals need n + 1 breakpoints: every interval start, followed by
    # the end of the final interval to close the map.
    breakpoints = [*rmt.start.to_list(), rmt.end.to_list()[-1]]
    rates = rmt.rate.to_list()
    return msprime.RateMap(position=breakpoints, rate=rates)


def rate_map(simdat, synonymous=True):
    """
    Make an msprime rate map for one class of neutral sites.

    Args:
        simdat: simulation data object; its chromosome, start, end, rate and
            interval attributes are read.
        synonymous: if True, the coding intervals get the synonymous rate;
            otherwise the noncoding (intronic and intergenic) intervals get
            the noncoding rate.
    Return:
        msprime.RateMap covering the region, with rate 0 outside the
        selected intervals.
    """
    if synonymous:
        rate, positive_frame = simdat.m_synonymous, simdat.coding_intervals
    else:  # intronic and intergenic regions
        rate, positive_frame = simdat.m_noncoding, simdat.noncoding_intervals

    # Express the region in coordinates relative to its own start, so that
    # it begins at position 0.
    offset = simdat.start
    region_start = simdat.start - offset
    region_end = simdat.end - offset
    region = pybedtools.BedTool(
        f'{simdat.chromosome} {region_start} {region_end}',
        from_string=True,
    )

    positive_intervals = pybedtools.BedTool.from_dataframe(positive_frame)

    table = make_rate_map_table(region, positive_intervals, rate)
    return to_msprime_rate_map(table)


def neutral_rate_maps(simdat):
    """
    Get the rate maps for the neutral regions.

    Return:
        dict with two msprime rate maps: 'synonymous' (positive rate on the
        coding intervals) and 'noncoding' (positive rate on the intronic and
        intergenic intervals).
    """
    return {
        'synonymous': rate_map(simdat, synonymous=True),
        'noncoding': rate_map(simdat, synonymous=False),
    }

# Display the rate map for the noncoding (intronic and intergenic) regions.
neutral_rate_maps(simdat)['noncoding']

left,right,mid,span,rate
0,42494,21247,42494,1e-08
42494,42569,42531.5,75,0
42569,43298,42933.5,729,1e-08
43298,43430,43364,132,0
43430,44779,44104.5,1349,1e-08
44779,44890,44834.5,111,0
44890,46715,45802.5,1825,1e-08
46715,46883,46799,168,0
46883,48388,47635.5,1505,1e-08
48388,48491,48439.5,103,0


In [6]:
# Display the rate map for synonymous sites (positive rate on coding intervals only).
neutral_rate_maps(simdat)['synonymous']

left,right,mid,span,rate
0,42494,21247,42494,0
42494,42569,42531.5,75,2.7e-09
42569,43298,42933.5,729,0
43298,43430,43364,132,2.7e-09
43430,44779,44104.5,1349,0
44779,44890,44834.5,111,2.7e-09
44890,46715,45802.5,1825,0
46715,46883,46799,168,2.7e-09
46883,48388,47635.5,1505,0
48388,48491,48439.5,103,2.7e-09
