# Hitch-hiking and polygenic adaptation

Kevin Thornton

Ecology and Evolutionary Biology

University of California, Irvine

# Linked selection vs fate of selected mutations


Hudson & Kaplan, 1995 | De Vladar & Barton, 2014
:------:|:----------:
<img src="HK1995Fig1.png" width="500">|<img src="DvB2014Fig1.png" width="500">

In [1]:
# "Fake" sweep-like tree by simulating rapid growth
import msprime
import numpy as np
from IPython.display import SVG

# Two 5-sample trees are simulated and written to SVG files:
#   tree1.svg -- default msprime.simulate model
#   tree2.svg -- exponentially growing population (growth_rate=10), used as a
#                stand-in for the shape of a tree after a recent hard sweep

# Sample size of both simulations.  Node ids below this value are labelled "0"
# (presumably these are the sampled leaves in msprime.simulate output -- confirm).
SAMPLE_SIZE = 5


def _node_labels(ts, fmt, scale=1.0):
    """Build the ``node_labels`` dict passed to ``Tree.draw``.

    Parameters:
        ts: tree sequence whose node table supplies the node times.
        fmt: ``str.format`` template applied to each (scaled) node time.
        scale: divisor applied to every node time before formatting.

    Returns:
        dict mapping node id -> label.  Ids below ``SAMPLE_SIZE`` map to '0';
        all other ids map to ``fmt.format(time / scale)``.
    """
    # Read the time column once instead of going through ``ts.tables`` for
    # every node (NOTE(review): some msprime/tskit versions appear to rebuild
    # the table collection on each ``ts.tables`` access -- confirm).
    times = ts.tables.nodes.time
    return {
        node: "0" if node < SAMPLE_SIZE else fmt.format(time / scale)
        for node, time in enumerate(times)
    }


ts = msprime.simulate(SAMPLE_SIZE, random_seed=666)
t = next(ts.trees())
nl = _node_labels(ts, "{:0.2f}")
tree1_svg = t.draw(path="tree1.svg", format='svg', node_labels=nl)

ts = msprime.simulate(
    population_configurations=[
        msprime.PopulationConfiguration(sample_size=SAMPLE_SIZE, growth_rate=10)
    ],
    random_seed=42*666,
)
# Cheat here even more: divide times by 50 to look more realistic
nl = _node_labels(ts, "{:0.3f}", scale=50.)
t = next(ts.trees())
tree2_svg = t.draw(path="tree2.svg", format='svg', node_labels=nl)

# Tree structures

Neutral | Recent hard sweep
:------:|:----------:
![Image](tree1.svg)|![Image2](tree2.svg)


In [2]:
%matplotlib agg
# Get distributions of D for neutral and for growth,
# which we're using to fake what hard sweeps look like
import libsequence.variant_matrix as vm
import libsequence.summstats as sstats
import matplotlib.pyplot as plt
import seaborn as sns
Dneutral=[]
for ts in msprime.simulate(50, mutation_rate=100.,num_replicates=1000, random_seed=666):
    m = vm.VariantMatrix.from_TreeSequence(ts)
    ac = m.count_alleles()
    Dneutral.append(sstats.tajd(ac))
Dgrowth=[]
for ts in msprime.simulate(population_configurations=[msprime.PopulationConfiguration(sample_size=50,growth_rate=5)],
                           mutation_rate=100.,
                           num_replicates=1000, random_seed=351212):
    m = vm.VariantMatrix.from_TreeSequence(ts)
    ac = m.count_alleles()
    Dgrowth.append(sstats.tajd(ac))
    
f, ax = plt.subplots(1,2,
                     figsize=(6,4),
                     sharey=True,sharex=True)
sns.distplot(Dneutral,ax=ax[0],norm_hist=False,kde=False)
sns.distplot(Dgrowth,ax=ax[1],norm_hist=False,kde=False)
ax[0].set_ylabel("Count")
ax[0].set_xlabel("Tajima's D")
ax[1].set_xlabel("Tajima's D")
ax[0].set_title("Neutral")
ax[1].set_title("Recent hard sweep")
plt.savefig("tajd.png", dpi=300)

  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


# Detecting selection from genotype data

<img src="tajd.png" width="600">

# Model overview


<img src="Model.png" width="600">


# A locus and model parameters
<img src="LocusLayout.png" width="400">

* $N = 5{,}000$ diploids
* $\theta = \rho = 1{,}000$
* $\mu \in \{2.5\times 10^{-4}, 10^{-3}, 5\times 10^{-3}\}$
* $\Theta = 5$, $20$, or $100$, which lets us have $\theta$ and $\Theta$ floating around 😜
* Mutational effects are $\sim N(0,\sigma_\mu)$.
