# Simulation Model - Version 1

### Variables

In [None]:
# Notebook-level simulation settings.
# NOTE(review): none of these names is read by the cells visible in this
# notebook; the meanings below are inferred from the names - TODO confirm.
# The list-valued settings presumably hold one entry per trait
# (i.e. num_traits entries each).
pop_size = 10  # presumably individuals per generation
num_traits = 1
snp_num = [20]  # presumably total SNPs for each trait
snp_causal = [10]  # presumably causal SNPs for each trait
mating = "random" # random or assortative
true_h2 = [0.3]  # presumably target heritability for each trait

### Imports

In [2]:
import numpy as np
from sklearn.preprocessing import StandardScaler

### Class Definitions

In [None]:
class Trait():
    """A simulated trait for ``n`` individuals over ``s`` biallelic SNPs.

    The ``define_*`` methods draw genotypes, assign per-SNP effects and store
    the results on the instance:

    * ``snp_freqs`` - allele frequency drawn for each SNP
    * ``alleles`` - integer allele counts (0, 1 or 2)
    * ``standard_alleles`` - allele counts standardized per SNP
    * ``causal_snp_effect`` - dict mapping SNP index -> effect size, 0 for
      non-causal SNPs (set by the polygenic methods only)
    * ``genotypes`` / ``environment`` / ``phenotypes`` - per-individual
      genetic value, environmental noise, and their sum
    """

    def __init__(self, type, s_causal, s_noncausal, h2, n):
        """Store the trait's configuration; nothing is simulated here.

        Args:
            type: Label for the kind of trait. The draft mating code compares
                it against "monogenic_recessive", "uncorrelated_polygenic"
                and "correlated_polygenic". (The name shadows the builtin but
                is part of the public signature, so it is kept.)
            s_causal: Number of causal SNPs.
            s_noncausal: Number of non-causal SNPs.
            h2: Target heritability (genetic share of phenotypic variance).
            n: Number of individuals.
        """
        self.s_causal = s_causal
        self.s_noncausal = s_noncausal
        self.s = s_causal + s_noncausal
        self.h2 = h2
        self.n = n
        self.type = type

    @staticmethod
    def _standardize(G):
        """Return G centred and scaled to unit variance along axis 0.

        Uses the population standard deviation (ddof=0). A constant column is
        centred but left unscaled so it becomes all zeros instead of NaN; this
        is intended to match StandardScaler's default behaviour. Unlike
        StandardScaler it also accepts the 1-D genotype vector of a
        single-SNP trait.
        """
        G = np.asarray(G, dtype=float)
        scale = G.std(axis=0)
        scale = np.where(scale == 0, 1.0, scale)
        return (G - G.mean(axis=0)) / scale

    def _simulate_genotypes(self):
        """Draw an (n, s) matrix of allele counts and the per-SNP frequencies."""
        G = np.zeros((self.n, self.s))
        snp_freqs = []
        for snp in range(self.s):
            # One uniform allele frequency per SNP, then Binomial(2, freq)
            # allele counts for every individual.
            freq = np.random.uniform()
            snp_freqs.append(freq)
            G[:, snp] = np.random.binomial(2, freq, size=self.n)
        return G, snp_freqs

    def _effects_by_snp(self, causal_snps, effect_sizes):
        """Map every SNP index to its effect: effect_sizes[i] for causal_snps[i], else 0."""
        effect = dict.fromkeys(range(self.s), 0)
        for snp, beta in zip(causal_snps, effect_sizes):
            effect[int(snp)] = beta
        return effect

    @staticmethod
    def _effect_vector(causal_snp_effect):
        """Return the per-SNP effects as a float array ordered by SNP index."""
        return np.array(list(causal_snp_effect.values()), dtype=float)

    def define_uncorrelated_polygenic_trait(self):
        """Simulate a single polygenic trait with additive SNP effects.

        Effects are drawn from N(0, h2 / s_causal) for the causal SNPs and
        environmental noise from N(0, 1 - h2).

        Returns:
            (genotypes, environment, phenotypes): three lists of length n.
        """
        # compute va and ve from h2
        va = self.h2
        ve = 1 - self.h2

        # simulate and scale genotypes
        G, self.snp_freqs = self._simulate_genotypes()
        self.alleles = G.astype(int)
        self.standard_alleles = self._standardize(G)

        # choose causal SNPs and effect sizes; sorting makes effect i belong
        # to the i-th smallest causal SNP index
        causal_snps = np.sort(
            np.random.choice(self.s, size=self.s_causal, replace=False))
        effect_sizes = np.random.normal(
            loc=0, scale=np.sqrt(va / self.s_causal), size=self.s_causal)
        self.causal_snp_effect = self._effects_by_snp(causal_snps, effect_sizes)

        # simulate phenotypes
        genetic = self.standard_alleles @ self._effect_vector(self.causal_snp_effect)
        noise = np.random.normal(loc=0, scale=np.sqrt(ve), size=self.n)

        self.phenotypes = list(genetic + noise)
        self.genotypes = list(genetic)
        self.environment = list(noise)

        return self.genotypes, self.environment, self.phenotypes

    def define_two_correlated_polygenic_traits(self, trait_2, rg, re):
        """Simulate this trait and ``trait_2`` on shared genotypes.

        Both traits use the same genotype matrix and the same causal SNPs.
        Effect sizes are drawn with correlation ``rg`` and rescaled so each
        trait's realized genetic variance equals its ``h2``; environmental
        noise is drawn with variances ``1 - h2`` and correlation ``re``.
        Results are stored on both objects, which are also linked through
        ``correlated_trait``.

        Args:
            trait_2: Another Trait with the same s, s_causal, s_noncausal, n.
            rg: Target correlation between the two traits' effect sizes.
            re: Target correlation between the two traits' environments.

        Returns:
            (phenotypes_1, phenotypes_2, genetic_1, genetic_2, noise_1,
            noise_2). Phenotypes and noise are arrays; genetic values are
            lists.

        Raises:
            AssertionError: If the two traits' dimensions differ.
            ValueError: If the causal SNPs carry no genetic variance, so the
                effects cannot be rescaled to reach h2.
        """
        assert self.s == trait_2.s, "traits must have the same number of SNPs"
        assert self.s_causal == trait_2.s_causal, "traits must have the same number of causal SNPs"
        assert self.s_noncausal == trait_2.s_noncausal, "traits must have the same number of non-causal SNPs"
        assert self.n == trait_2.n, "traits must have the same number of individuals"

        # simulate genotypes once; both traits share them
        G, snp_freqs = self._simulate_genotypes()
        standardized = self._standardize(G)
        for trait in (self, trait_2):
            trait.snp_freqs = snp_freqs
            trait.alleles = G.astype(int)
            trait.standard_alleles = standardized.copy()

        # choose causal SNPs. Sorted so that effect i is paired with
        # causal_snps[i] both in the variance normalization and in the
        # per-SNP effect dict (previously the two used different pairings,
        # so the realized genetic variance missed h2).
        causal_snps = np.sort(
            np.random.choice(self.s, size=self.s_causal, replace=False))

        # choose effect sizes so rg is true
        effects = np.random.multivariate_normal(
            [0, 0], [[1, rg], [rg, 1]], size=self.s_causal)

        # normalize so that genetic variance equals h2
        causal_G = standardized[:, causal_snps]
        for trait, raw in ((self, effects[:, 0]), (trait_2, effects[:, 1])):
            genetic_var = np.var(causal_G @ raw)
            if genetic_var == 0:
                raise ValueError(
                    "causal SNPs carry no genetic variance; cannot scale effects to h2")
            scaled = raw * np.sqrt(trait.h2 / genetic_var)
            trait.causal_snp_effect = self._effects_by_snp(causal_snps, scaled)

        # choose environment so re is true
        ve_1 = 1 - self.h2
        ve_2 = 1 - trait_2.h2
        cov_12 = re * np.sqrt(ve_1 * ve_2)
        environment = np.random.multivariate_normal(
            [0, 0], [[ve_1, cov_12], [cov_12, ve_2]], size=self.n)

        # simulate phenotypes and update objects
        for trait, noise in ((self, environment[:, 0]), (trait_2, environment[:, 1])):
            genetic = trait.standard_alleles @ self._effect_vector(trait.causal_snp_effect)
            trait.phenotypes = genetic + noise
            trait.genotypes = list(genetic)
            trait.environment = noise
        self.correlated_trait = trait_2
        trait_2.correlated_trait = self

        return (self.phenotypes,
            trait_2.phenotypes,
            self.genotypes,
            trait_2.genotypes,
            self.environment,
            trait_2.environment)

    def define_monogenic_recessive_trait(self):
        """Simulate a fully heritable trait controlled by a single SNP.

        Requires s == 1, s_causal == 1 and h2 == 1. There is no environmental
        component, so the phenotype equals the genetic value.

        Returns:
            (genotypes, environment, phenotypes): three lists of length n;
            environment is all zeros.
        """
        assert self.s == 1
        assert self.s_causal == 1
        assert self.h2 == 1

        # simulate genotypes for each person for the single SNP
        snp_freqs = [np.random.uniform()]
        G = np.random.binomial(2, snp_freqs[0], size=self.n)
        self.snp_freqs = snp_freqs

        # scale genotypes (1-D here, since there is only one SNP)
        self.alleles = G.astype(int)
        self.standard_alleles = self._standardize(G)

        # simulate phenotypes from the RAW allele counts: after centering, a
        # standardized value of 0 no longer means "zero copies".
        # NOTE(review): as coded, any carrier (1 or 2 copies) gets phenotype
        # 1, so phenotype 0 is the homozygous-only state - confirm this is
        # the intended recessive coding.
        phenotypes_causal = [0 if allele == 0 else 1 for allele in self.alleles]

        self.phenotypes = list(phenotypes_causal)
        self.genotypes = phenotypes_causal
        self.environment = [0] * len(phenotypes_causal)

        return self.genotypes, self.environment, self.phenotypes

    def create_offspring_random_mating(self, offspring_trait, fitness):
        """Fill ``offspring_trait`` from fitness-weighted random mating.

        Not implemented yet: the method is declared so that
        Generation.create_offspring_random_mating fails with a clear error
        instead of an AttributeError. The unfinished draft is kept below.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Trait.create_offspring_random_mating is not implemented yet")

    # still working on this 10/23 (draft body for create_offspring_random_mating)
    #     offspring_alleles = []
    #     for n in range(0, offspring_trait.n):
    #         par1, par2 = np.random.choice(range(0, self.n), p = fitness, size = 2, replace=False)
    #         par1_allele = self.alleles[par1]
    #         par2_allele = self.alleles[par2]
    #         par1_alleles = [0, 0] if par1_allele == 0 else ([0, 1] if par1_allele == 1 else [1, 1])
    #         par2_alleles = [0, 0] if par2_allele == 0 else ([0, 1] if par2_allele == 1 else [1, 1])
    #         if self.type == "monogenic_recessive":
    #             punnet_dict = {{0}:0, {0, 1}: np.random.choice([0, 1], p=[0.75, 0.25]), {1}: np.random.choice([0,1,2], p=[0.25,0.5,0.25]), {1,2}:np.random.choice([1,2]), {2}:2}
    #             offspring_allele = punnet_dict[{par1_allele, par2_allele}]
    #             offspring_alleles.append(offspring_allele)
    #         elif self.type == "uncorrelated_polygenic":
    #             allele_from_par1 = np.random.choice(par1_alleles)
    #             allele_from_par2 = np.random.choice(par2_alleles)
    #             offspring_allele = allele_from_par1 + allele_from_par2
    #             offspring_alleles.append(offspring_allele)
    #         elif self.type == "correlated_polygenic":
    #             correlated_trait = self.correlated_trait
    #             offspring_allele =
    #     offspring_trait.alleles = offspring_alleles

    def create_offspring_assortative_mating(self, offspring_trait, fitness):
        """Placeholder for assortative mating; currently does nothing."""
        pass

In [None]:
class Generation():
    """A population of ``n`` individuals together with their simulated traits."""

    def __init__(self, n):
        """Create an empty generation of ``n`` individuals."""
        self.n = n
        self.traits = []
        # Mating probabilities set by calculate_fitness(); None until then.
        self.fitness = None

    def add_trait(self, trait):
        """Attach a trait whose ``phenotypes`` has one entry per individual."""
        self.traits.append(trait)

    def calculate_fitness(self, fitness_func):
        """Compute each individual's relative fitness and store it in ``self.fitness``.

        ``fitness_func`` is called once per individual with the list of that
        individual's phenotypes (one per trait, in the order the traits were
        added). The raw values are divided by their sum so that
        ``self.fitness`` can be used directly as the ``p`` argument of
        ``np.random.choice``.

        Raises:
            ValueError: If any raw fitness is negative, or if the raw values
                do not sum to a positive number.
        """
        raw = np.asarray(
            [fitness_func([trait.phenotypes[i] for trait in self.traits])
             for i in range(self.n)],
            dtype=float,
        )
        if np.any(raw < 0):
            raise ValueError("fitness_func returned a negative fitness")
        total = raw.sum()
        if not total > 0:
            raise ValueError("fitness values must sum to a positive number")
        # Dividing by the sum (n * mean) replaces the former hard-coded
        # x / 1000 / mean, which only summed to 1 when n was 1000.
        self.fitness = raw / total

    def _create_offspring(self, new_n, mating_method):
        """Build the next generation by calling ``mating_method`` on every trait."""
        if self.fitness is None:
            raise RuntimeError(
                "calculate_fitness() must be called before creating offspring")
        offspring = Generation(new_n)
        for trait in self.traits:
            offspring_trait = Trait(trait.type, trait.s_causal, trait.s_noncausal, trait.h2, new_n)
            getattr(trait, mating_method)(offspring_trait, self.fitness)
            offspring.add_trait(offspring_trait)
        return offspring

    def create_offspring_random_mating(self, new_n):
        """Return a new Generation of ``new_n`` individuals from random mating.

        Raises:
            RuntimeError: If calculate_fitness() has not been called.
        """
        return self._create_offspring(new_n, "create_offspring_random_mating")

    def create_offspring_assortative_mating(self, new_n):
        """Return a new Generation of ``new_n`` individuals from assortative mating.

        Raises:
            RuntimeError: If calculate_fitness() has not been called.
        """
        return self._create_offspring(new_n, "create_offspring_assortative_mating")

### Validation

In [52]:
import numpy as np
from scipy.stats import pearsonr

def validate_two_trait_simulation(trait1, trait2, phen1, phen2, phen1_genetic, phen2_genetic, phen1_env, phen2_env):
    """
    Compare realized h2, rg and re of a two-trait simulation with the targets.

    Targets are read from trait1.h2, trait2.h2, trait1.rg_target and
    trait1.re_target; the two *_target attributes must have been set on
    trait1 by the caller. Prints a short report and returns the realized
    statistics in a dict.
    """

    def variance_share(genetic, phenotype):
        # Empirical heritability: var(G) / var(P)
        return np.var(genetic) / np.var(phenotype)

    def pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    h2_trait1_emp = variance_share(phen1_genetic, phen1)
    h2_trait2_emp = variance_share(phen2_genetic, phen2)

    # Realized genetic, environmental and phenotypic correlations
    rg_emp = pearson(phen1_genetic, phen2_genetic)
    re_emp = pearson(phen1_env, phen2_env)
    rP_emp = pearson(phen1, phen2)

    # Expected phenotypic correlation:
    # rg * sqrt(h2_1 * h2_2) + re * sqrt((1 - h2_1) * (1 - h2_2))
    genetic_part = rg_emp * np.sqrt(h2_trait1_emp * h2_trait2_emp)
    environment_part = re_emp * np.sqrt((1 - h2_trait1_emp) * (1 - h2_trait2_emp))
    rP_expected = genetic_part + environment_part

    print("Heritability (target, realized):")
    print(f"  Trait 1 (eye_color): {trait1.h2:.3f}, {h2_trait1_emp:.3f}")
    print(f"  Trait 2 (hair_color): {trait2.h2:.3f}, {h2_trait2_emp:.3f}\n")

    print(f"Genetic correlation (target, realized): {trait1.rg_target:.3f}, {rg_emp:.3f}")
    print(f"Environmental correlation (target, realized): {trait1.re_target:.3f}, {re_emp:.3f}\n")

    print(f"Phenotypic correlation (expected, realized): {rP_expected:.3f}, {rP_emp:.3f}")

    summary = {}
    summary["h2_trait1_emp"] = h2_trait1_emp
    summary["h2_trait2_emp"] = h2_trait2_emp
    summary["rg_emp"] = rg_emp
    summary["re_emp"] = re_emp
    summary["rP_emp"] = rP_emp
    summary["rP_expected"] = rP_expected
    return summary

# Trait's constructor is (type, s_causal, s_noncausal, h2, n); the type label
# was missing here, which raises TypeError with the current class definition.
# NOTE(review): "correlated_polygenic" is the label used in the draft mating
# code - confirm it is the intended value.
eye_color = Trait("correlated_polygenic", 500, 0, 0.3, 1000)
hair_color = Trait("correlated_polygenic", 500, 0, 0.7, 1000)

phen1, phen2, g1, g2, e1, e2 = eye_color.define_two_correlated_polygenic_traits(
    hair_color, rg=0.9, re=0.2
)

# Store targets for easy reference (validate_two_trait_simulation reads them
# from the first trait)
eye_color.rg_target = 0.9
eye_color.re_target = 0.2

results = validate_two_trait_simulation(eye_color, hair_color, phen1, phen2, g1, g2, e1, e2)

Heritability (target, realized):
  Trait 1 (eye_color): 0.300, 0.263
  Trait 2 (hair_color): 0.700, 0.688

Genetic correlation (target, realized): 0.900, 0.899
Environmental correlation (target, realized): 0.200, 0.259

Phenotypic correlation (expected, realized): 0.507, 0.557


### Generate First Generation

In [53]:
# Trait's constructor is (type, s_causal, s_noncausal, h2, n); the type label
# was missing here, which raises TypeError with the current class definition.
# NOTE(review): "correlated_polygenic" is the label used in the draft mating
# code - confirm it is the intended value.
eye_color = Trait("correlated_polygenic", 50, 100, 0.3, 100)
hair_color = Trait("correlated_polygenic", 50, 100, 0.7, 100)
eye_color.define_two_correlated_polygenic_traits(hair_color, rg=0.9, re=0.3)

# Both traits describe the same 100 simulated individuals.
gen1 = Generation(100)
gen1.add_trait(eye_color)
gen1.add_trait(hair_color)

### Generate Next Generation

In [None]:
# w = 1 + B1p1 + B2p2
def linear_w_from_p(p1, p2, beta_1, beta_2):
    """Return the linear fitness w = 1 + beta_1 * p1 + beta_2 * p2.

    p1 and p2 may be scalars or equal-length sequences of phenotypes; the
    result is computed elementwise with NumPy.
    """
    first_term = beta_1 * np.asarray(p1)
    second_term = beta_2 * np.asarray(p2)
    return 1 + first_term + second_term