In [24]:
import numpy as np
import matplotlib.pyplot as plt
import random

file_name = "/content/drive/MyDrive/initial_population.vcf"

class Individual:
    def __init__(self, maternal, paternal):
        self.maternal = maternal
        self.paternal = paternal

def load_initial_population_from_vcf("/content/drive/MyDrive/initial_population.vcf", N, L):
    """Load initial population from a VCF file."""
    population = []
    with open(vcf_file, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue  # Skip headers
            parts = line.strip().split('\t')
            snp_data = parts[5:]  # Extract SNP data (skip metadata)

            # Create individuals: 2 columns per individual (maternal and paternal alleles)
            for i in range(N):
                maternal = [int(snp_data[2 * i]) for _ in range(L)]
                paternal = [int(snp_data[2 * i + 1]) for _ in range(L)]
                population.append(Individual(maternal, paternal))
    return population

def fitness_function(individual, hetero_snp=None, alt_snp=None):
    fitness = 1
    if hetero_snp is not None and alt_snp is not None:
        # Adjust fitness for SNP42 based on allele configuration
        if (individual.maternal[hetero_snp] + individual.paternal[hetero_snp]) == 1:
            fitness = 1.5  # Heterozygous fitness
        elif (individual.maternal[alt_snp] + individual.paternal[alt_snp]) == 2:
            fitness = 2.0  # Homozygous alternate allele fitness
    return fitness

def select_parents(population, hetero_snp=None, alt_snp=None):
    fitness_values = [fitness_function(ind, hetero_snp, alt_snp) for ind in population]
    parent1 = random.choices(population, weights=fitness_values, k=1)[0]
    parent2 = parent1
    while parent2 == parent1:
        parent2 = random.choices(population, weights=fitness_values, k=1)[0]
    return parent1, parent2

def crossover(parent1, parent2, L):
    k = random.randint(0, L - 1)
    k_prime = random.randint(0, L - 1)
    child_maternal = parent1.maternal[:k] + parent2.paternal[k:]
    child_paternal = parent2.maternal[:k_prime] + parent1.paternal[k_prime:]
    return Individual(child_maternal, child_paternal)

def evolve_population(population, N, L, hetero_snp=None, alt_snp=None):
    new_population = []
    for _ in range(N):
        parent1, parent2 = select_parents(population, hetero_snp, alt_snp)
        child = crossover(parent1, parent2, L)
        new_population.append(child)
    return new_population

def extinction_probability(N):
    p = 1 / (2 * N)
    return (1 - p) ** (2 * N)

def calculate_extinction_and_fixation(population, L):
    extinction_count = 0
    fixation_count = 0
    for snp in range(L):
        allele_sum = sum(ind.maternal[snp] + ind.paternal[snp] for ind in population)
        if allele_sum == 0:
            extinction_count += 1
        elif allele_sum == 2 * len(population):
            fixation_count += 1
    return extinction_count / L, fixation_count / L

def plot_allele_frequencies_over_generations(N, L, generations):
    population = load_initial_population(file_name, N, L)
    frequencies = []

    for _ in range(generations):
        frequency = sum(ind.maternal[0] + ind.paternal[0] for ind in population) / (2 * N)
        frequencies.append(frequency)
        population = evolve_population(population, N, L)

    plt.plot(range(generations), frequencies[:100])
    plt.xlabel('Generation')
    plt.ylabel('Alternate Allele Frequency')
    plt.title('Allele Frequency of First SNP over 20 Generations')
    plt.show()

def plot_extinction_and_fixation(N, L, generations):
    extinction_probs = []
    fixation_probs = []

    population = load_initial_population(file_name, N, L)
    for _ in range(generations):
        population = evolve_population(population, N, L)
        extinction_prob, fixation_prob = calculate_extinction_and_fixation(population, L)
        extinction_probs.append(extinction_prob)
        fixation_probs.append(fixation_prob)

    plt.plot(range(generations), extinction_probs, label='Extinction Probability')
    plt.plot(range(generations), fixation_probs, label='Fixation Probability')
    plt.xlabel('Generation')
    plt.ylabel('Probability')
    plt.title('Extinction and Fixation Probabilities over Generations')
    plt.legend()
    plt.show()

def simulate_fitness_effects_snp42(N, L, generations, fitness_case):
    hetero_snp = 42
    alt_snp = 42

    extinction_counts = 0
    fixation_counts = 0

    for _ in range(1000):  # Simulate multiple times
        population = load_initial_population(file_name, N, L)
        for _ in range(generations):
            population = evolve_population(population, N, L, hetero_snp, alt_snp)
        extinction_prob, fixation_prob = calculate_extinction_and_fixation(population, L)
        extinction_counts += extinction_prob
        fixation_counts += fixation_prob

    print(f"Fitness Case {fitness_case}:")
    print(f"Estimated Extinction Probability: {extinction_counts / 1000}")
    print(f"Estimated Fixation Probability: {fixation_counts / 1000}")

# Parameters
N = 100  # Population size
L = 10000  # Number of SNPs
generations_20 = 20  # Number of generations for part (d)
generations_1000 = 1000  # Number of generations for part (e)

# Part (c): Calculate extinction probability analytically
print(f"Analytical extinction probability: {extinction_probability(N)}")

# Part (d): Plot allele frequencies over 20 generations
plot_allele_frequencies_over_generations(N, L, generations_20)

# Part (e): Plot extinction and fixation probabilities over 1000 generations
plot_extinction_and_fixation(N, L, generations_1000)

# Part (f), (g), (h): Simulate with fitness effects for SNP42
print("\nPart (f): SNP42 with beneficial effect")
simulate_fitness_effects_snp42(N, L, 100, "Beneficial")

print("\nPart (h): SNP42 with deleterious effect")
simulate_fitness_effects_snp42(N, L, 100, "Deleterious")




Analytical extinction probability: 0.36695782172616703


TypeError: load_initial_population() takes 2 positional arguments but 3 were given