# Lab 5. Where do we come from?

In [21]:
import pathlib

from Bio import Phylo
from matplotlib import pyplot as plt

In [37]:
# All data locations are resolved relative to the notebook's working directory.
local_path = pathlib.Path('.')
human_dir = local_path.joinpath('Human')

In [27]:
# Merge every per-population FASTA file under `human_dir` into one multi-FASTA
# file. Each record header is prefixed with the population name taken from the
# file name, which is expected to look like "<accession> <population>.fasta".
records = []

# Only *.fasta files are read (stray files such as checkpoints are skipped), and
# the listing is sorted so the record order does not depend on the filesystem.
for fasta_file in sorted(human_dir.glob('*.fasta')):
    # Work on the file stem rather than the full path string, so a space in a
    # parent directory or a different extension length cannot corrupt the name.
    _, separator, population_name = fasta_file.stem.partition(' ')
    if not separator:
        raise ValueError(
            f'Expected "<accession> <population>.fasta" file name, got {fasta_file.name!r}'
        )
    with open(fasta_file, 'r') as in_f:
        # Drop the leading '>' of the original header; the population name is
        # spliced in front of what remains.
        original_record = in_f.read().strip()[1:]
    # The space after '>' reproduces the header format this cell has always
    # written ("> <population>_<original header>").
    records.append('> ' + population_name + '_' + original_record + '\n')

data = ''.join(records)

with open('human_mt_dna.fasta', 'w') as out_f:
    out_f.write(data)

In [57]:
# Parse the aligned multi-FASTA file into a {sample name: sequence} mapping.
with open('all_alignment.fasta', 'r') as f:
    s = f.read()

# NOTE: the spelling `sequnce_dict` is kept because later cells refer to it.
sequnce_dict = {}
sample = None
chunks = []
for line in s.splitlines():
    if line.startswith('>'):
        # A header line closes the previous record (if any) and opens a new one.
        if sample is not None:
            sequnce_dict[sample] = ''.join(chunks)
        # Strip so that "> name", ">name" and "> name " all yield the key "name";
        # previously trailing whitespace ended up inside the dictionary keys.
        sample = line[1:].strip()
        chunks = []
    elif sample is not None:
        # Sequence lines are concatenated; stripping also removes a stray '\r'.
        chunks.append(line.strip())

# Flush the final record, which has no following header to close it.
if sample is not None:
    sequnce_dict[sample] = ''.join(chunks)

In [62]:
# Select samples by name prefix: the first "Central..." sample serves as the
# reference, and the Denisovan / Neanderthal samples are collected separately.
central_africa = next(name for name in sequnce_dict if name.startswith('Central'))
denisovans_samples = [name for name in sequnce_dict if name.startswith('Deniso')]
neanderthal_samples = [name for name in sequnce_dict if name.startswith('Neande')]
non_homo_sapiens = denisovans_samples + neanderthal_samples

print(central_africa)
print(denisovans_samples)
print(neanderthal_samples)

Central_African_FJ713601.1
['Denisova_FN673705.1', 'Denisova_FR695060.1', 'Denisova_KT780370.1']
['Neanderthal_KX198088.1', 'Neanderthal_KX198084.1 ', 'Neanderthal_KX198085.1', 'Neanderthal_KX198086.1', 'Neanderthal_KX198087.1']


In [61]:
def count_mutations(seq1, seq2):
    """Count the positions at which two sequences differ.

    The sequences are compared element by element from the start. If their
    lengths differ, the comparison stops at the end of the shorter one and the
    surplus tail of the longer sequence is ignored.

    Args:
        seq1: First sequence (any iterable of comparable items, e.g. a str).
        seq2: Second sequence.

    Returns:
        The number of compared positions whose elements are not equal.
    """
    return sum(1 for seq1_base, seq2_base in zip(seq1, seq2) if seq1_base != seq2_base)

According to the literature, the mutation rate of hominid mitochondrial DNA is approximately one mutation every 3000 years.

In [89]:
# Compare the Central African reference against every other modern-human sample
# (the reference itself and the Denisovan / Neanderthal samples are excluded).
original_seq = sequnce_dict[central_africa]
mutation_counts = [
    count_mutations(original_seq, seq)
    for sample, seq in sequnce_dict.items()
    if not (sample == central_africa or sample in non_homo_sapiens)
]
count = len(mutation_counts)
total_mutations = sum(mutation_counts)

avg_mutations = total_mutations / count
# One mutation every 3000 years; later cells reuse `mut_rate`.
mut_rate = 1. / 3000
# The messages below report half of the average pairwise difference.
print(f'Average number of mutations: {avg_mutations:.2f}')
print(f'This means that out mtDNA has around {avg_mutations / 2:.1f} mutations compared to mitochondrial Eve.')
print(f'Taking mutation rate into consideration we conclude that mitochondrial Eve is around {avg_mutations / 2 * 3000:.1f} years old.')

Average number of mutations: 90.23
This means that out mtDNA has around 45.1 mutations compared to mitochondrial Eve.
Taking mutation rate into consideration we conclude that mitochondrial Eve is around 135340.9 years old.


## Add Denisovans and Neanderthals

In [39]:
# Start from the merged modern-human FASTA, then append every Denisovan and
# Neanderthal file (in that order), one record block per line group.
with open('human_mt_dna.fasta', 'r') as in_f:
    pieces = [in_f.read().strip()]

for pattern in ('Denisova/*', 'Neanderthal/*'):
    for fasta_file in local_path.glob(pattern):
        file_name = str(fasta_file)
        print(file_name)
        with open(file_name, 'r') as in_f:
            pieces.append(in_f.read().strip())

# Every piece, including the last, is terminated by exactly one newline.
data = '\n'.join(pieces) + '\n'

with open('human_neanderthal_denisovans.fasta', 'w') as out_f:
    out_f.write(data)

Denisova/FN673705 Denisova.fasta
Denisova/FR695060 Denisova.fasta
Denisova/KT780370 Denisova.fasta
Neanderthal/GoyetQ57-2 Neanderthal.fasta
Neanderthal/GoyetQ56-1 Neanderthal.fasta
Neanderthal/GoyetQ305-4 Neanderthal.fasta
Neanderthal/GoyetQ374a-1 Neanderthal.fasta
Neanderthal/GoyetQ305-7 Neanderthal.fasta


In [None]:
# Derive a per-base, per-year mutation rate from the average number of
# differences between the Central African reference and the other modern-human
# samples (the reference itself and the archaic samples are excluded).
original_seq = sequnce_dict[central_africa]
count = 0
total_mutations = 0
for sample, seq in sequnce_dict.items():
    if sample != central_africa and sample not in non_homo_sapiens:
        count += 1
        total_mutations += count_mutations(original_seq, seq)

avg_mutations = total_mutations / count

# NOTE(review): 200000 is used as a number of years in the rate below;
# presumably an assumed age of mitochondrial Eve - confirm the source.
assumed_years = 200000

# Report the reference length explicitly instead of the length of whichever
# record the loop above happened to visit last.
print(len(original_seq))
print(f'Average number of mutations: {avg_mutations:.2f}')
print(f'Mutations per base: {avg_mutations / len(original_seq)}')
print(f'Calculated mutation rate: {avg_mutations / len(original_seq) / assumed_years} per base per year')

In [99]:
# Pair every Neanderthal sequence with every sequence that is neither
# Neanderthal nor Denisovan, and average the number of differing positions.
neand_pair_counts = [
    count_mutations(sequnce_dict[neand_sample], seq)
    for neand_sample in neanderthal_samples
    for sample, seq in sequnce_dict.items()
    if sample not in non_homo_sapiens
]
count = len(neand_pair_counts)
total_mutations = sum(neand_pair_counts)
avg_mutations = total_mutations / count

# Half of the average pairwise difference is converted to years via `mut_rate`.
print(f'Average number of mutations: {avg_mutations:.1f}.')
print(f'In this case the last common ancestor for neanderthal and homo sapiens lived around {avg_mutations / 2 / mut_rate:.0f} years ago.')

Average number of mutations: 220.2.
In this case the last common ancestor for neanderthal and homo sapiens lived around 330367 years ago.


In [103]:
# Pair every Denisovan sequence with every sequence that is neither
# Neanderthal nor Denisovan, and average the number of differing positions.
den_pair_counts = [
    count_mutations(sequnce_dict[den_sample], seq)
    for den_sample in denisovans_samples
    for sample, seq in sequnce_dict.items()
    if sample not in non_homo_sapiens
]
count = len(den_pair_counts)
total_mutations = sum(den_pair_counts)
avg_mutations = total_mutations / count

# Half of the average pairwise difference is converted to years via `mut_rate`.
print(f'Average number of mutations: {avg_mutations:.1f}.')
print(f'In this case the last common ancestor for denisovans and homo sapiens lived around {avg_mutations / 2 / mut_rate:.0f} years ago.')

Average number of mutations: 395.0.
In this case the last common ancestor for denisovans and homo sapiens lived around 592556 years ago.


<img src="archaeopteryx_js.png">