In [44]:
import pyvolve
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# IPython-only help syntax (not plain Python): displays the signature and
# docstring of pyvolve.ReadFrequencies. Exploration aid only; nothing is assigned.
pyvolve.ReadFrequencies?

[0;31mInit signature:[0m [0mpyvolve[0m[0;34m.[0m[0mReadFrequencies[0m[0;34m([0m[0mby[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
   This class may be used to compute frequencies directly from a specified sequence file. Frequencies may be computed globally (using entire file), or based on specific columns (i.e. site-specific frequencies), provided the file contains a sequence alignment.

   Required positional include, 
       1. **by**. See parent class StateFrequencies for details.
   
   Required keyword arguments include, 
       1. **file** is the file containing sequences from which frequencies will be computed. By default, this file is assumed to be in FASTA format, although you can specify a different format with the optional argument **format**
   
   Optional keyword arguments include, 
       1. **format** is the sequence file format (case-insensitive). Sequence files are parsed using Biopython, so an

In [48]:
# Frequency reader over the multi-sequence FASTA file, working in codon space.
# The path is relative to the notebook's working directory.
fasta_path = "../tmp/query_multiple.fasta"
f = pyvolve.ReadFrequencies("codon", file=fasta_path)

In [40]:
# Nucleotide sequence used for the manual codon tally in the next cell.
seq = "ATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAACCGACTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTGGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTACTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACA"
# Remainder of the length divided by 3; zero means the sequence splits into
# whole triplets with nothing left over.
divmod(len(seq), 3)[1]

0

In [51]:
# Cut the sequence into consecutive, non-overlapping triplets.
codons = [seq[start:start + 3] for start in range(0, len(seq) - 2, 3)]
# Relative frequency of each triplet that actually occurs, ordered by triplet.
# Triplets absent from `seq` get no entry, so this array can be shorter than a
# full codon table.
observed_codon_freqs = pd.Series(codons).value_counts(normalize=True).sort_index()
observed_codon_freqs.values

array([0.02212389, 0.03982301, 0.00442478, 0.00884956, 0.03982301,
       0.05309735, 0.02212389, 0.01327434, 0.03539823, 0.07079646,
       0.01769912, 0.05309735, 0.03097345, 0.02654867, 0.01327434,
       0.03539823, 0.01327434, 0.00884956, 0.00884956, 0.08849558,
       0.03097345, 0.03097345, 0.02212389, 0.01327434, 0.00442478,
       0.01769912, 0.05309735, 0.00442478, 0.01327434, 0.01327434,
       0.02212389, 0.01769912, 0.00442478, 0.00442478, 0.00884956,
       0.00884956, 0.00442478, 0.01769912, 0.00884956, 0.01769912,
       0.01327434, 0.01769912, 0.02212389, 0.00442478, 0.01769912])

In [49]:
# Ask the ReadFrequencies object `f` (built from the FASTA file in an earlier
# cell) for codon-type frequencies.
frequencies = f.compute_frequencies(type="codon")
# Bare name as the last expression so the notebook displays the array.
frequencies

array([0.02214674, 0.040625  , 0.00461957, 0.00910326, 0.04116848,
       0.05203804, 0.        , 0.01875   , 0.        , 0.01290761,
       0.        , 0.00013587, 0.03600543, 0.0705163 , 0.01752717,
       0.05815217, 0.03138587, 0.0267663 , 0.00013587, 0.00027174,
       0.01331522, 0.03627717, 0.00013587, 0.01304348, 0.00883152,
       0.00883152, 0.00013587, 0.00013587, 0.09402174, 0.03125   ,
       0.02717391, 0.02228261, 0.01345109, 0.0044837 , 0.        ,
       0.        , 0.01766304, 0.05529891, 0.00407609, 0.01358696,
       0.01345109, 0.02241848, 0.        , 0.        , 0.01752717,
       0.00461957, 0.00502717, 0.00923913, 0.00910326, 0.00434783,
       0.01793478, 0.00923913, 0.        , 0.01752717, 0.        ,
       0.00013587, 0.        , 0.01820652, 0.02269022, 0.00434783,
       0.01793478])

array([0.        , 0.        , 0.        , 0.        , 0.06380173,
       0.06380173, 0.06380173, 0.06380173, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.07471842, 0.07471842, 0.07471842, 0.07471842,
       0.02668276, 0.02668276, 0.02668276, 0.02668276, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.16959417,
       0.        , 0.16959417, 0.        , 0.        , 0.        ,
       0.        ])

In [None]:

# Load the phylogeny from a file that holds a newick tree.
tree_file = "file_with_tree.tre"
my_tree = pyvolve.read_tree(file=tree_file)


# Inputs for the three example customized parameter dictionaries built below.
# Note that each of those dictionaries could have "fitness" rather than
# "state_freqs" as a key.

# 61 codon frequencies (presumably one per sense codon, in the order pyvolve
# expects -- confirm against the pyvolve documentation).
codon_freqs = [0.02792, 0.01502, 0.01755, 0.01635, 0.02512, 0.01505, 0.01997, 0.00779, 0.02043, 0.01193, 0.01404, 0.01176, 0.01449, 0.01774, 0.00658, 0.02969, 0.02937, 0.00726, 0.01316, 0.00458, 0.02227, 0.00045, 0.00697, 0.00368, 0.01169, 0.01274, 0.01866, 0.0125, 0.00914, 0.00119, 0.02332, 0.02301, 0.00315, 0.02554, 0.02328, 0.01468, 0.02868, 0.02669, 0.00417, 0.01947, 0.0145, 0.01586, 0.02783, 0.01179, 0.006, 6e-05, 0.00549, 0.02555, 0.03147, 0.03111, 0.02524, 0.00276, 0.02051, 0.01129, 0.02267, 0.02258, 0.00012, 0.03009, 0.02104, 0.02865, 0.0283]

# For MutSel models, if you provide only 1 pair for each mutation rate
# (e.g. only AC and not CA), then Pyvolve will make mutation rates symmetric.
custom_mutation_sym = {"AC": 1.5, "AG": 2.5, "AT": 0.5, "CG": 0.8, "CT": 0.99, "GT": 1.56}

# Asymmetric rates: one entry per ordered nucleotide pair (12 in total).
# The final key must be "TG"; it was previously a second "TC", which silently
# overwrote the T->C rate (2.3) with 2.56 and left the T->G rate undefined.
custom_mutation_asym = {
    "AC": 1.5, "CA": 0.8,
    "AG": 2.5, "GA": 1.2,
    "AT": 0.5, "TA": 1.1,
    "CG": 0.8, "GC": 0.9,
    "CT": 0.99, "TC": 2.3,
    "GT": 1.56, "TG": 2.56,
}

# Three alternative parameter dictionaries for the MutSel model. All of them
# supply the same codon frequencies; they differ only in how the mutation
# rates are customized.

# Variant 1: symmetric mutation rates.
parameters1 = {
    "state_freqs": codon_freqs,
    "mu": custom_mutation_sym,
}

# Variant 2: asymmetric mutation rates.
parameters2 = {
    "state_freqs": codon_freqs,
    "mu": custom_mutation_asym,
}

# Variant 3: mutation rates customized through kappa.
parameters3 = {
    "state_freqs": codon_freqs,
    "kappa": 4.25,
}

# Build the MutSel model. Any of the three dictionaries above is acceptable here.
my_model = pyvolve.Model("mutsel", parameters3)

# Assign the model to a partition; size=250 asks for 250 codon positions to be evolved.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve the partition along the tree.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()