In [1]:
import set_working_directory

In [2]:
from cogent3 import available_distances

# List the pairwise distance calculators that can be requested by name.
# The displayed table pairs each abbreviation (e.g. "tn93") with the
# molecular types it is suitable for.
available_distances()

Abbreviation,Suitable for moltype
paralinear,"dna, rna, protein"
logdet,"dna, rna, protein"
jc69,"dna, rna"
tn93,"dna, rna"
hamming,"dna, rna, protein, text, bytes"
pdist,"dna, rna, protein, text, bytes"


In [3]:
from cogent3 import load_aligned_seqs

# Load the primate BRCA1 alignment as DNA, then compute every pairwise
# distance with the "tn93" calculator directly from the alignment object.
aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
# show_progress=False presumably suppresses progress display -- confirm
# against the cogent3 documentation.
dists = aln.distance_matrix(calc="tn93", show_progress=False)
# Ending the cell on the bare name displays the matrix; the recorded output
# is a symmetric names-by-names table with 0.0 on the diagonal.
dists

names,Chimpanzee,Galago,Gorilla,HowlerMon,Human,Orangutan,Rhesus
Chimpanzee,0.0,0.1921,0.0054,0.0704,0.0089,0.014,0.0396
Galago,0.1921,0.0,0.1923,0.2157,0.1965,0.1944,0.1962
Gorilla,0.0054,0.1923,0.0,0.07,0.0086,0.0137,0.0393
HowlerMon,0.0704,0.2157,0.07,0.0,0.0736,0.0719,0.0736
Human,0.0089,0.1965,0.0086,0.0736,0.0,0.0173,0.0423
Orangutan,0.014,0.1944,0.0137,0.0719,0.0173,0.0,0.0411
Rhesus,0.0396,0.1962,0.0393,0.0736,0.0423,0.0411,0.0


In [4]:
from cogent3 import get_distance_calculator, load_aligned_seqs

# Alternative route: construct the distance calculator object explicitly
# instead of calling aln.distance_matrix().
# NOTE(review): unlike the other load_aligned_seqs calls in this notebook, no
# moltype is given here. The distances recorded for the following cell match
# the tn93 matrix computed with moltype="dna", so the result is unaffected in
# the recorded run -- confirm whether the omission is intentional.
aln = load_aligned_seqs("data/primate_brca1.fasta")
dist_calc = get_distance_calculator("tn93", alignment=aln)
# Displaying the object shows its concrete class (TN93Pair in the recorded
# output); no distances are shown at this point.
dist_calc

<cogent3.evolve.fast_distance.TN93Pair at 0x103774650>

In [5]:
# Uses the `dist_calc` object created in the previous cell: run it, then
# retrieve the pairwise distances it produced.
dist_calc.run(show_progress=False)
# Rebinds `dists`; the recorded values are identical to the matrix obtained
# earlier via aln.distance_matrix(calc="tn93").
dists = dist_calc.get_pairwise_distances()
dists

names,Chimpanzee,Galago,Gorilla,HowlerMon,Human,Orangutan,Rhesus
Chimpanzee,0.0,0.1921,0.0054,0.0704,0.0089,0.014,0.0396
Galago,0.1921,0.0,0.1923,0.2157,0.1965,0.1944,0.1962
Gorilla,0.0054,0.1923,0.0,0.07,0.0086,0.0137,0.0393
HowlerMon,0.0704,0.2157,0.07,0.0,0.0736,0.0719,0.0736
Human,0.0089,0.1965,0.0086,0.0736,0.0,0.0173,0.0423
Orangutan,0.014,0.1944,0.0137,0.0719,0.0173,0.0,0.0411
Rhesus,0.0396,0.1962,0.0393,0.0736,0.0423,0.0411,0.0


In [6]:
# Presumably the standard errors of the pairwise distance estimates from the
# run above -- confirm against the cogent3 documentation. The displayed table
# is labelled "Seq1 \ Seq2", is unrounded, and orders the names differently
# from the distance matrix.
dist_calc.stderr

Seq1 \ Seq2,Galago,HowlerMon,Rhesus,Orangutan,Gorilla,Human,Chimpanzee
Galago,0.0,0.0102748270583958,0.0096163078326485,0.0095356465322767,0.0094913822495401,0.0096150330918649,0.0094692680265901
HowlerMon,0.0102748270583958,0.0,0.0054118117125547,0.0053348584951611,0.0052656124746942,0.0054067602387489,0.0052735726201838
Rhesus,0.0096163078326485,0.0054118117125547,0.0,0.0039408549417865,0.003852798161903,0.0040050459201001,0.0038665597157698
Orangutan,0.0095356465322767,0.0053348584951611,0.0039408549417865,0.0,0.0022291124743011,0.0025151838791803,0.0022606571679022
Gorilla,0.0094913822495401,0.0052656124746942,0.003852798161903,0.0022291124743011,0.0,0.0017596919902326,0.0013848543487237
Human,0.0096150330918649,0.0054067602387489,0.0040050459201001,0.0025151838791803,0.0017596919902326,0.0,0.0017949285088691
Chimpanzee,0.0094692680265901,0.0052735726201838,0.0038665597157698,0.0022606571679022,0.0013848543487237,0.0017949285088691,0.0


In [7]:
from cogent3 import get_model, load_aligned_seqs
from cogent3.evolve import distance

# Model-based alternative: estimate pairwise distances under a substitution
# model obtained by name ("F81") rather than with a named distance calculator.
aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
d = distance.EstimateDistances(aln, submodel=get_model("F81"))
d.run(show_progress=False)
# Rebinds `dists` to the F81 estimates; the recorded values differ slightly
# from the tn93 matrix (e.g. Chimpanzee/Galago 0.1892 here vs 0.1921).
dists = d.get_pairwise_distances()
dists

names,Chimpanzee,Galago,Gorilla,HowlerMon,Human,Orangutan,Rhesus
Chimpanzee,0.0,0.1892,0.0054,0.0697,0.0089,0.014,0.0395
Galago,0.1892,0.0,0.1891,0.2112,0.1934,0.1915,0.193
Gorilla,0.0054,0.1891,0.0,0.0693,0.0086,0.0136,0.0391
HowlerMon,0.0697,0.2112,0.0693,0.0,0.0729,0.0713,0.0729
Human,0.0089,0.1934,0.0086,0.0729,0.0,0.0173,0.0421
Orangutan,0.014,0.1915,0.0136,0.0713,0.0173,0.0,0.041
Rhesus,0.0395,0.193,0.0391,0.0729,0.0421,0.041,0.0


In [8]:
from cogent3 import load_aligned_seqs

# Rebuild the tn93 distance matrix. `dists` was last bound to the F81
# estimates, so recomputing here makes the result below refer to tn93.
aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
dists = aln.distance_matrix(calc="tn93", show_progress=False)
# Report which pair of sequences is furthest apart; the recorded result is a
# tuple of two names.
dists.max_pair()

('Galago', 'HowlerMon')

In [9]:
# Index the matrix with the (name, name) tuple from max_pair() to get the
# distance itself; the value is unrounded (displayed as 0.2157 in the table).
dists[dists.max_pair()]

0.2156879978632928

In [10]:
from cogent3 import load_aligned_seqs

# Same tn93 matrix as before, recomputed from the alignment file.
aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
dists = aln.distance_matrix(calc="tn93", show_progress=False)
# Report which pair of sequences is closest; the recorded result is a pair of
# two different names, so the 0.0 diagonal entries are not what is returned.
dists.min_pair()

('Chimpanzee', 'Gorilla')

In [11]:
# Index the matrix with the (name, name) tuple from min_pair() to get the
# smallest distance, unrounded (displayed as 0.0054 in the table).
dists[dists.min_pair()]

0.005354100636467117