## Genome Generation
Given a number of chromosome pairs and a number of genes, this notebook randomly generates chromosomes with the genes distributed at random positions along them.

In [1]:
# import packages
import random
import string
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import numpy as np
import networkx as nx
from IPython.display import display
# import functions
from functions import *
from genome_generator import *
from meiosis_simu import *

In [2]:
# Build a random genome and print its per-chromosome gene table.
# The first two arguments appear to be the number of chromosome pairs (3) and
# the number of genes (20); this matches the three chromosomes and the genes
# A-T shown in this cell's output.
# NOTE(review): the meaning of the third argument (407) is not visible from
# this notebook (possibly a seed or a length parameter) -- confirm in
# genome_generator.
genome=genomecreate(3, 20, 407)
print_genome(genome)


=== Chr1 | Length: 1220 bp ===
Gene  Position ChrA_Before ChrB_Before
   C       234           C           c
   P       347           P           p
   S       485           S           s
   D       624           D           d
   H       715           H           h
   E       998           E           e
   F      1189           F           f

=== Chr2 | Length: 1269 bp ===
Gene  Position ChrA_Before ChrB_Before
   A        99           A           a
   T       516           T           t
   Q       891           Q           q
   B       975           B           b
   G       991           G           g
   N      1228           N           n

=== Chr3 | Length: 1190 bp ===
Gene  Position ChrA_Before ChrB_Before
   I       272           I           i
   K       347           K           k
   O       355           O           o
   L       440           L           l
   M       510           M           m
   R       819           R           r
   J       918           J           j


In [3]:
# Simulate 2000 offspring: each gamete produced by meiosis is combined with a
# gamete string that lists the lowercase allele of every gene (a..t).
# NOTE(review): this looks like a test cross against an all-recessive parent
# (offspring are printed as e.g. "aa/Bb/..."); confirm against meiosis_simu.
genotypes = []

for _ in range(2000):
    g1 = meiosis(genome)
    genotype = fertilization(g1, "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t")
    genotypes.append(genotype)

# Build the frame once, after all genotypes are collected. Constructing it
# inside the loop re-copied the whole list on every iteration (quadratic work)
# and left `df_genotypes` undefined if the loop body never ran.
df_genotypes = pd.DataFrame(genotypes, columns=["Genotype"])

print(df_genotypes.head())

                                            Genotype
0  aa/bb/cc/dd/ee/ff/gg/hh/Ii/Jj/Kk/Ll/Mm/nn/Oo/p...
1  aa/bb/Cc/Dd/Ee/Ff/gg/Hh/Ii/jj/kk/ll/mm/Nn/oo/P...
2  Aa/bb/Cc/Dd/Ee/Ff/gg/Hh/Ii/jj/Kk/Ll/Mm/nn/Oo/P...
3  aa/Bb/cc/dd/ee/ff/Gg/hh/ii/Jj/kk/ll/mm/Nn/oo/p...
4  Aa/Bb/cc/dd/ee/ff/Gg/hh/Ii/jj/Kk/Ll/Mm/Nn/Oo/p...


In [4]:
# Simulate 1000 offspring where both gametes come from independent meioses of
# the same genome.
# NOTE(review): presumably a self-cross / F2-style population -- confirm
# against meiosis_simu.
genotypes2 = []

for _ in range(1000):
    g1 = meiosis(genome)
    g2 = meiosis(genome)
    genotype = fertilization(g1, g2)
    genotypes2.append(genotype)

# Build the frame once, after the loop. Doing it per iteration rebuilt the
# whole DataFrame 1000 times and left `df_genotypes2` undefined for an empty
# loop.
df_genotypes2 = pd.DataFrame(genotypes2, columns=["Genotype"])

In [5]:
# Pairwise recombination matrix for the first offspring population, scaled by
# 100 and cast to float in a single expression.
# NOTE(review): the scaling presumably converts fractions to percent -- confirm
# what compute_recombination_matrix returns.
recomb_matrix = (100 * compute_recombination_matrix(df_genotypes)).astype(float)

# Check the element dtype, then show the matrix rounded to 3 decimals.
print(recomb_matrix.values.dtype)
display(recomb_matrix.round(3))

float64


Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T
A,0.0,36.2,52.25,51.4,51.3,50.2,36.75,51.6,49.9,50.95,50.65,50.95,51.05,45.85,50.75,51.45,32.45,51.15,51.05,18.0
B,36.2,0.0,50.85,50.9,51.6,51.9,0.55,51.1,48.8,48.95,48.85,48.55,49.05,9.65,48.95,51.35,3.75,48.95,51.55,18.2
C,52.25,50.85,0.0,16.45,31.75,39.25,51.0,20.35,50.25,51.4,50.3,50.3,49.9,50.2,50.1,4.2,50.9,50.9,9.7,51.65
D,51.4,50.9,16.45,0.0,15.3,22.8,50.95,3.9,48.6,50.15,48.75,48.95,48.15,50.05,48.75,12.25,50.95,49.65,6.75,51.1
E,51.3,51.6,31.75,15.3,0.0,7.5,51.65,11.4,49.5,50.55,49.85,50.45,49.55,50.55,49.95,27.55,51.25,50.35,22.05,51.1
F,50.2,51.9,39.25,22.8,7.5,0.0,51.95,18.9,48.6,49.15,48.85,49.25,48.65,51.15,48.85,35.05,51.55,48.95,29.55,50.9
G,36.75,0.55,51.0,50.95,51.65,51.95,0.0,51.05,48.35,48.6,48.4,48.1,48.6,9.1,48.5,51.5,4.3,48.5,51.7,18.75
H,51.6,51.1,20.35,3.9,11.4,18.9,51.05,0.0,49.4,50.75,49.55,49.95,48.95,50.25,49.55,16.15,50.95,50.45,10.65,51.8
I,49.9,48.8,50.25,48.6,49.5,48.6,48.35,49.4,0.0,27.45,3.45,6.95,10.25,48.55,3.75,49.55,48.45,23.25,48.95,48.9
J,50.95,48.95,51.4,50.15,50.55,49.15,48.6,50.75,27.45,0.0,24.0,20.5,17.2,48.2,23.7,51.5,48.8,4.2,51.5,50.55


### Grouping genes into linkage groups

##### This displays the distance matrices of the genes, grouped by the chromosome they are on:

In [6]:
# Partition the loci into linkage groups based on the recombination matrix.
# NOTE(review): the "Linking X-Y with r=..." lines in this cell's output are
# presumably printed by this call -- confirm in the helper module.
groups = group_loci_by_linkage(recomb_matrix)
# Split the full matrix into one sub-matrix per linkage group. The result is
# indexed by labels such as "Chr1" (see the map_genes cell).
chrom_matrices = get_chromosome_matrices(recomb_matrix, groups)
# Show each per-group matrix.
display_chromosome_matrices(chrom_matrices)

Linking A-B with r=36.200
Linking A-G with r=36.750
Linking A-N with r=45.850
Linking A-Q with r=32.450
Linking A-T with r=18.000
Linking B-G with r=0.550
Linking B-N with r=9.650
Linking B-Q with r=3.750
Linking B-T with r=18.200
Linking C-D with r=16.450
Linking C-E with r=31.750
Linking C-F with r=39.250
Linking C-H with r=20.350
Linking C-P with r=4.200
Linking C-S with r=9.700
Linking D-E with r=15.300
Linking D-F with r=22.800
Linking D-H with r=3.900
Linking D-P with r=12.250
Linking D-S with r=6.750
Linking E-F with r=7.500
Linking E-H with r=11.400
Linking E-P with r=27.550
Linking E-S with r=22.050
Linking F-H with r=18.900
Linking F-P with r=35.050
Linking F-S with r=29.550
Linking G-N with r=9.100
Linking G-Q with r=4.300
Linking G-T with r=18.750
Linking H-P with r=16.150
Linking H-S with r=10.650
Linking I-J with r=27.450
Linking I-K with r=3.450
Linking I-L with r=6.950
Linking I-M with r=10.250
Linking I-O with r=3.750
Linking I-R with r=23.250
Linking J-K with r=24.000

Unnamed: 0,A,B,G,N,Q,T
A,0.0,36.2,36.8,45.8,32.4,18.0
B,36.2,0.0,0.5,9.6,3.8,18.2
G,36.8,0.5,0.0,9.1,4.3,18.8
N,45.8,9.6,9.1,0.0,13.4,27.8
Q,32.4,3.8,4.3,13.4,0.0,14.4
T,18.0,18.2,18.8,27.8,14.4,0.0

Unnamed: 0,C,D,E,F,H,P,S
C,0.0,16.4,31.8,39.2,20.3,4.2,9.7
D,16.4,0.0,15.3,22.8,3.9,12.2,6.8
E,31.8,15.3,0.0,7.5,11.4,27.6,22.0
F,39.2,22.8,7.5,0.0,18.9,35.0,29.6
H,20.3,3.9,11.4,18.9,0.0,16.2,10.6
P,4.2,12.2,27.6,35.0,16.2,0.0,5.5
S,9.7,6.8,22.0,29.6,10.6,5.5,0.0

Unnamed: 0,I,J,K,L,M,O,R
I,0.0,27.4,3.4,7.0,10.2,3.8,23.2
J,27.4,0.0,24.0,20.5,17.2,23.7,4.2
K,3.4,24.0,0.0,3.5,6.8,0.3,19.8
L,7.0,20.5,3.5,0.0,3.3,3.2,16.3
M,10.2,17.2,6.8,3.3,0.0,6.5,13.0
O,3.8,23.7,0.3,3.2,6.5,0.0,19.5
R,23.2,4.2,19.8,16.3,13.0,19.5,0.0


In [7]:
# Map the genes of every linkage group and keep the returned positions.
# map_genes prints the mapped positions (in cM) and the total absolute error
# for the matrix it is given.
#
# The three copy-pasted calls are replaced by one loop over the same keys. The
# results are stored in `gene_maps` instead of being discarded, which also
# stops the last call's raw array from being echoed as a stray cell output
# that duplicates the printed positions.
#
# NOTE(review): the linkage-group labels need not match the chromosome
# numbering printed by print_genome. In the captured output, "Chr1" here holds
# A, B, G, N, Q, T, which print_genome lists under Chr2.
gene_maps = {
    name: map_genes(chrom_matrices[name])
    for name in ("Chr1", "Chr2", "Chr3")
}

Mapped positions:
A: 2.15 cM
B: 38.35 cM
G: 38.85 cM
N: 47.95 cM
Q: 34.55 cM
T: 20.15 cM

Total absolute error: 0.2
Mapped positions:
C: 41.66 cM
D: 25.26 cM
E: 9.96 cM
F: 2.46 cM
H: 21.36 cM
P: 37.46 cM
S: 31.96 cM

Total absolute error: 0.5
Mapped positions:
I: 13.77 cM
J: 41.17 cM
K: 17.17 cM
L: 20.67 cM
M: 23.97 cM
O: 17.47 cM
R: 36.97 cM

Total absolute error: 0.2


array([13.76807083, 41.16807083, 17.16807083, 20.66807083, 23.96807083,
       17.46807083, 36.96807083])