In [1]:
## Bring in needed mods
import pandas as pd, numpy as np, glob, sys, os, seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

## Load in QTL functions
sys.path.insert(0, '/home/croth/QTL-functions/SCRIPTS/')
import qtlfunctions as qtlfun

In [2]:
## Set paths
## Phenotype path
phenopath = '../PHENOTYPE/PROCESSED/Cellsize_shape_phenotype.csv'

## Centromere path
centpaths = '/home/croth/Downloads/B3502/DATA/FungiDB-48_CneoformansJEC21_centromeres.csv'

## GFF path
gffpath = '/home/croth/Downloads/B3502/REF/FungiDB-48_CneoformansJEC21.gff.gz'

## path to info dataframes
infopath = '../GENOTYPE/INFO/*.%s.B3502.progeny_info_cols.csv.gz'

## path to genotype dataframes
genopath = '../GENOTYPE/GT/*.%s.B3502.progeny_genotypes.csv.gz'

## path to depth dataframes
deptpath = '../GENOTYPE/DP/*.%s.B3502.progeny_depths.csv.gz'

## path to allelic dataframes
allepath = '../GENOTYPE/AF/*.%s.B3502.progeny_allele_ratios.csv.gz'

In [3]:
## Bring in centromers, gff, and phenotype data
## Load in centormere locs
centlocs = pd.read_csv(centpaths,index_col=0)

## Load in gff path
gff = qtlfun.loadgff(gffpath)

## Load in phenotype data
pheno = pd.read_csv(phenopath)

In [4]:
## View head
pheno.head()

Unnamed: 0,Strain,Basidium,Phenotype_Short,Set,Phenotype_Description,Binary,Trinary,Quadrary,Pentary
0,A_01,1.0,N,0,Yeast,0,0,0,0
1,A_02,1.0,Y,0,Filamentous,1,2,2,2
2,A_03,1.0,N,0,Yeast,0,0,0,0
3,A_04,1.0,L,0,Giant Cells,1,1,1,1
4,A_05,2.0,N (with rare spots of T),0,Non-filamentous with rare hyper-filamentous,0,0,3,4


In [5]:
## Set chromosome
chromn = np.arange(1,15)

## Load in genotype dataframe
genotype = qtlfun.loadvariants(infopath,genopath,chromn)

## Load in depth dataframe
depth = qtlfun.loadvariants(infopath,deptpath,chromn)

## Load in allelic dataframe
allelr = qtlfun.loadvariants(infopath,allepath,chromn)

In [6]:
## View head
genotype.head().T.head().T

Unnamed: 0,Seqid,Pos,Qual,Nalleles,Alleles
1,AE017341.1,21,0.0,1,TTCTC.CTCTG
25,AE017341.1,317,3.09223e-09,2,CAGT.TAT.TAGT
26,AE017341.1,331,317.714,1,ACAATCCG.ACAAGCCA
31,AE017341.1,393,3032.55,1,A.G
34,AE017341.1,435,2502.52,1,G.C


In [7]:
## Gather sample names and print length
samples = genotype.columns[genotype.columns.tolist().index('MAF')+1:].tolist()
print(len(samples))

92


In [8]:
## Gather progeny
progeny = sorted([s for s in samples if len(s)<=4])

## Gather ancestors
ancestors = sorted([s for s in samples if len(s)>4])

## Check work
assert len(progeny) + len(ancestors) == len(samples)

## Gather B3502 stocks
b3502 = [s for s in ancestors if s.split("_")[0]=='B3502']

## parents and progenitor strains
progenitors = [s for s in ancestors if s not in b3502]

In [9]:
## Set chrommap
chrommap = qtlfun.chrommap(genotype,chrom='Seqid')

## Add seqid
centlocs['Seqid'] = chrommap.Seqid

## Merge centlocs
chrommap = chrommap.merge(centlocs)

## View head 
chrommap.tail()

Unnamed: 0,Seqid,Length,Cumlen,Midpts,Chromosome,Left,Right
9,AE017350.1,1085720,14488368,15031228.0,10,807373,880180
10,AE017351.1,1019842,15574088,16084009.0,11,144778,180133
11,AE017352.1,906695,16593930,17047277.5,12,146899,171065
12,AE017353.1,787977,17500625,17894613.5,13,121986,179214
13,AE017356.1,762694,18288602,18669949.0,14,566417,639012


In [10]:
## Merge chrommap
genotype = genotype.merge(chrommap)

In [11]:
## Remove variants that fail our analysis
## Filter on coverate
cov_x = 10
depth_drop_ix = depth[(depth[samples].T.min() < cov_x)].index

## Filter on callrate > 100%
callrate = 1
callrate_drop_ix = genotype[(genotype.Callrate<callrate)].index

## Filter on allelic read depth
allele_ratio = .85
alleldepth_drop_ix = allelr[(allelr[samples].T.min() < allele_ratio)].index

## Drop invariant index
## These are sites where the JEC21 sample
## Is differnet from the referenance
## using the progeny
invariant_ix_jec21_1_drop = genotype[(genotype[progeny].T.var() == 0)].index

## Remove variants 1kb near the centromere
kb = 1000
## Initlize list
centixs = []

## For each variants
for i,c in genotype.groupby('Chromosome'):
    
    ## Find centromere variants and append
    centix = c[(c.Pos>=c.Left.min()-kb) & 
               (c.Pos<=c.Right.max()+kb)].index.tolist() 
    centixs.append(centix)
    
## Concatonate centromere index
centdropix = np.concatenate(centixs)

## Print number of index to drop
len(depth_drop_ix),len(callrate_drop_ix),len(alleldepth_drop_ix),len(centdropix),len(invariant_ix_jec21_1_drop)

(5457, 2262, 5796, 5485, 109843)

In [12]:
## Concatinate index to drop variants
drop_ix = np.unique(np.concatenate(
          [depth_drop_ix,
           callrate_drop_ix,
           alleldepth_drop_ix,
           centdropix,
           invariant_ix_jec21_1_drop
          ]))

## Print length
len(drop_ix)

111662

In [13]:
## Take these filtered variants
variants = genotype.drop(drop_ix,axis=0)
variants.shape

(2561, 108)

In [16]:
## Save the vairants
variants.to_csv('../GENOTYPE/FILTERED/B3502_F1_variants.csv.gz',index=False)

In [15]:
## Break before older code
assert 1 == 0

AssertionError: 

In [None]:
samples = sorted([s for s in geno.columns[:-2]])
len(samples)

In [None]:
progeny = [s for s in samples if len(s) <=4]
len(progeny)

In [None]:
## Bring in information rows x columns of vcf files
data = '../GENOTYPE/INFO/*.csv.gz'
infopaths = sorted(glob.glob(data))
assert len(infopaths) == 14
info = makedf(infopaths)
info.shape

In [None]:
## Bring in read depth rows x columns of vcf files
data = '../GENOTYPE/DP/*depths.csv.gz'
depthpaths = sorted(glob.glob(data))
assert len(depthpaths) == 14
depth = makedf(depthpaths)
depth.shape

In [None]:
info['Meandepth'] = depth[progeny].T.mean()

In [None]:
old = pd.read_csv('../GENOTYPE/B3502_progeny_genotypes.csv.gz',index_col=0)
old.shape

In [None]:
old.Qual.min()

In [None]:
old

In [None]:
info[(info.Seqid == 'AE017356.1') & (info.Pos==388777)]

In [None]:
filtix = info[(geno[progeny].T.var()!=0) & (info.Callrate==1) & (info.Qual>=100) & (info.Meandepth>=8)].index.tolist()

In [None]:
filtgeno = pd.concat([info.loc[filtix],geno.loc[filtix,samples]],axis=1)

In [None]:
filtgeno.to_csv('../GENOTYPE/B3502_additional_progeny_genotypes.csv.gz')

In [None]:
import seaborn as sns

In [None]:
sns.lmplot(x='Meandepth',y='Qual',hue='Type',data=info,ci=False,fit_reg=False);

In [None]:
info[(info.Qual>=100)].shape

In [None]:
temp = info[(geno[progeny].T.var()!=0) & (info.Callrate==1)]
sns.jointplot(data=temp,x='Meandepth',y='Qual',hue='Type');

In [None]:
plt.hist(info.Qual,log=True);

In [None]:
info[(info.Qual>10)]

In [None]:
geno['AF'] = geno[samples].T.mean()

In [None]:
geno[samples].T[0]

In [None]:
## Make a list of the stocks
stocks = ['B3502_%s'%s for s in pheno.Parent.dropna().unique()]
stocks

In [None]:
## Find index where B1 genotype != the others
b1nota1_ix = (geno[stocks[1]] != geno[stocks[0]])
b1notb7_ix = (geno[stocks[1]] != geno[stocks[-1]])

## Drop na
stock_dif_ix = geno[b1nota1_ix & 
                    b1notb7_ix][stocks].dropna().index
len(stock_dif_ix)

In [None]:
## Calculate mean, min and max depths of stock dif vars
mean_stock_depth = depth.loc[stock_dif_ix,stocks].T.mean()
min_stock_depth = depth.loc[stock_dif_ix,stocks].T.min()
max_stock_depth = depth.loc[stock_dif_ix,stocks].T.max()

In [None]:
## Bring in allelic read depth ratios of
## rows x columns of vcf files
data = '../GENOTYPE/AF/*ratios.csv.gz'
alleledepth = sorted(glob.glob(data))
assert len(alleledepth) == 14
ratio = makedf(alleledepth)
ratio.shape

In [None]:
## Calculate mean, min and max of ratios of stock dif vars
mean_stock_ratio = ratio.loc[stock_dif_ix,stocks].T.mean()
min_stock_ratio = ratio.loc[stock_dif_ix,stocks].T.min()
max_stock_ratio = ratio.loc[stock_dif_ix,stocks].T.max()

In [None]:
## Make a dataframe of potential stock variants
## Take info
tempdf = info.loc[stock_dif_ix].copy()

## stock genotypes 
dif_genos = geno.loc[stock_dif_ix,stocks].copy()

## concatonate
df = pd.concat([tempdf,dif_genos],axis=1)

## Add depth and ratio info
labels = ['min','max','mean']

for i,j in enumerate([min_stock_depth,
                      max_stock_depth,
                      mean_stock_depth]):
    
    df['%s_depth'%labels[i]] = np.round(j,2)
    
for i,j in enumerate([min_stock_ratio,
                      max_stock_ratio,
                      mean_stock_ratio]):
    
    df['%s_ratio'%labels[i]] = np.round(j,2)
    
## Drop un-needed columns    
to_drop = ['Maxlen','Minlen',
           'Callrate','Qual','Type','Oldix',
           'max_ratio','max_depth']

df.drop(to_drop,axis=1,inplace=True)

## View head
df.head()

In [None]:
## Check our work
assert np.sum(stock_dif_ix-df.index.values) == 0

In [None]:
## Examine as a ftn of paramaters
## The number of vairants in B3501_B1 we detect
drf = 0.1 ## Slide of allelic read reatio
ratio_fs = np.arange(0,1+drf,drf)
covx_fs = np.arange(0,12+1,1)

## Gather results per conditions
rf_pass = []
for ratio_f in ratio_fs:
    
    covx_pass = []
    for covx_f in covx_fs:
        
        ## Make sure the B3502_B1 stock passes 
        b1_depth_pass = depth[(depth[stocks[1]] >= covx_f) & 
                      (depth.index.isin(stock_dif_ix))].index

        b1_ratio_pass = ratio[(ratio[stocks[1]] >=ratio_f) & 
                      (ratio.index.isin(stock_dif_ix))].index
        
        rp = df[(df.mean_depth>=covx_f) & 
           (df.mean_ratio>=ratio_f) & 
           (df.index.isin(b1_depth_pass)) & 
           (df.index.isin(b1_ratio_pass))].shape[0]
        
        covx_pass.append(rp)
    rf_pass.append(covx_pass)
    
## Make into a dataframe
test = pd.DataFrame(rf_pass)
test.index = np.round(ratio_fs,2)

In [None]:
## Plot results
fig,ax = plt.subplots(1,1,figsize=(15,10))
sns.heatmap(test,cmap='cividis',annot=True, 
            cbar_kws={'label': 'Number of Variants'});
plt.yticks(fontsize=14,rotation=0);
plt.xticks(fontsize=14);
plt.ylabel('Allelic Read Ratio',fontsize=14)
plt.xlabel('Read Depth',fontsize=14);

In [None]:
## Filter potential snps
ratio_f = 0.5
covx_f = 10

## Make sure the B3502_B1 stock passes 
b1_depth_pass = depth[(depth[stocks[1]] >= covx_f) & 
                      (depth.index.isin(stock_dif_ix))].index

b1_ratio_pass = ratio[(ratio[stocks[1]] >=ratio_f) & 
                      (ratio.index.isin(stock_dif_ix))].index

## take variants
vardf = df[(df.mean_depth>=covx_f) & 
           (df.mean_ratio>=ratio_f) & 
           (df.index.isin(b1_depth_pass)) & 
           (df.index.isin(b1_ratio_pass))].copy()

In [None]:
## Find genes that overlap with our candidate variants
gene_res = []

for s in vardf.Seqid.unique():
    vt = vardf[(vardf.Seqid==s)]
    gfft = gff[(gff.Type=='gene') & (gff.Seqid==s)]
    
    for i,p in vt.iterrows():
        pt = p.Pos
        
        for j,k in gfft.iterrows():    
            gps = np.arange(k.Start,k.End+1)
            
            if pt in gps:
                gene_res.append((i,k.Gene))
            else:
                pass
            
hitdf = pd.DataFrame(gene_res,columns=['gix','Gene'])
hitdf.index = hitdf.gix
vardf['Gene'] = hitdf['Gene']
vardf.shape

In [None]:
## Save out data
vardf[['Seqid','Chrom','Pos','Gene','Alleles']+stocks
     ].to_csv('../GENOTYPE/B3502_parent_variants.csv.gz')

In [None]:
## View dataframe results 
vardf[['Chrom','Pos','Gene','Alleles']].tail()

In [None]:
## Gather B3502_progeny who are self filamentous
b1_filamentation = pheno[(pheno.Parent=='B1') & 
                         (pheno.Phenotype=='Y')
                        ].Strain.tolist()

## How many are there of these?
len(b1_filamentation)

In [None]:
## Sum the progeny genotypes
## We are looking for sites not equal to B3502 (ie 1)
## So sites with a sum of zero here in the four
## self-filamentous progeny are candidate repressor sites
vardf['B1_SF_progeny_sum'] = geno.loc[vardf.index,b1_filamentation].T.sum()
vardf[(vardf.B1_SF_progeny_sum==0)][['Chrom','Pos','Gene','Alleles']]

In [None]:
## Gather B3502_progeny who are non-yeast growth
b1_non_yeast= pheno[(pheno.Parent=='B1') & 
                         (pheno.Phenotype!='N')
                        ].Strain.tolist()

## How many are there of these?
len(b1_non_yeast)

In [None]:
## Sum the progeny genotypes
## We are looking for sites not equal to B3502_B1 (ie 1)
## So sites with a sum of zero here in the four
## non-yeast progeny are candidate repressor sites
vardf['B1_NY_progeny_sum'] = geno.loc[vardf.index,b1_non_yeast].T.sum()
vardf[(vardf.B1_NY_progeny_sum==0)][['Chrom','Pos','Gene','Alleles']]

In [None]:
## Gather all progeny that show filamentous growth
## regardlest of parent
filam_prog = pheno[(pheno.Phenotype=='Y')
                    ].Strain.tolist()

## How many are there of these?
len(filam_prog)

In [None]:
## Sum the progeny genotypes
## We are looking for sites not equal to B3502_B1 (ie 1)
## So sites with a sum of zero here in all of the
## filamentous progeny are candidate repressor sites
vardf['Progeny_sum'] = geno.loc[vardf.index,filam_prog].T.sum()
vardf[(vardf.Progeny_sum==0)][['Chrom','Pos','Gene','Alleles']]

In [None]:
## Analysis of clones
## Here we are searching for valid variants that seperate the clones

In [None]:
## Take a look at the first set of clones
c1 = 'A27'
c2 = 'A24'

clones1 = [c1,c2]

## Find candidate genomic sites that are 
## different between the clones
clone1_dif_ix = geno[(geno[c1] != geno[c2])][clones1].dropna().index

## Filter on covX
clone_covx_f = 8
clone1_depth_ix = depth[(depth[clones1].T.min() > clone_covx_f) & 
                        (depth.index.isin(clone1_dif_ix))].index

## Filter ratios
clone_ratio_f = .6
clone1_ratio_ix = ratio[(ratio[clones1].T.min() >= clone_ratio_f) & 
                        (ratio.index.isin(clone1_depth_ix))][clones1].index

pheno[(pheno.Strain.isin(clones1))]

In [None]:
## What are the genotypes at these sites in 
## these clones and their parent strain?
geno.loc[clone1_ratio_ix,clones1+['B3502_B1']]

In [None]:
## Check out the info on these
info.loc[clone1_ratio_ix]

In [None]:
## Take a look at the first set of clones
c3 = 'A44'
c4 = 'A43'

clones2 = [c3,c4]

## Find candidate genomic sites that are 
## different between the clones
clone2_dif_ix = geno[(geno[c3] != geno[c4])][clones2].dropna().index

## Filter on covX
clone_covx_f = 8
clone2_depth_ix = depth[(depth[clones2].T.min() > clone_covx_f) & 
                        (depth.index.isin(clone2_dif_ix))].index

## Filter ratios
clone_ratio_f = .6
clone2_ratio_ix = ratio[(ratio[clones2].T.min() >= clone_ratio_f) & 
                        (ratio.index.isin(clone2_depth_ix))].index

pheno[(pheno.Strain.isin(clones2))]

In [None]:
## What are the genotypes at these sites in 
## these clones and their parent strain?
geno.loc[clone2_ratio_ix,clones2+['B3502_B7']]

In [None]:
geno.loc[clone1_ratio_ix,clones2]

In [None]:
clones = clones1+clones2
clones

In [None]:
## Find genotypes with similar patters of 
## inheritance across clones
clones_dif_ix = geno[(geno[c1]!=geno[c2]) & 
                     (geno[c3]!=geno[c4]) & 
                     (geno[c1]==geno[c3]) & 
                     (geno[c2]==geno[c4])
                    ][clones].dropna().index

In [None]:
ratio.loc[clones_dif_ix,clones+['B3502_B1','B3502_B7']]

In [None]:
depth.loc[clones_dif_ix,clones+['B3502_B1','B3502_B7']]

In [None]:
geno.loc[clones_dif_ix,clones+['B3502_B1','B3502_B7']]

In [None]:





## Load in chromosome map
mappath = '/home/croth/Downloads/B3502/DATA/chrommap.csv.gz'
chrommap = pd.read_csv(mappath)
chrommap['Seqid'] = chrommap.Contig
chrommap.drop('Contig',axis=1,inplace=True)



## Merge dataframes
chrommap['Chromosome'] = chrommap.index+1
chrommap = chrommap.merge(centlocs)


gff = gff.merge(chrommap)

#stock_variants = pd.read_csv('/home/croth/Downloads/B3502/DATA/B3502_stocks_variants.csv',
#                            index_col=0)
#stock_variants.head()

## Load in gene changes analysis 
## This was done on stocks of B3502, CF830, and JEC21
gene_changes = pd.read_csv('../GENES/gene_changes.csv.gz')



In [None]:
pheno[(pheno.Parent=='B1')].shape

In [None]:
pheno[(pheno.Parent=='B1')]

In [None]:
## Gather samples view last 5
samples = sorted(geno.columns.tolist()[:-2])
len(samples), samples[-5:]

In [None]:
## Set parents used in sequenced cross A1, B1, B7
parents = ['B3502_%s'%a for a in ['A1','B1','B7']] + ['CF830','JEC21'] 
parents

In [None]:
## concatonate info with geno
seggeno = geno.loc[:,~geno.columns.isin(['Chrom','Oldix'])]
genodf = pd.concat([info,seggeno],axis=1)
genodf.shape

In [None]:
## Find sites that are invariant across population.
## IE gvs that are all JEC21 (reference genotype)
parent_invar_0_ix = geno[(geno[parents].T.sum() == 0) | 
                         (geno[parents].T.var() == 0)].index.tolist()
len(parent_invar_0_ix)

In [None]:
## Gather the variant sites
vargeno_temp = genodf[~(genodf.index.isin(parent_invar_0_ix))]
vargeno_temp.shape

In [None]:
vartest_ix = vargeno_temp[(vargeno_temp[parents].T.var()>0)
                          ][parents].dropna().index

In [None]:
non_sf_parent = 'B3502_B1'
other_parents = [p for p in parents ]

## Gather indcies where at least one of the 
## B3502 stock parents is different from CF830
var_ix = []
for p in parents:
    
    pgeno = vargeno_temp[[p,parents[-1]]].diff(axis=1).dropna(axis=0,how='all')
    dif_ix = pgeno[(pgeno[parents[-1]] != 0)].index
    var_ix.append(dif_ix)
    
var_ix = np.unique(np.concatenate(var_ix))

## Gather this variant genotype info check shape
vargeno = vargeno_temp.loc[var_ix].dropna().copy()
vargeno.shape

In [None]:
vargeno = vargeno_temp.loc[vartest_ix,:].copy()
vargeno.shape

In [None]:
missing = [i for i in vargeno_temp.index if i not in vargeno.index]
len(missing)

In [None]:
## Make parent and stock variant dataframes
infocols = ['Seqid','Chrom','Oldix','Pos','Qual','Nallele','Alleles','Type']

parents_gvs = vargeno[parents+['JEC21']+infocols].T.drop_duplicates().T.copy()

parents_gvs['Avedepth'] = depth.loc[vargeno.index,parents+['JEC21']].T.mean()
parents_gvs['Mindepth'] = depth.loc[vargeno.index,parents+['JEC21']].T.min()
parents_gvs['Maxdepth'] = depth.loc[vargeno.index,parents+['JEC21']].T.max()

parents_gvs['Averatio'] = ratio.loc[vargeno.index,parents+['JEC21']].T.mean()
parents_gvs['Minratio'] = ratio.loc[vargeno.index,parents+['JEC21']].T.min()
parents_gvs['Maxratio'] = ratio.loc[vargeno.index,parents+['JEC21']].T.max()

parents_gvs.to_csv('../GENOTYPE/B3502_parent_variants.csv.gz',index=True)

In [None]:
casual = 1438418

In [None]:
candidate_ix = parents_gvs[(parents_gvs.Chrom==6) & 
                (parents_gvs.Pos.isin(
                np.arange(1437068, 1438867,1)))].index

In [None]:
b1_p = pheno[(pheno[pheno.columns[3]]=='Y')].Progeny.tolist()

b1_checks = b1_p+ ['B3502_B7','B3502_A1','B3502_B1']
len(b1_checks),len(b1_p)

In [None]:
genodf.loc[candidate_ix,b1_checks+['CF830','Pos']]

In [None]:
repressor_candidate = 'CNF04940'
rc_gff = gff[(gff.Gene==repressor_candidate)].copy()
rc_gff.head()

In [None]:
fig,ax = plt.subplots(1,1,figsize=(8,3))

plt.hlines(0,rc_gff.Start.min(),rc_gff.End.max(),color='grey',alpha=.7,linewidth=5);
plt.hlines(0,rc_gff[(rc_gff.Type=='CDS')].Start.min(),
           rc_gff[(rc_gff.Type=='CDS')].End.max(),color='grey',
           linewidth=10,alpha=0.87);

temp_pos = parents_gvs[(parents_gvs.Chrom==6) & 
                       (parents_gvs.Pos>1435000) & 
                       (parents_gvs.Pos<1500000)].Pos.values

plt.plot(temp_pos,np.zeros(len(temp_pos)),'k.');


In [None]:
 parents_gvs[(parents_gvs.Chrom==6) & 
                       (parents_gvs.Pos>1400000) & 
                       (parents_gvs.Pos<1500000)]

In [None]:
parents_gvs[(parents_gvs.Chrom==6) & 
                    (parents_gvs.Pos>143000) & 
                       (parents_gvs.Pos<144000)]

In [None]:
## Group by parent
byparent = [p.Progeny.values for a,p in pheno.groupby('Parent')]
[print(a) for a,p in pheno.groupby('Parent')];

In [None]:
## Calculate the CF830 allele frequencies 
## PER cross between CF830 and the B3502 stocks
cf_830 = [np.sum((vargeno[bp
            ].T.values  - vargeno['CF830'].values
                   ) == 0,axis=0)/len(bp) for bp in byparent]

## Add as columns to variant df
for i,p in enumerate(['A1','B1','B7']):
    vargeno['%s_af'%p] = cf_830[i]
    
vargeno.tail()

In [None]:
## Find variants that are invariant in the progeny
invar_geno = vargeno[(vargeno[pheno.Progeny.tolist()].T.var() == 0)]
invar_geno.shape

In [None]:
## Assert that we have colledct genotypes == JEC21
## We should have found these above
assert len([i for i in vargeno[(vargeno[(pheno.Progeny)].T.sum()==0)].index 
 if i not in invar_geno.index]) == 0

In [None]:
## Check where the invariance is in the other genotype B3502
vargeno[(np.sum(vargeno[pheno.Progeny] == 1,axis=1) == len(pheno.Progeny))]

In [None]:
## Remove invarnt sites
loci = vargeno[~(vargeno.index.isin(invar_geno.index))]
loci = loci.T.drop_duplicates().T

## SAve
loci.to_csv('../GENOTYPE/B3502_progeny_genotypes.csv.gz')
loci.shape

In [None]:
## Gather progeny from the B3502_B1 stock
## That are filamentous (Y for yes)
#b1_p = pheno[(pheno.Parent=='B1') & (pheno[pheno.columns[3]]=='Y')].Progeny.tolist()
b1_p = pheno[(pheno.Parent=='B1') & 
             (pheno[pheno.columns[3]]!='N')].Progeny.tolist()

b1_checks = b1_p+ ['B3502_B7','B3502_A1','B3502_B1']
len(b1_checks),len(b1_p)

In [None]:
b1_checks

In [None]:
## Find potential repressor sites in geno (full data non filtered)
k = np.min(np.abs(geno[b1_checks[:-1]].T.values - geno[b1_checks[-1]].values).T,axis=1)
#k = np.min(np.abs(geno[b1_checks_ny[:-1]].T.values - geno[b1_checks_ny[-1]].values).T,axis=1)
repres_ix = geno.index.values[(k!=0)]

## Check work
assert len(repres_ix) == np.unique(repres_ix).shape[0]

## Print number
len(repres_ix)

In [None]:
## Gather represors into a dataframe
repres_temp = geno.loc[repres_ix,b1_checks].dropna()
repres = pd.concat([repres_temp,info.loc[repres_temp.index]],axis=1)

repres['Ratio_ave'] = ratio.loc[repres.index,b1_checks].mean(axis=1)
repres['Depth_ave'] = depth.loc[repres.index,b1_checks].mean(axis=1)

repres = repres.T.drop_duplicates().T
repres.head()

In [None]:
## Gather genes on chromosomes in our represor set
gfftemp = gff[(gff.Type=='gene') & 
              (gff.Chromosome.isin(repres.Chrom))]


## If the repressor is in a gene
gene_check = []
for i,j in repres.iterrows():
    gp = gfftemp[(gfftemp.Chromosome==j.Chrom) & 
            (gfftemp.Start<=j.Pos) & (gfftemp.End>=j.Pos)]
    
    if gp.shape[0]>0:
    #    print(gp[['Gene','Start','End']])
    #    print(i,j.Pos,j.Chrom)
    #    print('\n')
        
        gene_check.append(gp.Gene.min())
    
gene_check = np.unique(gene_check)
len(gene_check)

In [None]:
gene_check

In [None]:
## Gather the parent data
parents_and_JEC21 = parents + ['JEC21']
pgeno = geno[parents_and_JEC21].copy().dropna()
pgeno['Ave_depth'] = depth[parents_and_JEC21].T.mean()
pgeno['Ave_ratio'] = ratio[parents_and_JEC21].T.mean()
pgeno['IsJEC21'] = pgeno[parents_and_JEC21].T.sum()
pgeno['Genovar'] = pgeno[parents_and_JEC21].T.var()

In [None]:
## Filter the parent data
pfilt = pgeno[(pgeno.Genovar!= 0) & 
              (pgeno.Ave_depth> 9) & 
              (pgeno.Ave_ratio > .8)]

In [None]:
pfin = pd.concat([info.loc[pfilt.index],pfilt],axis=1)
pfin.shape

In [None]:
pfin[(pfin.Chrom==1) & (pfin.Pos==1430787)]

In [None]:
geno.loc[27604,b1_checks]

In [None]:
pfin[(pfin.Chrom==3) & (pfin.Pos==153548)][parents_and_JEC21]

In [None]:
pfin[(pfin.Chrom==6) & (pfin.Pos>=1437085)]

In [None]:
parents_gvs[(parents_gvs.Chrom==6) & (parents_gvs.Pos>=1437085)]

In [None]:
pfin[(pfin.Chrom==10) & (pfin.Pos==323716)]

In [None]:
notin = [i for i in repres.index if i not in pfin.index]
notin[:5],len(notin)

In [None]:
len(repres)

In [None]:
## Find reprsor index in parent differnet index
repres_in_parents = [i for i in repres.index if i in pfin.index]
len(repres_in_parents)

In [None]:
pfin.loc[repres_in_parents][['Pos','Chrom']]

In [None]:
geno.loc[244989][['A44','A43','A27','A24']]

In [None]:
tempix = geno[(info.Pos>=1437068) & 
              (info.Pos<=1438867) & 
              (geno.Chrom==6)][b1_checks].index

In [None]:
candidate = geno.loc[tempix].copy()
candidate['Pos'] = info.loc[tempix,'Pos']
candidate['Var'] = candidate[b1_checks].T.var()
candidate[(candidate.Var!=0)]
toplot_can = candidate[(candidate.Var!=0)][b1_checks+['Pos']].copy().dropna()
toplot_can.index = toplot_can.Pos
toplot_can.drop(['Pos'],axis=1,inplace=True)
toplot_can

In [None]:
clones1 = geno[(geno['A27'] != geno['A24'])][['A27','A24']].copy().dropna()
clones1['Mdepth'] =depth.loc[clones1.index,['A27','A24']].T.min()
clones1['Mratio'] =ratio.loc[clones1.index,['A27','A24']].T.min()
clones1['Pos'] = info.loc[clones1.index,'Pos']
clones1['Chrom'] = info.loc[clones1.index,'Chrom']

In [None]:
geno.loc[clones1[(clones1.Mratio>0.6)].index,['A44','A43']]

In [None]:
info

In [None]:
clones2 = geno[(geno['A44'] != geno['A43'])][['A44','A43']].copy().dropna()
clones2['Mdepth'] =depth.loc[clones2.index,['A44','A43']].T.max()
clones2['Mratio'] =ratio.loc[clones2.index,['A44','A43']].T.max()
clones2['Pos'] = info.loc[clones2.index,'Pos']
clones2['Chrom'] = info.loc[clones2.index,'Chrom']
clones2['Alleles'] = info.loc[clones2.index,'Alleles']
clones2['Type'] = info.loc[clones2.index,'Type']

clones2[(clones2.Mratio>0.8)].head()

In [None]:
commonix = [i for i in clones1.index.tolist() if i in clones2.index.tolist()]

In [None]:
temp = pd.concat([clones1.loc[commonix,['A27','A24']],clones2.loc[commonix]],axis=1)

In [None]:
temp.shape

In [None]:
sf_progeny = pheno[(pheno[(pheno.columns[3])] =='Y')].Progeny.tolist() + ['B3502_A1','B3502_B7','B3502_B1','CF830']

In [None]:
other_repressor = []
for i,g in vargeno.iterrows():
    
    if (g['B3502_B1'] not in g[sf_progeny[:-2]].tolist()) and (g['B3502_B1']!=g['CF830']):
        other_repressor.append(i)
        
len(other_repressor)

In [None]:
loci.loc[other_repressor][sf_progeny+['Chrom','Pos','Alleles','Type']]

In [None]:
loci.loc[other_repressor][sf_progeny+['Chrom','Pos','Alleles','Type']]

In [None]:
info[(info.Chrom==6) & (info.Pos==1437914)]

In [None]:
import seaborn as sns

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,3))
sns.heatmap(toplot_can.T,cbar=False, linewidths=.5);
plt.xlabel('\nGenetiv Vairant Position',fontsize=12)
plt.ylabel('B3502 Stocks and F$_1$ Progeny\n',fontsize=12);
plt.savefig('../FIGURES/CNF04940_variants.png',dpi=200,bbox_inches='tight')

In [None]:
gff[(gff.Gene=='CNF04940')]

In [None]:
repres_in_parents

In [None]:
info.loc[repres_in_parents]

In [None]:
for i,j in repres.loc[repres_in_parents].iterrows():
    gp = gfftemp[(gfftemp.Chromosome==j.Chrom) & 
           

In [None]:
repres.loc[repres_in_parents]

In [None]:
#[i for i in pfin.index if i not in repres]

In [None]:
p = 1430787
c = 1
ix = info[(info.Pos==p) & (info.Chrom==c)].index


geno.loc[ix,b1_checks]

In [None]:
p = 153548
c = 3
ix = info[(info.Pos==p) & (info.Chrom==c)].index


geno.loc[ix,b1_checks]

In [None]:
p = [1437085,1437389,1438373]
c = 6
ix = info[(info.Pos.isin(p)) & (info.Chrom==c)].index


geno.loc[ix,b1_checks]

In [None]:
p = [323716]
c = 10
ix = info[(info.Pos.isin(p)) & (info.Chrom==c)].index


geno.loc[ix,b1_checks]

In [None]:
cnf04940varix= pfin[(pfin.Chrom==6) & (pfin.Pos>=1437068)].index

In [None]:
cnm00880 = pfin[(pfin.Chrom==13) & (pfin.Pos>= 256057) & (pfin.Pos<=258338)].index

In [None]:
info.loc[cnm00880]

In [None]:
cnm00880ix_temp = info[(info.Chrom==13) & (info.Pos>= 256057) & (info.Pos<=258338)].index.tolist()

In [None]:
cnm00880ix = repres[(repres.index.isin(cnm00880ix_temp))].index

In [None]:
y_progeny = pheno[(pheno[pheno.columns[3]]=='Y')].Progeny.values
check1 = sorted(np.unique(list(b1_checks) + list(y_progeny)))

c_progeny = pheno[(pheno[pheno.columns[3]].isin(['Y','L','T']))].Progeny.values
check2 = sorted(c_progeny)

T_progeny = pheno[(pheno[pheno.columns[3]].isin(['Y','T']))].Progeny.values
check3 = sorted(T_progeny)

In [None]:
g = 'CNB00860'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3]

In [None]:
geno.loc[gix,b1_checks]

In [None]:
g = 'CNE05380'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3]

In [None]:
geno.loc[gix,check1]

In [None]:
geno.loc[gix,check2]

In [None]:
np.unique(geno.loc[gix,check2].values[0])

In [None]:
geno.loc[gix,b1_checks]

In [None]:
gix in pfin.index.tolist()

In [None]:
pgeno.loc[gix]

In [None]:
g = 'CNF04940'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3]

In [None]:
geno.loc[gix,check1]

In [None]:
g = 'CNL04090'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3].values

In [None]:
g = 'CNM00880'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3].values

In [None]:
g = 'CNJ01150'
gse = gff[(gff.Gene==g) & (gff.Type=='gene')][['Start','End']].values[0]
gpos = np.arange(*gse)
gchrom = gff[(gff.Gene==g)]['Chromosome'].min()

gix = repres[(repres.Chrom==gchrom) & (repres.Pos.isin(gpos))].index
geno.loc[gix,check3].values

In [None]:
geno.loc[info[(info.Pos.isin(gpos)) & (info.Chrom==gchrom)].index,b1_checks]

In [None]:
geno.loc[info[(info.Pos==1430787)].index,b1_checks]

In [None]:
geno.loc[info[(info.Pos==153548) & (info.Chrom==3)].index,b1_checks+['B3502_A7']]

In [None]:

geno.loc[cnm00880ix,check1]

In [None]:
n_progeny = pheno[(pheno[pheno.columns[3]]=='N')].Progeny.values

check2 = np.unique(list(b1_checks) + list(n_progeny) + ['JEC21'])
geno.loc[cnm00880ix,check2]

In [None]:
y_progeny = pheno[(pheno[pheno.columns[3]]=='Y')].Progeny.values
check1 = np.unique(list(b1_checks) + list(y_progeny))
geno.loc[cnf04940varix,check1]

In [None]:
n_progeny = pheno[(pheno[pheno.columns[3]]=='N')].Progeny.values

check2 = np.unique(list(b1_checks) + list(n_progeny) + ['JEC21'])
geno.loc[cnf04940varix,check2]

In [None]:
b1_checks

In [None]:
geno.loc[cnf04940varix,]

In [None]:
#pfin.drop(237053,axis=0,inplace=True)

In [None]:
pfin.to_csv('../GENOTYPE/B3502_refilt_stock_variants.csv.gz')

In [None]:
assert 1 ==0

In [None]:
info['AF'] = (geno[samples] !=0).mean(axis=1)

In [None]:
info[(info.AF>0) & (info.AF<1)].shape[0]

In [None]:
qtlfun.manhattan(info.sort_values('Chrom'),
                 pval='AF',ylabel='Allele Freq.');

In [None]:
plt.plot(info.AF,np.log10((info.Qual/info.Depth)+1),'.');

In [None]:
depthpaths = sorted(glob.glob('../GENOTYPE/DP/*depths.csv.gz'))
len(depthpaths)

depth = makedf(depthpaths)

In [None]:
info['Mdepth'] = depth[samples].min(axis=1)
info['Meandepth'] = depth[samples].mean(axis=1)
info['Ndepth'] = np.mean(depth[samples]>=10,axis=1)

In [None]:
alleledepth = sorted(glob.glob('../GENOTYPE/AF/*ratios.csv.gz'))
len(alleledepth)

ratio = makedf(alleledepth)

In [None]:
info['Ratio'] = ratio[samples].min(axis=1)
info['Nratio'] = np.mean(ratio[samples]>.8,axis=1)

In [None]:
pheno = pd.read_excel('../PHENOTYPE/202011_WGS_B3502Progeny_for_QTL.xlsx')
pheno.head()

In [None]:
pheno

In [None]:
info[(info.Pos==153548) & (info.Chrom==3)]

In [None]:
geno.loc[73829][['Stock1','Stock3','Stock4','JEC21','CF830']]

In [None]:
geno.loc[73829][samples].sum()

In [None]:
info[(info.Pos==1894056)]

In [None]:
geno.loc[110255][['Stock1','Stock3','Stock4','CF830','JEC21']]

In [None]:
## Bring in phenotype data from Dr. Sheng Sun
phenopath = '../PHENOTYPE/202011_WGS_B3502Progeny_for_QTL.xlsx'
pheno = pd.read_excel(phenopath)
pheno['Strain'] = [''.join(a.split('_'))[:3] 
                   for a in pheno[pheno.columns[0]]]
pheno['Parent1'] = [ a.split('x')[0].split('B3502')[-1][:-1] 
                    for a in pheno['Cross_Progeny']]
pheno.index = pheno.Strain

pheno['RIC8'] = -1
pheno['RIC8'] = geno.loc[382952,pheno.index]
pheno['RHO'] = -1
pheno['RHO'] = geno.loc[110255,pheno.index]
#pheno.tail()

In [None]:
pheno[(pheno.Parent1=='B1') & 
      (pheno.RIC8==1) & (pheno.RHO==0)]

In [None]:
b1segs = pheno[(pheno.Parent1=='B1') & 
               (pheno.RIC8==1) 
              ].Strain.tolist()

In [None]:
kix = geno[(geno[b1segs+['Stock1','Stock4','JEC21','CF830']].T.sum()==0) & 
     (geno['Stock3']==1) 
    ].dropna().index.values

In [None]:
genodf = pd.concat([info,geno],axis=1)

In [None]:
genodf[(genodf.Pos==1437085)][['Stock1','Stock4','Stock3']+b1segs]

In [None]:
represors = info.loc[kix].sort_values('Chrom')
represors

In [None]:
gff = qtlfun.loadgff('/home/croth/Downloads/B3502/REF/FungiDB-48_CneoformansJEC21.gff.gz')

In [None]:
gene_check = []
for i, (j,c) in enumerate(represors.iterrows()):
    k = gff[(gff.Type=='gene') & (gff.Seqid==c.Seqid) & 
        (gff.Start<=c.Pos) & (gff.End>=c.Pos)].Gene
    #print(k)
    if len(k) > 0:
        gene_check.append(k.min())
gene_check = np.unique(gene_check)
gene_check

In [None]:
genech = pd.read_csv('/home/croth/Downloads/B3502/DATA/gene_changes.csv.gz')

In [None]:
genech[(genech.Gene=='CNF04940')]

In [None]:
genech[(genech.Gene=='CNM00880')]

In [None]:
gff[(gff.Gene.isin(gene_check)) & (gff.Type=='gene')]

In [None]:
genech[(genech.Gene.isin(gene_check))]

In [None]:
info.loc[(info.Pos== 388777) & (info.Chrom==14)]

In [None]:
info[(info.Chrom==14) & (info.Pos>=388700)]

In [None]:
b1segs = pheno[(pheno.Parent1=='B1')].Strain.tolist()

In [None]:
b1segs

In [None]:
pheno[(pheno.Parent1=='B1')]

In [None]:
geno[(geno.Stock3>=1) & 
     (geno[['Stock1','Stock4']].T.sum()==0) & 
     (info.Chrom==6)].dropna()[['Stock1','Stock4','Stock3']]

In [None]:
geno[(geno.Stock3>=1) & 
     (geno[['Stock1','Stock4']].T.sum()==0) & 
     (info.Chrom==6)].dropna()[['Stock1','Stock4','Stock3']]

In [None]:
geno[(geno.Stock3>=1) & 
     (geno[['Stock1','Stock4']].T.sum()==0) & 
     (info.Chrom==1)].dropna()[['Stock1','Stock4','Stock3']]

In [None]:
geno[(geno.Stock3>=1) & 
     (geno[['Stock1','Stock4']].T.sum()==0) & 
     (info.Chrom==3)].dropna()[['Stock1','Stock4','Stock3']]

In [None]:
geno[(geno.Stock3>=1) & 
     (geno[['Stock1','Stock4']].T.sum()==0) & 
     (info.Chrom==10)].dropna()[['Stock1','Stock4','Stock3']]

In [None]:
info[(info.Pos==1430787)]

In [None]:
geno.loc[30:30][samples][['Stock1','Stock3','Stock4','JEC21','CF830']]

In [None]:
geno.loc[30][samples].sum()

In [None]:
ratio.loc[30:30]['Stock3']

In [None]:
info[(info.Chrom==14) & (info.Pos>=386823) & 
     (info.Pos<=389688) & 
     (info.AF>0) & (info.AF<1) ]

In [None]:
stock_variants[(stock_variants.Pos>=386823) & 
               (stock_variants.Pos<=389688) &
               (stock_variants.Chromosome==14)]

In [None]:
chg = geno[(info.Chrom==14)]
chd = depth[(info.Chrom==14)]
chi = info[(info.Chrom==14)]
cha = ratio[(ratio.Chrom==14)]

In [None]:
cents = centlocs[(centlocs.Chromosome==14)][['Left','Right']].values[0]

In [None]:
info

In [None]:
plt.plot(chi.Pos,np.log2(chi.Meandepth),'.');
plt.hlines(2,*cents,color='r');
plt.plot(chi[(chi.Callrate==1)].Pos,np.log2(chi[(chi.Callrate==1)].Meandepth),'.')

In [None]:
plt.plot(chi.Nratio,chi.Ratio,'.');

In [None]:
atleast = 3
umaf = (len(samples)-atleast)/len(samples)
lmaf = atleast/len(samples)

filt = info[(info.AF>lmaf) & 
     (info.AF<umaf) & 
    (info.)
     #(info.Ndepth>0.8) & 
     #(info.Nratio>0.8) & 
     (info.Callrate==1)].copy()
filt['Chromosome'] = filt.Chrom
filt.shape

In [None]:
loci = geno.loc[filt.index,samples].drop_duplicates()

In [None]:
chrommap

In [None]:
MAT = [1529240,1661861]

In [None]:
fig,ax = plt.subplots(7,2,figsize=(20,15),sharex=True,sharey=True)

for i,(j,c) in enumerate(chrommap.iterrows()):
    
    plt.sca(ax.ravel()[i])
    plt.hlines(0.5,0,c.Length,color='k',linestyle='--')
    
    plt.hlines(0.5,c.Left,c.Right,color='r',linewidth=5)
    
    temp = filt[(filt.Seqid==c.Seqid)]
    
    
    plt.plot(temp.Pos,temp.AF,'ko')
    plt.title(i+1,y=.8)
    
plt.sca(ax.ravel()[3])
plt.hlines(0.5,*MAT,color='g',linewidth=5)

plt.ylim(0,1)
plt.subplots_adjust(wspace=0.1);

In [None]:
filt[(filt.Chromosome==4)].shape

In [None]:
geno.loc[filt[(filt.Chromosome==4)].index,
         samples
        ].drop_duplicates().shape

In [None]:
filt[(filt.Chromosome==11)]

In [None]:
info[(info.Chrom==11) & 
     (info.Callrate==1) & 
     (info.AF>0) & 
     (info.AF<1) & (info.Type=='snp')]

In [None]:
info[(info.Chrom==12) & (info.Pos>773178) & (info.Pos<774000) & (info.AF>0)]

In [None]:
ratio[(info.Chrom==12) & (info.Pos>773178) & (info.Pos<774000) & (info.AF>0)]

In [None]:
info[(info.Chrom==11) & (info.Pos==823427)]

In [None]:
loci = pd.concat([filt,geno.loc[filt.index,samples]],axis=1)

In [None]:
loci.to_csv('../GENOTYPE/B3502_progeny_variants.csv.gz',index=True)

In [None]:
stock_variants[(stock_variants.Chromosome==7) & 
               (stock_variants.Pos==890559)][
                ['CF830','B3502_A1','B3502_B1','B3502_B7']]

In [None]:
filt[(filt.AF<.25) & (filt.Chromosome==7)]

In [None]:
stock_variants.shape

In [None]:
assert 1 == 0
mydpi = 150
for s in samples[:5]:
    
    temp = info.copy()
    temp['Ratio'] = ratio[s]
    temp['Geno'] = geno[s]
    temp['SDepth'] = np.log2(depth[s]+1)
    temp = temp.merge(chrommap[['Seqid','Cumlen']])
    #print(temp.shape)
    #temp = temp.loc[info[(info.AF>0) & 
    #                     (info.AF<1) & 
    #                      (info.depth>8)].index,:]
   
    fig,ax = plt.subplots(2,1,figsize=(12,5),sharex=True);
    fig.set_facecolor('w')
    plt.sca(ax[0])
    
    g0 = temp[(temp.Geno==0)]
    g1 = temp[(temp.Geno!=0)]
    
    plt.plot(g0.Pos+g0.Cumlen,g0.SDepth,'.',
             color='tab:blue',ms=2,alpha=0.5)
    plt.plot(g1.Pos+g1.Cumlen,g1.SDepth,'.',
             color='tab:orange',ms=2,alpha=0.5)
    
    plt.xticks(chrommap.Midpts,
               np.arange(chrommap.shape[0])+1,
               fontsize=12)
    yp,yl = plt.yticks()
    
    plt.vlines(chrommap.Cumlen.values[1:],
               yp[1],yp[-2],color='k',
               linestyle='--',linewidth=1,
              alpha =0.52)
    
    sp = pheno[(pheno.Strain==s)][pheno.columns[2]].min()
    
    plt.title('Strain: %s | Phenotype: %s'%(s,sp),fontsize=12)
    plt.ylabel('-log$_2$ Read Depth',fontsize=12)
    
    for i,c in chrommap.iterrows():
        plt.hlines(0,c.Left+c.Cumlen,
                  c.Right+c.Cumlen,
               color='k')
    
    plt.sca(ax[1])
    
    plt.plot(g0.Pos+g0.Cumlen,g0.Ratio,'.',
             color='tab:blue',ms=1,alpha=0.2)
    plt.plot(g1.Pos+g1.Cumlen,g1.Ratio,'.',
             color='tab:orange',ms=1,alpha=0.2)
    
    plt.xticks(chrommap.Midpts,
               np.arange(chrommap.shape[0])+1,
               fontsize=12)
    
    plt.vlines(chrommap.Cumlen.values[1:],
               0,1,color='k',
               linestyle='--',linewidth=1,
              alpha =0.52)
    
    plt.ylabel('Allelic Ratio',fontsize=12)
    plt.xlabel('Chromosome',fontsize=12)
    
    plt.savefig('../FIGURES/DEPTHPLOTS/%s.png'%(s),
             dpi=mydpi,bbox_inches='tight')
    plt.close()