In [1]:
## Load in needed mods
import pandas as pd, numpy as np

## Bring in our mods
import transposonmobility as tmi

## Load in mod for plotting
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
## Read the list of bam files and split each entry into its labelled parts
## Location of the bam list
bamlistpath = '../DATA/bamlist.txt'

## Parse with pandas: no header row, fields are separated by underscores
listobams = pd.read_csv(bamlistpath, sep='_', header=None)

## Name the four underscore-delimited fields
listobams.columns = ['Extension','Sample','Lane','Bam']

## The trailing Bam field is not used in this notebook, so discard it
listobams = listobams.drop(columns='Bam')

## Strip any leading directories, keeping only the last path component
listobams['Extension'] = listobams['Extension'].map(lambda p: p.split('/')[-1])

## Preview the first rows
listobams.head()

Unnamed: 0,Extension,Sample,Lane
0,JW-S1,S1,L002
1,JW-S1,S1,L003
2,JW-S1,S1,L004


In [3]:
## Read the per-position depth table
## Location of the gzipped, tab-separated depth file
depthpath = '../DATA/SampleDepths.tsv.gz'

## The file is read without a header row, so pandas assigns integer column labels
depth = pd.read_csv(depthpath, sep='\t', header=None)

## Label the columns: chromosome, position, then one label per row of listobams.
## A sample sequenced on several lanes gets the same label on each of its columns;
## the plotting cell relies on this to select all lanes of a sample at once.
depth.columns = ['Chrom','Pos'] + listobams['Sample'].tolist()

## Preview the first rows
depth.head()

Unnamed: 0,Chrom,Pos,S1,S1.1,S1.2
0,XL280_Chr01,58,0,0,2
1,XL280_Chr01,59,0,2,2
2,XL280_Chr01,60,0,2,4
3,XL280_Chr01,61,1,2,4
4,XL280_Chr01,62,1,2,4


In [4]:
## Show the (rows, columns) dimensions of the full depth table
depth.shape

(19177438, 5)

In [5]:
## Build a per-chromosome table used to lay the chromosomes end-to-end on one axis
## Keep only the chromosome and position columns
temp = depth[['Chrom','Pos']]

## Largest position recorded on each chromosome, taken as that chromosome's length
clendf = temp.groupby('Chrom')['Pos'].max().to_frame().reset_index()

## Cumulative offset of each chromosome: the summed lengths of all chromosomes before it
clendf['Cumpos'] = np.concatenate(([0], clendf['Pos'].cumsum().values[:-1]))

## Midpoint of each chromosome in cumulative coordinates (used for axis tick placement)
clendf['Midpts'] = clendf['Cumpos'] + clendf['Pos'].values / 2

## Alternate black and grey so neighbouring chromosomes are distinguishable
clendf['Colors'] = (['k','grey'] * clendf.shape[0])[:clendf.shape[0]]

## Preview the first rows
clendf.head()

Unnamed: 0,Chrom,Pos,Cumpos,Midpts,Colors
0,XL280_Chr01,2309329,0,1154664.5,k
1,XL280_Chr02,1649354,2309329,3134006.0,grey
2,XL280_Chr03,2117102,3958683,5017234.0,k
3,XL280_Chr04,1793274,6075785,6972422.0,grey
4,XL280_Chr05,1473544,7869059,8605831.0,k


In [6]:
## Thin the table: within each chromosome keep the row label of every ws-th entry
ws = 1000

## Collect the sampled row labels chromosome by chromosome, in clendf order
perchrom = [temp.index[(temp['Chrom'] == c).values][::ws].tolist() for c in clendf['Chrom'].values]

## Join the per-chromosome lists into one flat array of row labels
K = np.concatenate(perchrom)

## Show the first five sampled labels
K[:5]

array([   0, 1000, 2000, 3000, 4000])

In [7]:
## Subset the depth table to the sampled row labels held in K
res = depth.loc[K]

## Drop the full depth table to release memory; no later cell shown here reads it.
## After this, the cell cannot be re-run without first reloading depth.
del depth

## Dimensions of the thinned table
res.shape

(19185, 5)

In [8]:
## Attach each chromosome's cumulative offset and plotting colour to the thinned depths.
## Chrom is the only column the two tables have in common, so it is the join key.
nres = pd.merge(res, clendf[['Chrom','Cumpos','Colors']], on='Chrom')

## Genome-wide coordinate: position within the chromosome plus the chromosome's offset
nres['Chrompos'] = nres['Pos'] + nres['Cumpos']

## Preview the first rows
nres.head()

Unnamed: 0,Chrom,Pos,S1,S1.1,S1.2,Cumpos,Colors,Chrompos
0,XL280_Chr01,58,0,0,2,0,k,58
1,XL280_Chr01,1058,13,12,11,0,k,1058
2,XL280_Chr01,2058,22,8,18,0,k,2058
3,XL280_Chr01,3058,14,20,18,0,k,3058
4,XL280_Chr01,4058,10,15,13,0,k,4058


In [9]:
## Fetch the sample metadata table exposed by the transposonmobility module
sampleinfo = tmi.sampleinfo

## Re-label the rows as "S<Sample>" so they can be looked up by the sample labels
## used for the depth columns.
## NOTE(review): no copy is taken, so this appears to re-label tmi.sampleinfo itself
## as well -- presumably intended; confirm nothing else expects the original index.
sampleinfo.index = ['S' + str(v) for v in sampleinfo['Sample'].tolist()]

## Preview the first rows
sampleinfo.head()

Unnamed: 0,Sample,Passaged,Isolate,Name
S23,23,0,4,XL280.3
S21,21,0,1,XL280.1
S1,1,30,1,30-01
S2,2,30,1,Duplicate-30-01
S3,3,30,1,30-02


In [10]:
## Plot log2 read depth along the genome: one figure per sample, one panel per lane.
## Each figure is written to ../FIGURES and then closed, so nothing is shown inline.
## Gather unique sample names
uniqsam = listobams.Sample.unique()

## Chromosome tick labels 1..N, sized from clendf rather than a fixed chromosome count
chromlabels = np.arange(1, clendf.shape[0] + 1)

## Iterate thru the unique samples
for s in uniqsam:

    ## Look up the sample's metadata first, so an unknown sample fails before a
    ## figure is opened (and therefore cannot leave one unclosed)
    names = sampleinfo.loc[s].values

    ## Every column carrying this sample's label is one lane. A boolean column mask
    ## always returns a DataFrame, so a single-lane sample works as well.
    lanes = nres.loc[:, nres.columns == s]
    sn = lanes.shape[1]

    ## Call a figure, set facecolor. squeeze=False keeps the axes in a 2-D array
    ## for any number of lanes; take the single column to index panels by row.
    fig,ax = plt.subplots(sn,1,figsize=(12,6),sharex=True,squeeze=False)
    ax = ax[:,0]
    fig.set_facecolor('w')

    ## Gather temporary results (this re-binds the notebook-level name temp, as before)
    temp = nres[['Chrompos','Pos','Colors',s]]

    ## Iterate thru the lanes
    for ri in range(sn):

        ## Panel for this lane
        laneax = ax[ri]

        ## Add a scatter of depths (log2); the +1 keeps zero-depth positions finite
        laneax.scatter(x=temp.Chrompos.values, y=np.log2(lanes.iloc[:,ri].values+1),
                       c=temp.Colors.values, alpha=0.5, s=2, rasterized=True)

        ## Title only the top panel; the other panels are left without a title
        if ri == 0:
            laneax.set_title(s,fontsize=14)

        ## Lane label in axes coordinates (upper left of the panel)
        laneax.text(x=0,y=.8,s='Lane %s'%(str(ri+1)),fontsize=14,transform=laneax.transAxes)

    ## Set xticks and x labels on the bottom panel: one tick per chromosome midpoint
    ax[-1].set_xticks(clendf.Midpts.values)
    ax[-1].set_xticklabels(chromlabels,fontsize=14)
    ax[-1].set_xlabel('Chromosome',fontsize=13)

    ## Add a single y label shared by all panels
    fig.text(0.08,0.5,s='$log_{2}$ Number of Reads',fontsize=14,rotation=90,va='center',ha='center')

    ## Set save name from the first four sampleinfo fields of this sample
    to_save = 'S%s_%s_%s_%s_log2_coverage_by_lane.png'%(names[0],names[1],names[2],names[3])

    ## Save and close this figure explicitly
    fig.savefig('../FIGURES/%s'%to_save,dpi=100,bbox_inches='tight')
    plt.close(fig)