In [1]:
from pyseqrna import pyseqrna_utils as pu
from pyseqrna import quality_check as qc
from pyseqrna import quality_trimming as qt
from pyseqrna import  aligners as al
from pyseqrna import pyseqrna_stats as ps
from pyseqrna import quantification as quants
from pyseqrna import differential_expression as de
from pyseqrna import pyseqrna_plots as pp
from pyseqrna import multimapped_groups as mmg
import pandas as pd
import dill

In [2]:
from pyseqrna.pyseqrna_utils import PyseqrnaLogger

# Create the notebook-level logger and record the start of the analysis.
# NOTE(review): the meaning of mode='w' and log='pp' is defined by
# PyseqrnaLogger (presumably file mode and log name) — confirm.
log = PyseqrnaLogger(mode='w', log='pp')

log.info("Analysis started")

[19:57:06]  <ipython-input-2-bf476bf393c3> :: INFO : Analysis started


In [3]:
# Parse the paired-end sample sheet. The returned mapping is indexed below
# and in later cells by 'samples', 'targets' and 'combinations'.
data = pu.read_input_file(
    "pyseqrna/example/input_Sample_PE.txt",
    "pyseqrna/example/data/",
    paired=True,
)
samples = data['samples']

[19:57:12]  pyseqrna_utils :: INFO : Reading input samples File 
[19:57:12]  pyseqrna_utils :: INFO : Input file pyseqrna/example/input_Sample_PE.txt read succesfully
[19:57:12]  pyseqrna_utils :: INFO : Combination created succesfully from pyseqrna/example/input_Sample_PE.txt
[19:57:12]  pyseqrna_utils :: INFO : targets dataframe for differenatial created succesfully from pyseqrna/example/input_Sample_PE.txt


In [None]:
# Quality-check step on the paired-end samples (presumably wraps FastQC —
# confirm). NOTE(review): `a` is rebound by the trimming cell further down,
# and `b` is not used anywhere in the visible notebook.
a, b = qc.fastqcRun(
    sampleDict=samples,
    paired=True,
)

In [None]:
import shutil

# Environment check: shows the path of the `fastqc` executable found on PATH,
# or None when it is not installed.
shutil.which('fastqc')

In [3]:
# Pull the DESeq2 targets table and the list of comparisons out of the
# parsed input.
targets, comb = data['targets'], data['combinations']

In [None]:
# Snapshot the interpreter session to "test.pyseqrna" so it can be restored
# later with dill.load_session.
dill.dump_session("test.pyseqrna")

In [4]:
# Restore the session saved in "test.pyseqrna".
# SECURITY: a dill session is a pickle; loading it can execute arbitrary code.
# Only load session files you created yourself.
# NOTE(review): restoring a session replaces names defined by earlier cells,
# which hides state from a top-to-bottom re-run.
dill.load_session("test.pyseqrna")

[13:48:25]  __main__ :: INFO : Analysis started at Sun Nov 21 13:48:25 2021


In [4]:
# Create the results directory and keep the path that was actually created.
# The recorded run shows make_directory returning a suffixed name
# (".../pySeqRNA_results.5"), so the literal "pySeqRNA_results" hard-coded in
# later cells is not guaranteed to be the directory created here.
# NOTE(review): consider switching later cells to `results_dir`.
results_dir = pu.make_directory("pySeqRNA_results")
results_dir

[19:57:42]  pyseqrna_utils :: INFO : Succesfully created directory /home/naveen/Documents/Phd_work/ngs_data/pyseqrna/pySeqRNA_results.5


'/home/naveen/Documents/Phd_work/ngs_data/pyseqrna/pySeqRNA_results.5'

In [5]:

# Trim the paired-end reads. With slurm=True the recorded log shows one job
# submission per sample pair. The alignment-statistics cell later reads
# element 0 of this result as its trimDict.
a = qt.trim_galoreRun(
    sampleDict=samples,
    paired=True,
    slurm=True,
)

[19:57:48]  pyseqrna_utils :: INFO : Config generated succesfully from /home/naveen/Documents/Phd_work/ngs_data/pyseqrna/pyseqrna/param/trim_galore.ini
[19:57:48]  quality_trimming :: INFO : Using default config file trim_galore.ini
[19:57:48]  pyseqrna_utils :: INFO : Succesfully created directory /home/naveen/Documents/Phd_work/ngs_data/pyseqrna/pySeqRNA_results/trim_galore_results.2
[19:57:48]  quality_trimming :: INFO : Job successfully submited for Control.1h.A_1.fastq.gz and Control.1h.A_2.fastq.gz with ['36009']
[19:57:48]  quality_trimming :: INFO : Job successfully submited for Control.1h.B_1.fastq.gz and Control.1h.B_2.fastq.gz with ['36009', '36010']
[19:57:48]  quality_trimming :: INFO : Job successfully submited for Heat.1h.A_1.fastq.gz and Heat.1h.A_2.fastq.gz with ['36009', '36010', '36011']
[19:57:48]  quality_trimming :: INFO : Job successfully submited for Heat.1h.B_1.fastq.gz and Heat.1h.B_2.fastq.gz with ['36009', '36010', '36011', '36012']
[19:57:48]  quality_trimm

In [None]:
# Configure the STAR aligner wrapper for the example genome; slurm=False is
# passed here, unlike the trimming step which uses slurm=True.
aligner = al.STAR_Aligner(
    genome="pyseqrna/example/data/tair10.fasta",
    slurm=False,
)

In [None]:
# Build the aligner's genome index.
# NOTE(review): `j` is not used in the visible notebook (presumably a job
# handle/id — confirm).
j = aligner.build_index()

In [None]:
# Verify the genome index before aligning; the cell displays whatever
# check_index() returns.
aligner.check_index()

In [None]:
# Align all samples in paired-end mode. `outalign` is passed as bamDict to
# the statistics and quantification cells; `job` is not used again in the
# visible notebook.
outalign, job = aligner.run_Alignment(
    target=samples,
    pairedEND=True,
)

In [None]:
# Build the per-sample statistics table from the raw inputs, the trimming
# result and the alignments.
# NOTE(review): `a[0]` must still be the trim_galoreRun result; the
# quantification cell rebinds `a`, so re-running this cell afterwards would
# pass the wrong object.
df = ps.align_stats(
    sampleDict=samples,
    trimDict=a[0],
    bamDict=outalign,
    pairedEND=True,
)

In [None]:
# Save the alignment statistics table as an Excel workbook, without the
# row index.
df.to_excel("pySeqRNA_results/alignment_stats.xlsx", index=False)

In [None]:
# Display the alignment statistics table.
df

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
import seaborn as sns

In [None]:
# Horizontal stacked bar chart of read retention per sample: the value of
# '%_Cleaned2x' stacked with its complement (100 - '%_Cleaned2x').
sample = list(df['Sample'])
creads = list(df['Input_reads2x'])  # not drawn; kept as a notebook-level name
iread = list(df['%_Cleaned2x'])
tc = list(100 - df['%_Cleaned2x'])

# Use the explicit figure/axes interface and size the figure at creation
# instead of relying on pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(18.5, 10.5))
ax.barh(sample, iread, 0.6, label="cleaned Reads", color='skyblue')
# left=iread stacks the discarded share to the right of the cleaned share.
ax.barh(sample, tc, 0.6, left=iread, label="discarded", color='tomato')
ax.margins(y=0.01)
ax.tick_params(axis='both', labelsize=15)
ax.set_xlabel("% Number of reads", fontsize=18)
ax.set_ylabel("Samples", fontsize=18)
# The explicit label list overrides the per-bar `label=` values above.
# The trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(['Cleaned', 'Discarded'], bbox_to_anchor=(1.0, 1.0), loc='upper left', fontsize=15);



In [None]:
# Inspect the per-sample discarded percentages (100 - '%_Cleaned2x').
tc

In [None]:
# Quantify reads per feature from the alignments; the next cell reads
# "pySeqRNA_results/Counts_final.txt" from the same output directory.
# NOTE(review): this rebinds `a`, which until now held the trim_galoreRun
# result used as trimDict=a[0] by the alignment-statistics cell.
a = quants.featureCount(
    bamDict=outalign,
    gff="pyseqrna/example/data/arabidopsis10.gff",
    outDir="pySeqRNA_results",
    slurm=True,
)

In [None]:
# Load the tab-separated count matrix written by the quantification step.
counts = pd.read_csv(
    "pySeqRNA_results/Counts_final.txt",
    sep="\t",
)

In [None]:
# Display the raw count matrix loaded from Counts_final.txt.
counts

In [None]:
# Targets table and comparison list for the differential-expression step.
# NOTE(review): an identical cell exists earlier in the notebook; one of the
# two is redundant.
targets= data['targets']
comb= data['combinations']

In [None]:
# Run DESeq2 on the count matrix for every comparison in `comb`, using
# 'sample' as the design.
dd = de.runDESeq2(
    countDF=counts,
    targetFile=targets,
    design='sample',
    combination=comb,
)

In [None]:
import os 
# Save the unfiltered DESeq2 results (row index omitted) so they can be
# reloaded without re-running the analysis.
dd.to_excel(
    os.path.join("pySeqRNA_results", "Raw_DEGs_all.xlsx"),
    index=False,
)

In [None]:
# Reload the saved DESeq2 table and filter it per comparison. The result is
# indexed with 'summary' and 'filtered' in the cells below.
# NOTE(review): FDR=0.05 / FOLD=1 are presumably the adjusted p-value cutoff
# and the fold-change threshold (log2 or linear?) — confirm against degFilter.
dd = pd.read_excel("pySeqRNA_results/Raw_DEGs_all.xlsx")
filtered_DEG = de.degFilter(
    degDF=dd,
    CompareList=comb,
    FDR=0.05,
    FOLD=1,
)

In [None]:
# Keep the 'summary' entry of the filter result under its own name.
summary= filtered_DEG['summary']

In [None]:
# Write each comparison's filtered DEG table to its own worksheet of
# filtered_DEGs.xlsx (sheet name = comparison key).
# The context manager saves and closes the workbook exactly once, even if a
# sheet write raises. The previous code called wd.save() after every sheet,
# and ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(os.path.join("pySeqRNA_results", "filtered_DEGs.xlsx")) as wd:
    for key, value in filtered_DEG['filtered'].items():
        value.to_excel(wd, sheet_name=key)

In [None]:
# Extract gene lists from the filtered-DEG workbook for every comparison.
# NOTE(review): presumably this writes the per-comparison files under
# pySeqRNA_results/diff_genes/ that the enrichment cells read — confirm.
pu.getGenes(
    os.path.join("pySeqRNA_results", "filtered_DEGs.xlsx"),
    combinations=comb,
)

In [None]:
from pyseqrna import normalize_counts as nc

In [None]:
# Set up count normalization from the raw count file and the GFF annotation.
# NOTE(review): despite its name, `rpkm` is the Normalization object, not a
# table of RPKM values.
rpkm = nc.Normalization(
    countFile="pySeqRNA_results/Counts_final.txt",
    featureFile="pyseqrna/example/data/arabidopsis10.gff",
)

In [None]:
# Compute normalized counts with the object's meanRatioCount() method.
# NOTE(review): the names `rpk`/`rpkm` suggest RPKM, but the method called is
# meanRatioCount — confirm which normalization this actually is.
rpk = rpkm.meanRatioCount()

In [None]:
# Save the normalized counts. The row index is written too, because
# index=False is not passed.
# NOTE(review): the file is named rpkm.xlsx but holds the meanRatioCount()
# result — confirm the name matches the normalization method.
rpk.to_excel("pySeqRNA_results/rpkm.xlsx")

In [None]:
# Reload the normalized counts from disk.
# NOTE(review): the file was written with its index, so the first column read
# back here is presumably that index as an ordinary column — confirm whether
# index_col=0 is wanted.
rcount= pd.read_excel("pySeqRNA_results/rpkm.xlsx")

In [None]:
# Display the reloaded normalized-count table.
rcount

In [None]:
from pyseqrna import pyseqrna_plots as pp

In [None]:
# Heatmap of differentially expressed genes across the comparisons.
# NOTE(review): num=50 presumably limits the plot to the top 50 genes —
# confirm against plotHeatmap.
heatmap, ax = pp.plotHeatmap(
    dd,
    combinations=comb,
    num=50,
    type='deg',
)

In [None]:
from pyseqrna import gene_ontology as go

In [None]:
# Fetch annotation for 'athaliana'. A later cell reads its 'GO_ID' and 'Gene'
# columns. NOTE(review): presumably a BioMart query (network access) — confirm.
bdata= go.query('athaliana')

In [None]:
# Post-process the fetched annotation table with preprocessBioMart.
gdata = go.preprocessBioMart(bdata)

In [None]:
# GO enrichment for the gene list of the C1-D6 comparison.
results = go.enrichGO(
    'athaliana',
    "pySeqRNA_results/diff_genes/C1-D6.txt",
)

In [None]:
# Display the GO enrichment result.
results

In [None]:
import numpy as np

# Count the distinct genes that have a non-missing GO_ID in the annotation
# table. NOTE(review): presumably the background size for GO enrichment —
# confirm. This cell also rebinds `df`, which earlier held the alignment
# statistics table.
df = bdata
df2 = df.dropna(subset=['GO_ID'])
gg = df2['Gene'].tolist()
x = np.asarray(gg)

bg_count = np.unique(x).size

bg_count


In [None]:
# Load a single comparison's DEG table.
# NOTE(review): this reads from "pySeqRNA_results.1", not the
# "pySeqRNA_results" directory used everywhere else, and `cd` is not used in
# the visible notebook — likely a leftover from another run; confirm or remove.
cd= pd.read_excel("pySeqRNA_results.1/M1-V6_deg.xlsx")

In [None]:
# Display the preprocessed annotation table.
gdata

In [None]:
from pyseqrna import pathway


In [None]:
# KEGG pathway enrichment for the gene list of the C1-D6 comparison, using
# the species code 'ath'.
dk = pathway.enrichKEGG(
    file="pySeqRNA_results/diff_genes/C1-D6.txt",
    species='ath',
)