# Transcriptomic analysis

## Trimmomatic output file naming convention

In [None]:
Nomenclature		Meaning
FR1_nonrRNAs		Forward reads from the replicate 1 that did not map to ribosomal RNAs (rRNAs)
FR2_nonrRNAs 		Forward reads from the replicate 2 that did not map to ribosomal RNAs (rRNAs)
RR1_nonrRNAs 		Reverse reads from the replicate 1 that did not map to ribosomal RNAs (rRNAs)
RR2_nonrRNAs 		Reverse reads from the replicate 2 that did not map to ribosomal RNAs (rRNAs)
FSE_R1_nonrRNAs 	Forward single-end reads from the replicate 1 that did not map to ribosomal RNAs (rRNAs)
FSE_R2_nonrRNAs 	Forward single-end reads from the replicate 2 that did not map to ribosomal RNAs (rRNAs)
FSE_all_nonrRNAs 	Concatenated reads from FSE_R1_nonrRNAs and FSE_R2_nonrRNAs files.
FR1_FSE_nonrRNAs	Concatenated forward reads plus forward single-end reads, both from replicate 1, that did not map to ribosomal RNAs (rRNAs)
FR2_FSE_nonrRNAs	Concatenated forward reads plus forward single-end reads, both from replicate 2, that did not map to ribosomal RNAs (rRNAs)
F_all_nonrRNAs		Concatenated reads from FR2_nonrRNAs and FR1_nonrRNAs files.
R_all_nonrRNAs 		Concatenated reads from RR1_nonrRNAs and RR2_nonrRNAs files.
R1_both_nonrRNAs 	merged reads from FR1_nonrRNAs and RR1_nonrRNAs files.
R2_both_nonrRNAs 	merged reads from FR2_nonrRNAs  and  RR2_nonrRNAs files.
R1_all_nonrRNAs 	merged reads from RR1_nonrRNAs and FR1_FSE_nonrRNAs files.
R2_all_nonrRNAs 	merged reads from RR2_nonrRNAs and FR2_FSE_nonrRNAs files.
summa_nonrRNAs 		concatenated reads from R2_all_nonrRNAs and R1_all_nonrRNAs 



In [None]:
# Print the current PATH.
echo $PATH
# Install BUSCO pinned to version 5.0.0 with conda.
conda install busco=5.0.0
# NOTE(review): the line below is a bare 32-character hexadecimal string, not a
# command; it looks like an MD5-style checksum — confirm what it refers to.
# It will fail with "command not found" if this cell is executed as shell.
908ac6e4843180b0da78008d8d13ad64

### rnaSpades new assembly

Get the binary of the latest version of SPAdes.

In [None]:
# Create a conda environment named "rnaquast".
conda create -n rnaquast

# Activate the environment.
# NOTE(review): both activation forms are listed; `source activate` is the
# older syntax and `conda activate` the newer one — one of them should suffice.
source activate rnaquast
conda activate rnaquast
# Install rnaQUAST pinned to version 5.0.0.
# NOTE(review): 5.0.0 is the same pin used for BUSCO in this notebook; confirm
# that an rnaQUAST build with this version exists on the configured channels.
conda install rnaquast=5.0.0
# Slurm batch script that runs the rnaSPAdes assembly (submit with sbatch).
# When saved to its own file, the `#!/bin/bash` line must be the first line.

#!/bin/bash
#SBATCH --partition=debug
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --time=0-0:10:00
#SBATCH --job-name=rnaspades
#SBATCH --cpus-per-task=1
#SBATCH -o result_%N_%j.out	 # File to which STDOUT will be written
#SBATCH -e result_%N_%j.err	 # File to which STDERR will be written

#SBATCH --mail-type=ALL
#SBATCH --mail-user=evelas13@eafit.edu.co

# NOTE(review): a 10-minute limit on the debug partition with a single CPU
# looks like a smoke-test configuration — confirm before a full assembly run.

#export SBATCH_EXPORT=NONE
#export OMP_NUM_THREADS=1

# Abort on the first failing command, on unset variables and on pipe failures.
# Kept below the #SBATCH lines: sbatch stops reading directives at the first command.
set -euo pipefail

## For the Apolo cluster:

# Define variables

# Path to the SPAdes launcher (SPAdes 3.15.1 binary distribution).
spades=/home/evelas13/polyudea/SPAdes-3.15.1-Linux/bin/spades.py
# Forward paired-end reads, reverse paired-end reads and single-end reads.
datosF=/shared-dirs/bec-cibiop/Poliquetos/Assemblies/F_all_nonrRNAs.fq.gz
datosR=/shared-dirs/bec-cibiop/Poliquetos/Assemblies/R_all_nonrRNAs.fq.gz
datosS=/shared-dirs/bec-cibiop/Poliquetos/Assemblies/FSE_all_nonrRNAs.fq.gz
# Directory where the assembly is written.
output=/home/evelas13/polyudea/spadesnew_output
# Use exactly the CPUs granted by Slurm; without -t SPAdes falls back to its
# own default thread count, which oversubscribes a 1-CPU allocation.
threads="${SLURM_CPUS_PER_TASK:-1}"

# Run programs
"$spades" --rna -1 "$datosF" -2 "$datosR" -s "$datosS" -t "$threads" -o "$output"



In [None]:
#Note
#To obtain a complete transcriptome, it is better to build a single assembly from all the reads of all conditions, as suggested by the SPAdes manual.

In [None]:
# Running BUSCO
# Create an environment with BUSCO in it
conda create -n busco

source activate busco
conda activate busco
conda install busco=5.0.0

# slurm:
# Assembly produced by rnaSPAdes; the file is named transcripts.fasta.
datos=/home/evelas13/polyudea/spadesnew_output/transcripts.fasta

# Legacy BUSCO v3 entry point, kept for reference only: BUSCO 5 (installed
# above) provides the `busco` command instead, and running both would reuse
# the same output name (-o rnaspades).
# run_BUSCO.py -i "$datos" -o rnaspades -l metazoa_odb10/ -m transcriptome -c 1

# Assess the transcriptome against the metazoa_odb10 lineage using 1 CPU.
busco -i "$datos" -o rnaspades -l metazoa_odb10/ -m transcriptome -c 1


In [None]:
## Review this rnaQUAST command
# Running rnaQUAST
# Create an environment with rnaQUAST in it

# slurm:
# Assembly produced by rnaSPAdes; the file is named transcripts.fasta.
datos=/home/evelas13/polyudea/spadesnew_output/transcripts.fasta
# Results directory for rnaQUAST; `output` was previously undefined in this cell.
# NOTE(review): directory name chosen here — adjust to the project's layout.
output=/home/evelas13/polyudea/rnaquast_spadesnew_output

# Reference-free evaluation with GeneMarkS-T gene prediction (--gene_mark).
python rnaQUAST.py --transcripts "$datos" --gene_mark -o "$output"


In [None]:
#Comparison of rnaQUAST results for the new and old SPAdes assemblies:

######NEWWWWW

METRICS/TRANSCRIPTS                                    transcripts              
 == BASIC TRANSCRIPTS METRICS (calculated without reference genome and gene database) == 
Transcripts                                            235092                   

Transcripts > 500 bp                                   56578                    
Transcripts > 1000 bp                                  29279                    

Average length of assembled transcripts                543.426                  
Longest transcript                                     37387                    
Total length                                           127755067                

Transcript N50                                         1040    
######OLDDDD

METRICS/TRANSCRIPTS                                    rnaspades                
 == BASIC TRANSCRIPTS METRICS (calculated without reference genome and gene database) == 
Transcripts                                            235523                   

Transcripts > 500 bp                                   56726                    
Transcripts > 1000 bp                                  29292                    

Average length of assembled transcripts                542.282                  
Longest transcript                                     37387                    
Total length                                           127719799                

Transcript N50                                         1034   


######NEWWWWW DISHORT SUMMARY REPORT 

METRICS/TRANSCRIPTS                                    transcripts              
 == BASIC TRANSCRIPTS METRICS (calculated without reference genome and gene database) == 
Transcripts                                            235092                   

Transcripts > 500 bp                                   56578                    
Transcripts > 1000 bp                                  29279                    

Average length of assembled transcripts                543.426                  
Longest transcript                                     37387                    
Total length                                           127755067                

Transcript N50                                         1040    




In [None]:
#Comparison of BUSCO results for the new and old SPAdes assemblies:


######OLDDDD

# BUSCO version is: 3.0.2 
# The lineage dataset is: metazoa_odb10 (Creation date: 2019-11-20, number of species: 65, number of BUSCOs: 954)
# To reproduce this run: python /home/apolo-cgrisales/.conda/envs/Worms/bin/run_busco -i construct/rnaspades.fasta -o rnaspades -l metazoa_odb10/ -m transcriptome -c 1
#
# Summarized benchmarking in BUSCO notation for file construct/rnaspades.fasta
# BUSCO was run in mode: transcriptome

        C:91.8%[S:83.5%,D:8.3%],F:6.3%,M:1.9%,n:954

        876     Complete BUSCOs (C)
        797     Complete and single-copy BUSCOs (S)
        79      Complete and duplicated BUSCOs (D)
        60      Fragmented BUSCOs (F)
        18      Missing BUSCOs (M)
        954     Total BUSCO groups searched
(END) 


###### NEW
# BUSCO version is: 5.0.0 
# The lineage dataset is:  (Creation date: 2021-02-17, number of species: 65, number of BUSCOs: 954)
# Summarized benchmarking in BUSCO notation for file /home/evelas13/polyudea/spadesnew_output/transcripts.fasta
# BUSCO was run in mode: transcriptome

        ***** Results: *****

        C:91.8%[S:82.3%,D:9.5%],F:5.6%,M:2.6%,n:954        
        876     Complete BUSCOs (C)                        
        785     Complete and single-copy BUSCOs (S)        
        91      Complete and duplicated BUSCOs (D)         
        53      Fragmented BUSCOs (F)                      
        25      Missing BUSCOs (M)                         
        954     Total BUSCO groups searched 

###### NEW
# BUSCO version is: 5.0.0 
# The lineage dataset is:  (Creation date: 2021-02-17, number of species: 65, number of BUSCOs: 954)
# Summarized benchmarking in BUSCO notation for file /home/evelas13/polyudea/spadesrest_output/transcripts.fasta
# BUSCO was run in mode: transcriptome

        ***** Results: *****

        C:91.8%[S:82.9%,D:8.9%],F:5.6%,M:2.6%,n:954        
        876     Complete BUSCOs (C)                        
        791     Complete and single-copy BUSCOs (S)        
        85      Complete and duplicated BUSCOs (D)         
        53      Fragmented BUSCOs (F)                      
        25      Missing BUSCOs (M)                         
        954     Total BUSCO groups searched   DIFF K



In [None]:
RSEM quality assessment