-
Notifications
You must be signed in to change notification settings - Fork 0
/
Linux_commands_Transcriptome_analysis
58 lines (40 loc) · 2.17 KB
/
Linux_commands_Transcriptome_analysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#Evolution of drug resistance in cancer cells involves generation of numerous mutations in non-coding genome that reduces the chances of DNA breaks
#Authors:
#Santosh Kumar1†, Valid Gahramanov1†, Julia Yaglom1, Shivani Patel1†, Lukasz Kaczmarczyk1, Ivan Alexandrov2, Gabi Gerlitz1, Mali Salmon-Divon1, Michael Y. Sherman1*
#Affiliations
#1. Department of Molecular Biology, Ariel University, Israel-40700.
#2. Research Center of Biotechnology of the Russian Academy of Sciences, Moscow, Russia-119071.
#********Commands for transcriptome analysis on a Linux server, used to process the raw sequencing reads
# Move up to the parent of the current directory.
# NOTE(review): the intended starting directory is not stated here — confirm before running.
cd ../
#Commands for FASTQC and trim galore
#To make an executable script containing the for loop:
#  run `nano filename` to create the file, then type the loop commands into it
#  press Ctrl+X to exit
#  choose yes (Y) to save it
# Make it executable
chmod 755 filename
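# Mode 755 gives the owner read, write and execute permission, and everyone else read and execute permission
# The file should now be shown in green (in a colour-enabled `ls` listing)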
#To run the script (here the file was named qc_trimm)
bash qc_trimm
# Start trim_galore (with its --fastqc option) on the raw reads of samples
# BC_1 .. BC_12. Every sample is launched as a separate nohup'd background
# job, and the job's stdout is captured in <sample number>.out in the
# current directory.
raw_dir=/home/SequencingData/kumar/Resistance_Irinotecan
for ((sample = 1; sample <= 12; sample++)); do
  nohup trim_galore --fastqc "${raw_dir}/BC_${sample}.fastq.gz" > "${sample}.out" &
done
# To see the progress of the background jobs, run: top -u username
#After trimming and quality control, we align the trimmed FASTQ files to the reference sequence
# Align the trimmed reads of samples 1 .. 12 with hisat2, one nohup'd
# background job per sample. The alignment (stdout) is written to
# <sample number>.sam and hisat2's stderr to <sample number>.out, both in
# the current directory.
hisat2_index=/home/Genomes/Human/hg38/UCSC/HISAT2/hg38_tran/genome_tran
trimmed_dir=/home/kumar/trim_galore_and_fastqc
sample=1
while ((sample <= 12)); do
  nohup hisat2 -x "$hisat2_index" -U "${trimmed_dir}/BC_${sample}_trimmed.fq.gz" \
    1> "${sample}.sam" 2> "${sample}.out" &
  sample=$((sample + 1))
done
#After the alignment has produced the SAM files, we need to convert each of them to BAM format for the counting step
# Convert the SAM alignment of every sample (1 .. 12) to BAM, writing
# <sample number>.bam into the current directory.
# This loops over the samples, like the trimming and alignment steps, so the
# command no longer has to be edited and re-run by hand for each sample number.
sam_dir=/home/kumar/Resistance_Irinotecan
for sample_num in {1..12}; do
  # -b writes BAM output. -S is ignored by current samtools releases (the
  # input format is auto-detected) and is kept only for older versions.
  # Options come before the input file so parsing does not rely on getopt
  # re-ordering the arguments.
  samtools view -S -b -o "${sample_num}.bam" "${sam_dir}/${sample_num}.sam" \
    || printf 'samtools view failed for sample %s\n' "$sample_num" >&2
done
#After the alignment, we use the BAM files produced from it to count the number of reads per sample
# Count reads for all 12 samples in a single featureCounts run.
# The BAM files are listed explicitly through brace expansion (1.bam .. 12.bam,
# in numeric order) instead of a "/............../" placeholder line, which the
# shell cannot execute and which left only 1.bam on the command line.
#   -T 16  number of threads
#   -a     annotation file (GTF)
#   -o     output count table
# NOTE(review): the HISAT2 index used for alignment lives under a UCSC path
# while this annotation is an Ensembl GTF. If their chromosome names differ
# (e.g. "chr1" vs "1"), no reads will be assigned — confirm, or supply a
# chromosome alias file with featureCounts -A.
bam_dir=/home/kumar/Resistance_Irinotecan/alignment_bam
bam_files=("$bam_dir"/{1..12}.bam)
featureCounts -T 16 \
  -a /home/Genomes/Human/hg38/ensembl/Homo_sapiens.GRCh38.97.gtf \
  -o featurecounts.txt \
  "${bam_files[@]}"
#The last command produces the featureCounts output file (featurecounts.txt), which is then used in the R environment to calculate differential expression of genes with edgeR.
#********End of codes******