<a href="https://colab.research.google.com/github/ne1al/temp-repo/blob/master/reference_codes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# General Commands

In [None]:
#order files and directories so that most recently changed file is listed last in human readable format with all files displayed
ls -tralh

In [None]:
#to return to the previous directory
cd -

In [None]:
#download a file with curl instead of wget (zsh); -o sets the name of the output file
curl url -o outputfilename

In [None]:
#to know the storage space in local pc
df -h

In [None]:
#retrieve command history
#ctrl+r on terminal and keep pressing ctrl+r till I reach the command I am looking for

In [None]:
#find a file (case insensitive)
find / -iname legacy_blast.pl

In [None]:
#change permissions of a directory recursively (-R); 777 grants read, write and execute to everyone, so use with care
chmod -R 777 /path

In [None]:
#extract a gzip-compressed tar archive (-x extract, -z gunzip, -v verbose, -f archive file)
tar -xzvf Silva.nr_v132.tgz

# Conda Environment Commands


In [None]:
#create conda environment, better create a separate environment for qiime
conda create --name qiime

In [None]:
#remove conda environment
conda deactivate
conda env remove -n qiime

In [None]:
#Deactivate the current environment 
conda deactivate #don't write environment name

In [None]:
#set up a tool from a downloaded file on a new conda environment
conda env create -n qiime2-2019.10 --file qiime2-2019.10-py36-linux-conda.yml

In [None]:
#Get a list of all my environments, active environment is shown with *
conda env list

In [None]:
#list the packages installed in a named conda environment
conda list -n myenvi
#list the packages installed in the currently active environment
conda list

In [None]:
#download SRA toolkit
conda install -c bioconda sra-tools
conda update sra-tools 

# File Manipulation (Bash)


In [None]:
#more information on an option (switch,flag)
ls --help
man ls

In [None]:
#to know the version of a tool or programming language
perl -version
bedtools -version

In [None]:
#count the entries in a directory; ls -1 prints one name per line
#(ls -l would add a "total" header line and inflate the count by one)
ls -1 | wc -l
#count only the files matching a pattern
ls *.filtered.fasta | wc -l

In [None]:
#remove a directory and everything in it without prompting, even write-protected regular files
#(-r recursive, -f force); replace directory_name with the real path
rm -fr directory_name

In [None]:
#going up in the hierarchy two levels
cd ../..

In [None]:
#to know the storage space in local pc
df -h

In [None]:
#display size of files and directories
ls -s

In [None]:
#join files in parallel, i.e. merge the corresponding lines of each file side by side
#(the line below is the usage synopsis, not a literal command)
paste [OPTION]... [FILES]...

In [None]:
#order files and directories by size
ls -S

In [None]:
#view directories in the root directory
ls -F /

In [None]:
#view types of files or directories
ls -F

In [None]:
#change permissions of a directory recursively (-R); 777 grants read, write and execute to everyone, so use with care
chmod -R 777 /path

In [None]:
#remove a directory and everything in it without prompting, even write-protected regular files
#(-r recursive, -f force); replace directory_name with the real path
rm -fr directory_name

In [None]:
#extract a gzip-compressed tar archive (-x extract, -z gunzip, -v verbose, -f archive file)
tar -xzvf Silva.nr_v132.tgz

In [None]:
#zip a file to gz
gzip filename

In [None]:
#view content of zipped file
zless filename.gz

In [None]:
#view a gzipped file without unzipping; the trailing "-" makes view (read-only vim) read the text from stdin
zcat file.gz | view -

In [None]:
#rename a file
mv stability.files example.files

In [None]:
#copying a file: cp SOURCE DEST
#example file names:
#  sel_normal.merged.filtered.good.unique.filter.precluster.pick.dist
#  sel_normal.merged.filtered.good.unique.filter.precluster.pick.count_table
#copy a file from another directory into the current directory (.)
cp /data3/nehal/skin_micro/sel_normal/sel_normal.merged.filtered.good.names .

In [None]:
#find a file (case insensitive)
find / -iname legacy_blast.pl

In [None]:
#to search for a certain word while viewing a file or manual page in a pager (less / man)
#these are keystrokes typed inside the pager, not shell commands:
#  /word      search forward for "word"
#  n          jump to the next match
#  shift+n    jump to the previous match

In [None]:
#count the entries in a directory; ls -1 prints one name per line
#(ls -l would add a "total" header line and inflate the count by one)
ls -1 | wc -l
#count only the files matching a pattern
ls *.filtered.fasta | wc -l

In [None]:
#create empty text file
touch myfile.txt

In [None]:
#rename a file with interactive option for confirmation before overwriting
mv -i oldname newname

In [None]:
#copying directories
cp -r olddirectory newdirectory

In [None]:
#copying multiple files to a directory
cp file1.txt file2.txt file3.txt directoryname/

In [None]:
#remove safely: -i asks for confirmation (y or n) before deleting
rm -i file.txt

In [None]:
#creating many directories at once, all at the same level
mkdir folder1 folder2 folder3

In [None]:
#count number of lines, words and bytes in a file
wc file1.txt
wc *.txt
wc -l file1.txt  #shows only number of lines
wc -w file1.txt  #shows only number of words
wc -m file1.txt  #shows only number of characters

In [None]:
#show the content of a file on the screen
cat file1.txt

In [None]:
#sort content of file; the result is only displayed on the screen, i.e. the file itself is not changed
sort file1.txt  #alphanumeric
sort -n file1.txt  #numeric
sort -n linenumbersfile.txt  #arrange the line counts of multiple files numerically

In [None]:
#show a certain number of lines from the start of a file on the screen
head -n 1 file1.txt  #shows first line in the file
head -n 2 file1.txt  #shows first two lines
head -n 3 file1.txt  #shows first three lines

In [None]:
#adding strings to a file
echo hello > file1.txt  #repeating the same command will overwrite the file
echo hello >> file2.txt  #repeating the same command will append another line to the file

In [None]:
#print only selected fields of each line in a file: -d sets the field delimiter (here a comma),
#-f selects which field to keep (here the 2nd)
cut -d , -f 2 file1.txt

In [None]:
#collapse *adjacent* duplicate lines into one; uniq only compares neighbouring lines, which is why the file is sorted first
sort file1.txt | uniq
#same, but prefix every distinct line with the number of times it occurs
sort file1.txt | uniq -c

In [None]:
#to qc my files before starting analysis: list the 5 smallest and the 5 largest line counts
#(with several files wc also prints a "total" line, which sorts last and so shows up in the tail output)
wc -l *.txt | sort -n | head -n 5
wc -l *.txt | sort -n | tail -n 5

In [None]:
#restrict a wildcard to certain characters: [AB] matches exactly one character that is either A or B
ls *[AB]file1.txt

In [None]:
#extract file sizes into a file (mac zsh); note the output is tab-separated despite the .csv extension
#load only the zstat builtin from the zsh/stat module
zmodload -F zsh/stat b:zstat
#store name/size pairs in the array "data" for every regular file below the current directory
#(glob qualifiers: D also matches dotfiles, . restricts to plain files; -L does not follow symlinks)
zstat -L -A data -n +size ./**/*(D.)
#walk the array two elements at a time and print "size<TAB>name"
for name size in $data; do
    printf '%s\t%s\n' $size $name
done >outfile.csv

# Transferring files from/to remote servers

In [None]:
#transferring files from local PC to remote server
#open terminal in the local directory where the desired files are present
scp selected_SRA_Acc.txt nehal@10.7.28.10:/data3/nehal/skin_micro/sel_normal
scp IntroductionToBio-Linux8_Dec2015.pdf nehal@10.7.28.10:/home/nehal/work

In [None]:
#transferring files from remote server to local
#open terminal in the local directory where I want the files to be downloaded
scp nehal@10.7.28.10:/data3/nehal/skin_micro/sel_normal/sel_normal.merged.filtered.good.unique.filter.precluster.count_table .

In [None]:
#pulling a docker image (prefix the command with sudo if your user is not allowed to talk to the docker daemon)
docker pull imagename

# Analysis
FASTA and FASTQ Files


In [None]:
#counting sequences in fasta file: every record starts with a header line beginning with ">"
#(anchoring with ^ avoids counting lines that merely contain ">" somewhere else)
grep -c '^>' in.fasta

Loop

In [None]:
#command substitution instead of a for loop: $(<list.txt) expands to the contents of list.txt
command --argument $(<list.txt)
#here the entries of the txt file are all passed, separated by spaces, to a single invocation of the command;
#in a for loop, by contrast, the loop body runs once for each entry
#for loop (echo only prints the arguments; swap it for the real tool)
for acc in `cat accessions.txt` ; do   echo  ${acc}.fastq -o ~/results/${acc}_trimm     ; done

In [None]:
#template for loop to copy and paste from
for acc in `cat accessions.txt` ; do   echo  ${acc}.fastq -o ~/results/${acc}_trimm     ; done

# R Analysis


In [None]:
# use the values of the first row as column names, then drop that row (tidyverse)
library(tidyverse)
names(df) <- unlist(slice(df, 1))
df <- slice(df, -1)

In [None]:
#making first column in a dataframe as row names
#drop the first column, then label the rows with the values of target_id
#(assumes target_id is that first column -- adjust the name to your data)
result <- mydf[-1]
row.names(result) <- mydf$target_id
result

In [None]:
#remove an entire column of the dataframe
Data$genome <- NULL

In [None]:
# to know the type of a data frame or matrix (replace obj with your object)
typeof(obj)  # internal storage type, e.g. "list" for a data frame, "double" for a numeric matrix
class(obj)   # class, e.g. "data.frame", or c("matrix", "array") for a matrix

In [None]:
#For each row return the column name of the largest value, also applied on matrix
#(which.max returns the position of the first maximum, so ties resolve to the leftmost column)
colnames(DF)[apply(DF,1,which.max)]

#For each column, return the row name of the largest value, also applied on matrix
#(ties resolve to the topmost row)
rownames(DF)[apply(DF,2,which.max)]



In [None]:
# removing the rows that contain only zeros
# drop = FALSE keeps the result two-dimensional even if a single row or column remains
clean <- DF[!apply(DF == 0, 1, all), , drop = FALSE]


In [None]:
# remove every row that contains a zero in any column, i.e. keep only the rows
# in which all columns are greater than zero; ncol() replaces a hard-coded
# column count so the line works for any number of columns
test <- est_counts[rowSums(est_counts > 0) == ncol(est_counts), , drop = FALSE]

In [None]:
#remove a variable
rm(variable_name)

In [None]:
# selecting certain observations in a data frame
# (source: https://www.statmethods.net/management/subset.html)
# first 5 observations (head() does not pad with NA rows when there are fewer than 5)
newdata <- head(mydata, 5)

# based on variable values; which() drops the rows where the condition is NA
newdata <- mydata[which(mydata$gender == "F" & mydata$age > 65), ]

# or, without repeating the data frame name: with() evaluates the condition
# inside mydata and avoids attach()/detach(), which can silently mask variables
newdata <- mydata[with(mydata, which(gender == "F" & age > 65)), ]

# using subset function (keeping only two columns, e.g ID and weight)
newdata <- subset(mydata, age >= 20 | age < 10, select = c(ID, Weight))

In [None]:
# subtracting two columns in a dataframe and saving the result as a new column
# read.table(text = ...) parses the character vector directly and, unlike
# textConnection(), leaves no open connection behind
d <- read.table(text = lines, header = TRUE)
d$new <- d$end - d$start

In [None]:
# subtracting two columns of a matrix, selecting the columns by name ...
variablename <- matrixname[, "B"] - matrixname[, "A"]
# ... or by position
variablename <- matrixname[, 2] - matrixname[, 1]

In [None]:
#extracting only certain columns from a dataframe, selected by name
#(Cars93 is presumably the example dataset from the MASS package -- load it first)
new_df <- Cars93[,c("MPG.highway","EngineSize")]

In [None]:
#sort dataframe by absolute value of a certain column (largest first), returning the rows grouped by study
library(data.table)
#NOTE: setDT() converts df to a data.table by reference, so df itself is changed
sorted = setDT(df)[order(-abs(similarity)), .SD, by = study]