# Linux tools examples

Demonstrations of common Linux text-processing tools (awk, grep, split, sort, sed, head/tail, paste), followed by three short case studies.

### awk

In [None]:
cd awk

In [None]:
%%bash
# awk examples. Each command is followed by a bare `echo` so the outputs
# are separated by a blank line.

# Show the input used by the first few examples
cat people.txt
echo

# Print the first, third, and last field of every line ($NF = last field)
awk '{print $1, $3, $NF}' people.txt
echo

# $NF is always the last field, however many fields a given line has
awk '{print $NF}' ragged.txt
echo

# Adjacent strings and fields are concatenated into a single output string
awk '{print $1 " favorite food is " $3}' people.txt
echo

# A /regex/ pattern restricts the action to lines that match it
awk '/pizza/ {print $1, $3}' people.txt
echo

cat states.txt
echo

# -F sets the input field separator (a comma here instead of whitespace)
awk -F ',' '{print $1 " state bird is " $3}' states.txt
echo

# Arithmetic on fields; results can be stored in variables before printing
awk '{sum=$1+$2; prod=$1*$2; print $1, $2, sum, prod}' numbers.txt
echo

# Built-in math functions; sin() and cos() take their argument in radians
awk '{sin1=sin($1); cos2=cos($2); print $1, $2, sin1, cos2}' numbers.txt
echo

# log() is the natural logarithm
awk '{log1=log($1); sqrt2=sqrt($2); print $1, $2, log1, sqrt2}' numbers.txt
echo

# BEGIN runs once before any input is read (define PI, print a header).
# printf gives fixed-width columns; the newline must be written as the \n
# escape because an awk string constant cannot contain a raw line break.
awk 'BEGIN {PI=3.14159; print " x y sin(x) cos(y)"} {sin1=sin($1*PI); cos2=cos($2*PI); printf "%4.1f %4.1f %7.4f %7.4f\n", $1, $2, sin1, cos2}' numbers.txt
echo

# Accumulate column sums while echoing each line; END runs after the last
# line, where NR holds the number of records read, so s/NR is the mean
awk '{s1+=$1; s2+=$2; print} END {print s1/NR, s2/NR}' numbers.txt
echo




In [None]:
cd ..

### grep

In [None]:
cd grep

In [None]:
%%bash
# grep examples. A bare `echo` after each command separates the outputs
# with a blank line.

# Matching is case-sensitive by default ...
grep 'banana' file1.txt
echo

# ... so a capitalized pattern is a different search
grep 'Banana' file1.txt
echo

# -i ignores case
grep -i 'banana' file1.txt
echo

# -B N also prints the N lines before each match
grep -B 3 'kumquat' file1.txt
echo

# -A N also prints the N lines after each match
grep -A 2 'kumquat' file1.txt
echo

# Search every .txt file in the current directory
grep 'lime' *.txt
echo

# -v inverts the match: print the lines that do NOT contain the pattern
grep -v 'lime' *.txt
echo

# NOTE(review): identical to the search two commands above; possibly meant
# to show a different option. Confirm.
grep 'lime' *.txt
echo

# -c prints a count of matching lines per file instead of the lines
grep -c 'lime' *.txt
echo

# -l lists only the names of files that contain a match
grep -l 'kumquat' *.txt
echo

# -L lists only the names of files that contain no match
grep -L 'kumquat' *.txt
echo

# Unanchored: matches "pear" anywhere on the line
grep 'pear' file1.txt
echo

# ^ anchors the match to the start of the line
grep '^pear' file1.txt
echo

# $ anchors the match to the end of the line
grep 'pear$' file1.txt
echo




In [None]:
cd ..

### split

In [None]:
cd split

In [None]:
%%bash
# split examples. These commands print nothing themselves; list the split
# directory afterwards to see the pieces produced from the FASTA file.
echo

# No options: split's defaults decide the piece size and output names
split 'genome.fasta'
echo

# -l sets the number of lines per piece; the last argument is the prefix
# used for the output file names
split -l 2000 'genome.fasta' 'genome_v1_'
echo

# -t makes '>' the record separator instead of newline, so -l 200 counts
# '>'-delimited records rather than lines.
# Does not work on macOS
split -l 200 -t '>' 'genome.fasta' 'genome_v2_'
echo
echo




In [None]:
cd ..

### sort

In [None]:
cd sort

In [None]:
%%bash
# sort examples. A bare `echo` after each command separates the outputs
# with a blank line.

# Sort whole lines using the collation rules of the current locale
sort unsorted1.txt
echo

# Same input in the C locale (plain byte order). The assignment prefix
# applies to this one command only, so any LC_ALL already set in the
# environment is left untouched for the commands that follow.
LC_ALL=C sort unsorted1.txt
echo

# -k3: the sort key runs from field 3 to the end of the line
sort -k3 unsorted2.txt
echo

# -k2: key starts at field 2, compared as text
sort -k2 unsorted2.txt
echo

# -n compares the key numerically instead of as text
sort -k2 -n unsorted2.txt
echo

# -u outputs only one copy of lines that compare equal
sort -u unsorted1.txt
echo

# Multiple keys: field 3 first, ties broken by field 1, then field 2.
# "-kN,N" limits a key to exactly field N.
sort -k3,3 -k1,1 -k2,2 unsorted3.txt
echo

# An "r" on a key reverses just that key: field 2 descending, then
# field 1 and field 3 ascending
sort -k2r,2 -k1,1 -k3,3 unsorted3.txt
echo

# -R sorts by a random hash of the keys
sort -R abc.txt
echo

# shuf writes a random permutation of the input lines
shuf abc.txt
echo

# shuf also reads standard input: shuffle the numbers 1..9
seq 9 | shuf
echo
echo




In [None]:
cd ..

### sed

In [None]:
cd sed

In [None]:
%%bash
# sed examples. A bare `echo` after each command separates the outputs
# with a blank line.

# s/old/new/ replaces the first match on each line
sed 's/pear/XXXX/' file1.txt
echo

# The g flag replaces every match on each line
sed 's/pear/XXXX/g' file1.txt
echo

# ^ anchors the match to the start of the line
sed 's/^pear/XXXX/' file1.txt
echo

# $ anchors the match to the end of the line
sed 's/pear$/XXXX/' file1.txt
echo

# -n turns off automatic printing; "5p" then prints only line 5
sed -n '5p' file2.txt
echo

# An address range: print lines 3 through 5
sed -n '3,5p' file2.txt
echo

# first~step address: start at line 2, then every 3rd line.
# Does not work on macOS
sed -n '2~3p' file2.txt
echo

# Create a small working copy to edit
head -n 2 file1.txt > file3.txt
echo

# Reading from and redirecting to the same file: the shell truncates
# file3.txt before sed opens it, so sed sees empty input.
# NOTE(review): presumably a deliberate demonstration of why -i is
# needed (see the cat that follows). Confirm.
# shellcheck disable=SC2094
sed 's/pear/XXXX/' file3.txt > file3.txt
echo

cat file3.txt
echo

# Recreate the working copy
head -n 2 file1.txt > file3.txt
echo

# -i edits the file in place (this is the GNU form; BSD/macOS sed
# expects a backup-suffix argument after -i)
sed -i 's/pear/XXXX/' file3.txt
echo

cat file3.txt
echo
echo




In [None]:
cd ..

### head-and-tail

In [None]:
cd head-and-tail

In [None]:
%%bash
# head/tail examples. A bare `echo` after each command separates the
# outputs with a blank line.

# With no options, head prints the beginning of the file ...
head 'file.txt'
echo

# ... and tail prints the end
tail 'file.txt'
echo

# -n N: the first N lines
head -n 7 'file.txt'
echo

# -n N: the last N lines
tail -n 7 'file.txt'
echo

# Negative count: everything except the last 15 lines.
# Does not work on macOS
head -n -15 'file.txt'
echo

# +N: start output at line N (here, line 16 through the end)
tail -n +16 'file.txt'
echo
echo





In [None]:
cd ..

### paste

In [None]:
cd paste

In [None]:
%%bash
# paste examples: join corresponding lines of the input files side by side.

# Default delimiter between the columns is a tab
paste 'fruits.txt' 'colors.txt'
echo

# -d chooses the delimiter (a comma here)
paste -d ',' 'fruits.txt' 'colors.txt'
echo
echo




In [None]:
cd ..

### CaseStudyHPL

In [None]:
cd CaseStudyHPL

In [None]:
%%bash
#######################################
# Print a one-line summary for every output.hpl* file in the current
# directory, preceded by a header row.
# Columns: node (first line of the file), time (6th field of the
# WR12R2R4 line), P (number of PASSED lines), F (number of FAILED lines).
# Arguments: none
# Outputs:   header plus one row per file on stdout
#######################################
summarize_hpl() {
  local file node t npass nfail

  echo 'node time P F' # Print header
  for file in output.hpl*; do # Iterate over output files
    # When nothing matches, the glob is left as the literal pattern; skip it
    [[ -e "$file" ]] || continue

    node=$(head -n 1 "$file")                     # Node name from first line
    t=$(grep WR12R2R4 "$file" | awk '{print $6}') # Time from 6th field of WR12R2R4 line
    npass=$(grep -c PASSED "$file")               # Number of passed tests
    nfail=$(grep -c FAILED "$file")               # Number of failed tests

    # Passed count before failed count, matching the "P F" header order.
    # Left unquoted on purpose so stray whitespace or multiple matches
    # collapse onto a single space-separated row.
    # shellcheck disable=SC2086
    echo $node $t $npass $nfail
  done
}

summarize_hpl



In [None]:
cd ..

### CaseStudyFASTA

In [None]:
cd CaseStudyFASTA

In [None]:
%%bash
#######################################
# Rewrite every genome_* file in the current directory so that lines
# beginning with "sp|" begin with ">sp|" instead.
# NOTE(review): presumably needed because split consumes '>' as the record
# separator, leaving the first header of a piece without it. Confirm.
# Arguments: none
# Outputs:   modifies the genome_* files in place (via a scratch file
#            named "temp" in the current directory)
#######################################
fix_fasta_headers() {
  local file

  for file in genome_*; do
    # When nothing matches, the glob is left as the literal pattern; skip it
    # so that no stray file named 'genome_*' gets created.
    [[ -f "$file" ]] || continue

    # Only replace the piece if sed succeeded; otherwise keep it untouched.
    if sed 's/^sp|/>sp|/' "$file" > temp; then
      mv -- temp "$file"
    else
      rm -f -- temp
    fi
  done
}

# -t makes '>' the record separator, so each piece holds 200 '>'-delimited
# records rather than 200 lines.
split -l 200 -t '>' genome.fasta genome_ # Does not work on macOS
fix_fasta_headers




In [None]:
cd ..

### CaseStudyBERT

In [None]:
cd CaseStudyBERT

In [None]:
%%bash
#######################################
# Print one summary row for every out* file in the current directory:
#   node card correct-answers achieved-sentences elapsed
# Arguments: none
# Outputs:   one row per file on stdout; a warning on stderr when the
#            elapsed time cannot be computed (the column is then omitted)
#######################################
summarize_bert() {
  local file node card tstart tend corr ach t

  for file in out*; do
    # When nothing matches, the glob is left as the literal pattern; skip it
    [[ -f "$file" ]] || continue

    node=$(head -n 1 "$file" | awk '{print $1}')    # First word of first line
    card=$(grep 'AIP (' "$file" | awk '{print $4}') # 4th field of the "AIP (" line
    tstart=$(grep TBEFORE "$file" | awk '{print $2}')
    # NOTE(review): the start marker is TBEFORE but the end pattern is AFTER,
    # which matches any line containing "AFTER". Confirm this is intended.
    tend=$(grep AFTER "$file" | awk '{print $2}')
    corr=$(grep 'Correct answers' "$file" | tail -n 1 | awk '{print $8}')   # Last instance
    ach=$(grep 'Achieved sentences' "$file" | tail -n 1 | awk '{print $3}') # Last instance

    # Elapsed time = tend - tstart. Validate first: a malformed operand
    # inside $(( )) is a shell error that would abort the whole loop.
    # 10# forces base 10 so a leading zero is not read as octal.
    if [[ "$tstart" =~ ^[0-9]+$ && "$tend" =~ ^[0-9]+$ ]]; then
      t=$((10#$tend - 10#$tstart))
    else
      printf 'warning: %s: cannot compute elapsed time (start=%s, end=%s)\n' \
        "$file" "$tstart" "$tend" >&2
      t=
    fi

    # Left unquoted on purpose so whitespace and multi-line values collapse
    # onto a single space-separated row.
    # shellcheck disable=SC2086
    echo $node $card $corr $ach $t
  done
}

summarize_bert



In [None]:
cd ..