diff --git a/lecture_scripts/problem_solutions/set_1_1.zip b/lecture_scripts/problem_solutions/set_1_1.zip deleted file mode 100644 index 8397423..0000000 Binary files a/lecture_scripts/problem_solutions/set_1_1.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_1_1/sequences.fasta b/lecture_scripts/problem_solutions/set_1_1/sequences.fasta new file mode 100644 index 0000000..504844d --- /dev/null +++ b/lecture_scripts/problem_solutions/set_1_1/sequences.fasta @@ -0,0 +1,8 @@ +>seq1 +ATGGCGTCTTGGCCTTAAAAGCTC +>seq2 +ATGGCGTCTTGGCCTTAAAAGCTC +>seq3 +ATGGCGTCTTGGCCTTAAAAGCTC +>seq4 +ATGGCGTCTTGGCCTTAAAAGCTC diff --git a/lecture_scripts/problem_solutions/set_1_1/unix_commands.txt b/lecture_scripts/problem_solutions/set_1_1/unix_commands.txt new file mode 100644 index 0000000..2f1b976 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_1_1/unix_commands.txt @@ -0,0 +1,50 @@ +Log into your machine or account. What is the full path to your home directory? + > pwd + How many files does it contain? + ## look for names that do not end with "/" + > ls -F + How many directories? + ## look for names that end with "/" + > ls -F + +Without using a text editor examine the contents of the file sequences.fasta. + How many lines does this file contain? + > wc -l sequences.fasta + How many characters? + > wc -c sequences.fasta + What is the first line of this file? + > head -1 sequences.fasta + What are the last 3 lines? + > tail -3 sequences.fasta + How many sequences are in the file? + ## grep will only print the lines that contain ">" in the file (headers) + ## pipe the result to "wc -l" to count the number of lines + > grep ">" sequences.fasta | wc -l + +Rename sequences.fasta to something more informative of the sequences the file contains. 
+ > mv sequences.fasta my_random_sequences.fasta + +Create a directory called fasta + > mkdir fasta + +Copy the fasta file that you renamed to the fasta directory + > cp my_random_sequences.fasta fasta + +Verify that the file is within the fasta directory + ## you can visually inspect the output from "ls fasta/" or use + ## "ls fasta/my_random_sequences.fasta" directly to see if the file + ## is there - if it lists, it's fine, otherwise it will give an error + > ls fasta/my_random_sequences.fasta + +Delete the original file that you used for copying + > rm my_random_sequences.fasta + +Copy a directory + ## copy "fasta" directory and its contents to "fasta_copy" + ## the "-r" flag tells "cp" to copy recursively + > cp -r fasta fasta_copy + +Remove a directory + ## remove the "fasta" directory and its contents + ## the "-r" flag tells "rm" to remove recursively + > rm -r fasta diff --git a/lecture_scripts/problem_solutions/set_1_2.zip b/lecture_scripts/problem_solutions/set_1_2.zip deleted file mode 100644 index 52593ba..0000000 Binary files a/lecture_scripts/problem_solutions/set_1_2.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_1_2/add.pl b/lecture_scripts/problem_solutions/set_1_2/add.pl new file mode 100755 index 0000000..ae5e5d2 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_1_2/add.pl @@ -0,0 +1,17 @@ +#!/usr/bin/perl +## add.pl + +use strict; +use warnings; + +##first value - get it from the command line arguments +my $value1 = shift; + +## second value - get it from the command line arguments +my $value2 = shift; + +## add the two numbers +my $sum = $value1 + $value2; + +## print the sum of the two numbers +print $sum, "\n"; diff --git a/lecture_scripts/problem_solutions/set_1_2/reversec.pl b/lecture_scripts/problem_solutions/set_1_2/reversec.pl new file mode 100755 index 0000000..1bf08b5 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_1_2/reversec.pl @@ -0,0 +1,20 @@ +#!/usr/bin/perl +## reversec.pl + 
+use strict; +use warnings; + +## get the sequence from the arguments +my $sequence = shift; + +## reverse the sequence +my $reverse_sequence = reverse $sequence; + +## complement the reverse sequence +## tr/// modifies the content of the string directly so we'll first make a copy +my $reverse_complement_sequence = $reverse_sequence; +## now complement the reverse sequence +$reverse_complement_sequence =~ tr/ACGT/TGCA/; + +## print the reverse complemented sequence +print "output: $reverse_complement_sequence\n"; diff --git a/lecture_scripts/problem_solutions/set_2.zip b/lecture_scripts/problem_solutions/set_2.zip deleted file mode 100644 index e59a337..0000000 Binary files a/lecture_scripts/problem_solutions/set_2.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_2/add.pl b/lecture_scripts/problem_solutions/set_2/add.pl new file mode 100755 index 0000000..ecc53f5 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/add.pl @@ -0,0 +1,30 @@ +#!/usr/bin/perl +## add.pl + +use strict; +use warnings; + +##first value - get it from the command line arguments +my $value1 = shift; + +## second value - get it from the command line arguments +my $value2 = shift; + +## check that both values are defined +if (not defined $value1 or not defined $value2) { + print "Please provide two numbers.\n"; +} +## check that both values are positive numbers +elsif ($value1 < 0 or $value2 < 0) { + print "Please provide two positive numbers.\n"; +} +## only run the rest of the program if the values are OK + +else { + + ## add the two numbers + my $sum = $value1 + $value2; + + ## print the sum of the two numbers + print $sum, "\n"; +} diff --git a/lecture_scripts/problem_solutions/set_2/even_odd.pl b/lecture_scripts/problem_solutions/set_2/even_odd.pl new file mode 100755 index 0000000..e90feac --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/even_odd.pl @@ -0,0 +1,52 @@ +#!/usr/bin/perl +## even_odd.pl + +use strict; +use warnings; + +## create 
file myresult.txt and open it for writing output +open OUT, ">", "myresult.txt" or die "Error writing to file: $!\n"; + +## open numbers.txt for reading +open IN, "<", "numbers.txt" or die "Error reading file: $!\n"; + +## read each line in the file (<IN> returns the next line from the IN handle) +while (my $number = <IN>) { + + ## remove the newline + chomp $number; + + ## check if the number is even -- we can use the modulus operator to see + ## the remainder when the number is divided by 2; if it's 0, then it's + ## even + if ($number % 2 == 0) { + + ## check whether the number is less than 24 + if ($number < 24) { + + ## print the number + print "$number\n"; + } + + } + ## odd number + else { + + ## compute the factorial of the number + my $factorial = 1; + ## go through each value from $number down to 1 + for (my $i = $number; $i > 0; $i--) { + ## multiply the existing result by the new lower number + $factorial *= $i; + } + + ## print the factorial + print OUT "$factorial\n"; + + } + +} + +## close the file handles +close IN; +close OUT; diff --git a/lecture_scripts/problem_solutions/set_2/myresult.txt b/lecture_scripts/problem_solutions/set_2/myresult.txt new file mode 100644 index 0000000..8dbfb01 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/myresult.txt @@ -0,0 +1,4 @@ +1.1962222086548e+56 +1 +8.22283865417792e+33 +120 diff --git a/lecture_scripts/problem_solutions/set_2/numbers.txt b/lecture_scripts/problem_solutions/set_2/numbers.txt new file mode 100644 index 0000000..120c798 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/numbers.txt @@ -0,0 +1,8 @@ +22 +45 +1 +2 +31 +32 +72 +24 diff --git a/lecture_scripts/problem_solutions/set_2/order.pl b/lecture_scripts/problem_solutions/set_2/order.pl new file mode 100755 index 0000000..fafdada --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/order.pl @@ -0,0 +1,20 @@ +#!/usr/bin/perl +## order.pl + +use strict; +use warnings; + +## get the first argument +my $string1 = shift; + +## get the second argument +my $string2 = shift; 
+ +## check that $string1 is "less than or equal" $string2 (already ordered) +if ($string1 le $string2) { + print "right order\n"; +} +## otherwise $string1 is "more than" $string2 +else { + print "wrong order\n"; +} diff --git a/lecture_scripts/problem_solutions/set_2/pali.pl b/lecture_scripts/problem_solutions/set_2/pali.pl new file mode 100755 index 0000000..cfa3337 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/pali.pl @@ -0,0 +1,33 @@ +#!/usr/bin/perl +## pali.pl + +use strict; +use warnings; + +## get the string argument +my $string = shift; + +## lowercase $string so that comparisons are case insensitive +my $lower_case_string = lc $string; + +## remove whitespace from the string +## we want to apply the substitution globally to remove ALL occurrences +## since s/// works on the string itself, we'll first make a copy +my $lower_case_clean_string = $lower_case_string; +## we can use s/// to remove the whitespace +$lower_case_clean_string =~ s/\s//g; +## we can remove all non alphanumerical characters +$lower_case_clean_string =~ s/\W//g; + +## get the reverse of the string +my $reverse_lower_case_clean_string = reverse $lower_case_clean_string; + +## check that forward and reverse strings are the same - if they're the same, we have a palindrome +if ($lower_case_clean_string eq $reverse_lower_case_clean_string) { + print "yes!\n"; +} +## otherwise the strings aren't the same +else { + print "no!\n"; +} + diff --git a/lecture_scripts/problem_solutions/set_2/percent.pl b/lecture_scripts/problem_solutions/set_2/percent.pl new file mode 100755 index 0000000..4f38a5e --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/percent.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl +## percent.pl + +use strict; +use warnings; + +## get the first number argument +my $number1 = shift; + +## get the second number argument +my $number2 = shift; + +## check that the sum of the two numbers does not equal 0 +if ($number1 + $number2 != 0) { + ## calculate the percentage 
+ my $percentage = $number1 / ($number1 + $number2) * 100; + ## use printf to print a nicely formatted percentage + printf "%.2f%%\n", $percentage; +} +## otherwise the sum equals to 0 +else { + print "You are trying to trick me!\n"; +} diff --git a/lecture_scripts/problem_solutions/set_2/reorder.pl b/lecture_scripts/problem_solutions/set_2/reorder.pl new file mode 100755 index 0000000..5f004bb --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/reorder.pl @@ -0,0 +1,20 @@ +#!/usr/bin/perl +## reorder.pl + +use strict; +use warnings; + +## get the first argument +my $string1 = shift; + +## get the second argument +my $string2 = shift; + +## if $string1 is "less than or equal" $string2 (already ordered) +if ($string1 le $string2) { + print "$string1 $string2\n"; +} +## otherwise $string1 is "more than" $string2 (swap them) +else { + print "$string2 $string1\n"; +} diff --git a/lecture_scripts/problem_solutions/set_2/same.pl b/lecture_scripts/problem_solutions/set_2/same.pl new file mode 100755 index 0000000..45554c2 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_2/same.pl @@ -0,0 +1,29 @@ +#!/usr/bin/perl +## same.pl + +use strict; +use warnings; + +## prompt user for the first string +print "Enter string 1: "; +## get the first string from the user input +my $string1 = <>; + +## prompt user for the second string +print "Enter string 2: "; +## get the second string from the user input +my $string2 = <>; + +## make both $string1 and $string2 lower case so that the comparison is +## case insensitive +my $lower_case_string1 = lc $string1; +my $lower_case_string2 = lc $string2; + +## check to see if $lower_case_string1 and $lower_case_string2 are "equal" +if ($lower_case_string1 eq $lower_case_string2) { + print "same\n"; +} +## otherwise they're different +else { + print "different\n"; +} diff --git a/lecture_scripts/problem_solutions/set_3.zip b/lecture_scripts/problem_solutions/set_3.zip deleted file mode 100644 index 7da569d..0000000 Binary files 
a/lecture_scripts/problem_solutions/set_3.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_3/divide.pl b/lecture_scripts/problem_solutions/set_3/divide.pl new file mode 100755 index 0000000..c08103e --- /dev/null +++ b/lecture_scripts/problem_solutions/set_3/divide.pl @@ -0,0 +1,26 @@ +#!/usr/bin/perl +## divide.pl + +use strict; +use warnings; + +## get the first number argument +my $dividend = shift; + +## get the second number argument +my $divisor = shift; + +## if both arguments have not been provided, bail out +die "Two numbers are required\n" if not defined $dividend or not defined $divisor; + +## if both numbers are not positive, bail out +die "Both numbers have to be positive\n" if $dividend < 0 or $divisor < 0; + +## if divisor is 0, bail out +die "Divisor cannot be zero\n" if $divisor == 0; + +## calculate the quotient +my $quotient = $dividend / $divisor; + +## print quotient +print "$quotient\n"; diff --git a/lecture_scripts/problem_solutions/set_3/line_length.pl b/lecture_scripts/problem_solutions/set_3/line_length.pl new file mode 100755 index 0000000..c88fbc3 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_3/line_length.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl +## line_length.pl + +use strict; +use warnings; + +## get the filename argument +my $file = shift; + +## total length of the file +my $total_length = 0; + +## number of lines in the file +my $number_of_lines = 0; + +## open a filehandle to access the file +open IN, "<", $file or die "Error reading $file: $!\n"; + +## read each line of the file (<IN> returns the next line from the IN handle) +while (my $line = <IN>) { + + ## remove the newline + chomp $line; + + ## add to the count of lines + $number_of_lines++; + + ## get the length of the line + my $length = length $line; + + ## print the length of the line to STDOUT + print "Line length for line $number_of_lines: $length\n"; + + ## add to the total length of the file up to this line + $total_length += $length; +} + +## calculate the average line length (0 for an empty file - avoids dividing by zero) +my 
$average_line_length = $number_of_lines ? $total_length / $number_of_lines : 0; + +## print the average line length to STDOUT +print "Average line length: $average_line_length\n"; + +## close filehandles +close IN; diff --git a/lecture_scripts/problem_solutions/set_3/lines.txt b/lecture_scripts/problem_solutions/set_3/lines.txt new file mode 100644 index 0000000..3e980b4 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_3/lines.txt @@ -0,0 +1,7 @@ +Two numbers are required. +Numbers have to be positive. +Divisor cannot be zero. +Write the quotient to STDOUT +Write any errors to STDERR +Redirect STDOUT to an output file (out.txt) +Redirect STDERR to an error file (err.txt) diff --git a/lecture_scripts/problem_solutions/set_3/lines_uc.txt b/lecture_scripts/problem_solutions/set_3/lines_uc.txt new file mode 100644 index 0000000..0948733 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_3/lines_uc.txt @@ -0,0 +1,7 @@ +TWO NUMBERS ARE REQUIRED. +NUMBERS HAVE TO BE POSITIVE. +DIVISOR CANNOT BE ZERO. 
+WRITE THE QUOTIENT TO STDOUT +WRITE ANY ERRORS TO STDERR +REDIRECT STDOUT TO AN OUTPUT FILE (OUT.TXT) +REDIRECT STDERR TO AN ERROR FILE (ERR.TXT) diff --git a/lecture_scripts/problem_solutions/set_3/upper_case_file.pl b/lecture_scripts/problem_solutions/set_3/upper_case_file.pl new file mode 100755 index 0000000..356288a --- /dev/null +++ b/lecture_scripts/problem_solutions/set_3/upper_case_file.pl @@ -0,0 +1,34 @@ +#!/usr/bin/perl +## upper_case_file.pl + +use strict; +use warnings; + +## get the filename argument for reading +my $in_file = shift; + +## get the filename argument for writing +my $out_file = shift; + +## open the filehandle to read from $in_file +open IN, "<", $in_file or die "Error reading $in_file: $!\n"; + +## open the filehandle to write to $out_file +open OUT, ">", $out_file or die "Error writing $out_file: $!\n"; + +## read each line from $in_file (<IN> returns the next line from the IN handle) +while (my $line = <IN>) { + + ## remove the newline + chomp $line; + + ## make letters uppercase + my $upper_case_line = uc $line; + + ## write the uppercase line to the $out_file + print OUT "$upper_case_line\n"; +} + +## close filehandles +close IN; +close OUT; diff --git a/lecture_scripts/problem_solutions/set_4.zip b/lecture_scripts/problem_solutions/set_4.zip deleted file mode 100644 index 7b314f4..0000000 Binary files a/lecture_scripts/problem_solutions/set_4.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_4/calculate_gc_content.pl b/lecture_scripts/problem_solutions/set_4/calculate_gc_content.pl new file mode 100755 index 0000000..a29d3e2 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_4/calculate_gc_content.pl @@ -0,0 +1,49 @@ +#!/usr/bin/perl +## calculate_gc_content.pl + +use strict; +use warnings; + +## DNA sequence +my $sequence_string = "AGTTGTTACATGAAATCTGCAGTTTCATAATTTC"; + +## Turn a DNA string into an array with split() +## if we split on an empty string, we can put each character of the +## string into the array +my @sequence = split "", 
$sequence_string; + +## total length of the sequence +my $length = 0; + +## number of C's +my $number_of_c = 0; + +## number of G's +my $number_of_g = 0; + +## Use a foreach loop to look at each nucleotide in turn +foreach my $nucleotide (@sequence) { + + ## increment $length by one since we're accessing a nucleotide + $length++; + + ## check if $nucleotide is a C + if ($nucleotide eq "C") { + + ## increment $number_of_c + $number_of_c++; + + } + ## check if $nucleotide is a G + elsif ($nucleotide eq "G") { + + ## increment $number_of_g + $number_of_g++; + } + +} + +## calculate GC content (g + c) / length * 100 +my $gc_content = ($number_of_g + $number_of_c) / $length * 100; + +printf "GC content: %.2f%%\n", $gc_content; diff --git a/lecture_scripts/problem_solutions/set_4/compare_sequences.pl b/lecture_scripts/problem_solutions/set_4/compare_sequences.pl new file mode 100755 index 0000000..07452ee --- /dev/null +++ b/lecture_scripts/problem_solutions/set_4/compare_sequences.pl @@ -0,0 +1,39 @@ +#!/usr/bin/perl +## compare_sequences.pl + +use strict; +use warnings; + +## first sequence from sequence alignment +my $sequence1_string = "AGTTGTTACATGAAATCTGCAG--TTTCATAATTTCCGTGGGTCGGGCCGGGCGGGCCAGGCGCTGGGCACG---------"; + +## second sequence from sequence alignment +my $sequence2_string = "-GAAAAAACA-GAAGAGGGAAGGATACCAGA------GCGGTTCATACAGGGC---CCAGAAACTAGGCGAGGTGACCCCT"; + +## Turn each string into an array with split() +## if we split on an empty string, we can put each character of the +## string into the array +my @sequence1 = split "", $sequence1_string; +my @sequence2 = split "", $sequence2_string; + +## Use a for loop to compare each index for nucleotide differences +## since both sequences are the same length (it's an alignment) we +## want to use the same index to compare the array values +for (my $i = 0; $i < scalar(@sequence1); $i++) { + + ## get the nucleotide for each alignment at the given index + my $nucleotide1 = $sequence1[$i]; + my $nucleotide2 
= $sequence2[$i]; + + ## check whether $nucleotide1 is different from $nucleotide2 + ## remember that nucleotides are strings, so we use "ne" + if ($nucleotide1 ne $nucleotide2) { + + ## print the nucleotide position + ## since indexes are 0-based, we'll add 1 to the index to give + ## a position that makes sense for people (1-based) + print $i + 1, "\n"; + + } + +} diff --git a/lecture_scripts/problem_solutions/set_4/shuffle_sequence.pl b/lecture_scripts/problem_solutions/set_4/shuffle_sequence.pl new file mode 100755 index 0000000..24f33a4 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_4/shuffle_sequence.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl +## shuffle_sequence.pl + +use strict; +use warnings; + +## DNA sequence +my $sequence = "AGTTGTTACATGAAATCTGCAGTTTCATAATTTC"; + +## turn DNA string into an array +## if we split on an empty string, we can put each character of the +## string into the array +my @nucleotides = split "", $sequence; + +## use a for loop to run the loop N times (N = length of seq) +for (my $i = 0; $i < scalar(@nucleotides); $i++) { + + ## get two random positions using rand + ## we want the random position to be < the length of $sequence + ## we can use the int function to get an integer to use as an index + ## perldoc -f rand for more information on rand + my $random_position_a = int(rand(scalar(@nucleotides))); + my $random_position_b = int(rand(scalar(@nucleotides))); + + ## exchange the letters in the @nucleotides array + ## you can do it in a single operation, but it might be conceptually + ## easier to understand a 3 step approach where we use a temporary + ## variable + + ## first take the base at $random_position_a and store to a temporary + ## variable + my $tmp_nucleotide = $nucleotides[$random_position_a]; + + ## assign the nucleotide at $random_position_b to $random_position_a + $nucleotides[$random_position_a] = $nucleotides[$random_position_b]; + + ## assign $tmp_nucleotide (the original base from $random_position_a) to $random_position_b + 
$nucleotides[$random_position_b] = $tmp_nucleotide; +} + +## put the shuffled nucleotides into a new sequence string +my $shuffled_sequence = join "", @nucleotides; + +## print the shuffled sequence +print "$shuffled_sequence\n"; diff --git a/lecture_scripts/problem_solutions/set_5.zip b/lecture_scripts/problem_solutions/set_5.zip deleted file mode 100644 index d6ee76f..0000000 Binary files a/lecture_scripts/problem_solutions/set_5.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_5/codon_usage.pl b/lecture_scripts/problem_solutions/set_5/codon_usage.pl new file mode 100755 index 0000000..b9c1111 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_5/codon_usage.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl +## codon_usage.pl + +use strict; +use warnings; + +## dna sequence +my $sequence = "AGCTTAAGGCCGTTAAAATTAAGC"; + +## hash to keep count of codons +my %codon_usage = (); + +## we can use a for loop to go through our sequence and extract the codons +## we usually increment the position by one (++), but in this case +## we can increment the position by 3, since we're looking at 3 bases at a +## time. 
+## we still want to stop the loop when we reach the end of our sequence +for (my $offset = 0; $offset < length($sequence); $offset += 3) { + + ## extract the codon from the sequence + my $codon = substr($sequence, $offset, 3); + + ## increment the count for $codon in the %codon_usage hash + $codon_usage{$codon}++; + +} + +## report the codon usage +foreach my $codon (keys %codon_usage) { + + ## get the count for $codon + my $count = $codon_usage{$codon}; + + ## print the codon usage to STDOUT + print "$codon - $count\n"; + +} diff --git a/lecture_scripts/problem_solutions/set_5/expression.data b/lecture_scripts/problem_solutions/set_5/expression.data new file mode 100644 index 0000000..ad29875 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_5/expression.data @@ -0,0 +1,4 @@ +CDC2 brain 34.5 AGCGCGGTGAGTTTGAAACTGCTCGCACTTGGCTTCAAAGCTGGCTCTTGGAAATTGAGCGGAGAGCGAC +ALT liver 9.2 ATGTTCAGAAGAAGTTTAAAACTATTAAGTAAAGAAACCATTACTCGTGTTAAACCAAATACAACTATTG +ARG1 liver 458.5 AGCCGATGCGTGGCGCCCCGGCGGCCACGCCGCCGCCCGCTACGGAATCGGCGGCCGAGCGGCTGCGCCG +TSHR thyroid 2.8 CCTCCTCCACAGTGGTGAGGTCACAGCCCCTTGGAGCCCTCCCTCTTCCCACCCCTCCCGCTCCCGGGTC diff --git a/lecture_scripts/problem_solutions/set_5/expression.pl b/lecture_scripts/problem_solutions/set_5/expression.pl new file mode 100755 index 0000000..95ce202 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_5/expression.pl @@ -0,0 +1,86 @@ +#!/usr/bin/perl +## expression.pl + +use strict; +use warnings; + +## get the filename to open from the arguments +my $filename = shift; + +## open the expression file +open IN, "<", $filename or die "Cannot open $filename: $!\n"; + +## hash to store tissue information +my %tissue = (); + +## hash to store expression levels +my %expr = (); + +## hash to store sequences +my %seq = (); + +## read each line from the expression file (<IN> returns the next line from the IN handle) +while (my $line = <IN>) { + + ## remove the newline + chomp $line; + + ## split the line on tabs + my @columns = split /\t/, $line; + + ## gene id (1st column) + 
my $gene_id = $columns[0]; + + ## tissue (2nd column) + my $tissue_data = $columns[1]; + + ## expression (3rd column) + my $expression_data = $columns[2]; + + ## sequence (4th column) + my $sequence_data = $columns[3]; + + ## in the 3 hashes $gene_id will be the key + + ## add to %tissue + $tissue{$gene_id} = $tissue_data; + + ## add to %expr + $expr{$gene_id} = $expression_data; + + ## add to %seq + $seq{$gene_id} = $sequence_data; + +} + +## array to keep track of gene_id's expressed in liver +my @gene_ids_expressed_in_liver = (); + +## search %tissue for genes that are expressed in liver +## (where the value is "liver") +foreach my $gene_id (keys %tissue) { + + ## get the tissue the gene is expressed in + my $tissue_data = $tissue{$gene_id}; + + ## check whether the tissue equals "liver" + if ($tissue_data eq "liver") { + + ## add gene_id to @gene_ids_expressed_in_liver + push @gene_ids_expressed_in_liver, $gene_id; + + } + +} + +## create a report of the gene id and expression level for genes expressed in +## liver +foreach my $gene_id (@gene_ids_expressed_in_liver) { + + ## get the expression level + my $expression_data = $expr{$gene_id}; + + ## print the gene id and expression levels to STDOUT + print "$gene_id\t- $expression_data\n"; + +} diff --git a/lecture_scripts/problem_solutions/set_6.zip b/lecture_scripts/problem_solutions/set_6.zip deleted file mode 100644 index 78e1658..0000000 Binary files a/lecture_scripts/problem_solutions/set_6.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_6/restriction_site.pl b/lecture_scripts/problem_solutions/set_6/restriction_site.pl new file mode 100755 index 0000000..8d08044 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_6/restriction_site.pl @@ -0,0 +1,77 @@ +#!/usr/bin/perl +## restriction_site.pl + +use strict; +use warnings; + +## sequence +my $sequence = "GAATTCAAGTTCTTGTGCGCACACAAATCCAATAAAAACTATTGTGCACACAGACGCGAC +TTCGCGGTCTCGCTTGTTCTTGTTGTATTCGTATTTTCATTTCTCGTTCTGTTTCTACTT 
+AACAATGTGGTGATAATATAAAAAATAAAGCAATTCAAAAGTGTATGACTTAATTAATGA +GCGATTTTTTTTTTGAAATCAAATTTTTGGAACATTTTTTTTAAATTCAAATTTTGGCGA +AAATTCAATATCGGTTCTACTATCCATAATATAATTCATCAGGAATACATCTTCAAAGGC +AAACGGTGACAACAAAATTCAGGCAATTCAGGCAAATACCGAATGACCAGCTTGGTTATC +AATTCTAGAATTTGTTTTTTGGTTTTTATTTATCATTGTAAATAAGACAAACATTTGTTC +CTAGTAAAGAATGTAACACCAGAAGTCACGTAAAATGGTGTCCCCATTGTTTAAACGGTT +GTTGGGACCAATGGAGTTCGTGGTAACAGTACATCTTTCCCCTTGAATTTGCCATTCAAA +ATTTGCGGTGGAATACCTAACAAATCCAGTGAATTTAAGAATTGCGATGGGTAATTGACA +TGAATTCCAAGGTCAAATGCTAAGAGATAGTTTAATTTATGTTTGAGACAATCAATTCCC +CAATTTTTCTAAGACTTCAATCAATCTCTTAGAATCCGCCTCTGGAGGTGCACTCAGCCG +CACGTCGGGCTCACCAAATATGTTGGGGTTGTCGGTGAACTCGAATAGAAATTATTGTCG +CCTCCATCTTCATGGCCGTGAAATCGGCTCGCTGACGGGCTTCTCGCGCTGGATTTTTTC +ACTATTTTTGAATACATCATTAACGCAATATATATATATATATATTTAT"; + +## R^AATTY is the restriction site for ApoI +## R can be either A or G +## Y can be either C or T +## we can using [] in the regular expression to have different characters +## grouping (R) and (ATTY) separately allows us to add characters between +## the two groupings +my $apoI_cutsite_regex = "([AG])(AATT[CT])"; + +## remove the newlines from $seq - since there are many, we'll use a global +## substitution +$sequence =~ s/\n//g; + +## print all restriction sites in $seq + +print "Restriction sites:\n"; + +## if we do a global search we can go through each match +while ($sequence =~ /$apoI_cutsite_regex/g) { + + ## the restriction site will be $1$2 + my $restriction_site = "$1$2"; + + print "$restriction_site\n"; + +} + +## add cuts to the sequence +## make a copy of the sequence +my $sequence_with_cuts = $sequence; + +## "^" needs to go between $1 and $2 (our subpatterns) - we can use s/// to +## handle this globally +$sequence_with_cuts =~ s/$apoI_cutsite_regex/$1^$2/g; + +## print $sequence_with_cuts +print "Sequence with cut sites:\n"; +print "$sequence_with_cuts\n"; + +## $sequence_with_cuts has "^" at the cut sites - we can use that to split +## the string into an array of 
fragments +my @fragments = split /\^/, $sequence_with_cuts; + +## we need to sort the array of fragments +## sorting should order sequences in reverse order by length +## since we want a reverse order, we want to compare $b and $a, rather than +## $a and $b +## we're comparing integers, so we need to use <=> +my @sorted_fragments = sort { length($b) <=> length($a) } @fragments; + +## print fragments along with their lengths +print "Sorted fragments:\n"; +foreach my $fragment (@sorted_fragments) { + printf "[%3d] %s\n", length($fragment), $fragment; +} diff --git a/lecture_scripts/problem_solutions/set_7.zip b/lecture_scripts/problem_solutions/set_7.zip deleted file mode 100644 index 37a4477..0000000 Binary files a/lecture_scripts/problem_solutions/set_7.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_7/reverse_complement.pl b/lecture_scripts/problem_solutions/set_7/reverse_complement.pl new file mode 100755 index 0000000..8f3b1a4 --- /dev/null +++ b/lecture_scripts/problem_solutions/set_7/reverse_complement.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl +## reverse_complement.pl + +use strict; +use warnings; + +## get the sequence from the arguments to the program +my $input_sequence = shift; + +## call the reverse_complement subroutine and assign the returned value +## from the subroutine to a new variable +my $reverse_complement_input_sequence = reverse_complement($input_sequence); + +## print the reverse complement +print "$reverse_complement_input_sequence\n"; + + +## subroutines ## + +## this will take a sequence string as an argument and +## return the reverse complement +sub reverse_complement { + + ## get the sequence argument for the subroutine + ## just like we can call "shift" to get an argument for the program + ## we can call shift to get an argument to the subroutine + my $sequence = shift; + + ## reverse the sequence + my $reverse_sequence = reverse $sequence; + + ## create a copy of the reversed string which will become the 
complement + my $reverse_complement_sequence = $reverse_sequence; + + ## complement the reverse string + ## we can use tr/// to transliterate characters + $reverse_complement_sequence =~ tr/ACGT/TGCA/; + + ## return the reverse complement + ## code that calls this subroutine will be able to use this returned value + return $reverse_complement_sequence; + +} diff --git a/lecture_scripts/problem_solutions/set_8.zip b/lecture_scripts/problem_solutions/set_8.zip deleted file mode 100644 index 21854c6..0000000 Binary files a/lecture_scripts/problem_solutions/set_8.zip and /dev/null differ diff --git a/lecture_scripts/problem_solutions/set_8/.FastaUtilities.pm.swp b/lecture_scripts/problem_solutions/set_8/.FastaUtilities.pm.swp new file mode 100644 index 0000000..d9ddf68 Binary files /dev/null and b/lecture_scripts/problem_solutions/set_8/.FastaUtilities.pm.swp differ diff --git a/lecture_scripts/problem_solutions/set_8/FastaUtilities.pm b/lecture_scripts/problem_solutions/set_8/FastaUtilities.pm new file mode 100644 index 0000000..a66cb1f --- /dev/null +++ b/lecture_scripts/problem_solutions/set_8/FastaUtilities.pm @@ -0,0 +1,94 @@ +## tell Perl that we're creating a package, not a regular script +package FastaUtilities; + +## tell perl that we'll be exporting stuff +use base "Exporter"; + +## export some functions +our @EXPORT = qw(get_id get_description get_sequence reformat_sequence); + +## regex should be in single quotes since we have \ +my $regex = '>(\S+)\s?(.*)'; + +## extract the id from the fasta entry +sub get_id { + + ## get the fasta string ">id description\nseq_chunk\nseq_chunk..." 
+ my $fasta = shift; + + ## break $fasta into individual lines + my @lines = split /\n/, $fasta; + + ## the defline will be the first line in the entry + my $defline = shift @lines; + + ## search the defline with our regex, keeping the captures from THIS match + my @matches = $defline =~ /$regex/; + + ## id will be the first subgrouping match (undef if the defline didn't match) + my $id = $matches[0]; + + return $id; + +} + +## extract the description from the fasta entry +sub get_description { + + ## get the fasta string ">id description\nseq_chunk\nseq_chunk..." + my $fasta = shift; + + ## break $fasta into individual lines + my @lines = split /\n/, $fasta; + + ## the defline will be the first line in the entry + my $defline = shift @lines; + + ## search the defline with our regex, keeping the captures from THIS match + my @matches = $defline =~ /$regex/; + + ## description will be the second subgrouping match (undef if the defline didn't match) + my $description = $matches[1]; + + return $description; + +} + +## extract the sequence from the fasta entry +sub get_sequence { + + ## get the fasta string ">id description\nseq_chunk\nseq_chunk..." + my $fasta = shift; + + ## break $fasta into individual lines + my @lines = split /\n/, $fasta; + + ## get rid of the first line (defline) - we don't care about it here + shift @lines; + + ## join the rest of the array (which is all sequence) as a single string + my $sequence = join "", @lines; + + return $sequence; +} + +## reformat the sequence to user specified number of characters +sub reformat_sequence { + + ## get the fasta string + my $fasta = shift; + + ## get the line length + my $length = shift; + + ## get the sequence - we're calling the defined function + my $sequence = get_sequence($fasta); + + ## use s/// to insert newlines at $length number of character + $sequence =~ s/(.{$length})/$1\n/g; + + return $sequence; +} + +## must always put "1;" at the end of a package +1; diff --git a/lecture_scripts/problem_solutions/set_8/SequenceUtilities.pm b/lecture_scripts/problem_solutions/set_8/SequenceUtilities.pm new file mode 100755 index 0000000..df69186 --- /dev/null +++ 
b/lecture_scripts/problem_solutions/set_8/SequenceUtilities.pm
@@ -0,0 +1,45 @@
+package SequenceUtilities;
+## SequenceUtilities.pm
+## This module contains a number of useful subroutines for sequence
+## manipulation -- at this point it only contains reverse_complement
+
+use strict;
+use warnings;
+
+## make Exporter available so that we can export subroutines
+use base "Exporter";
+
+## add to the @EXPORT array the name of the subroutine
+## you'll often see people use qw() which just creates a list
+## where each element is treated as a string -- "quote word == qw"
+our @EXPORT = qw(reverse_complement);
+
+## this will take a DNA sequence string as an argument and
+## return the reverse complement
+## upper case and lower case bases are complemented and keep their case,
+## every other character is only moved to its reversed position
+sub reverse_complement {
+
+    ## get the sequence argument for the subroutine
+    ## just like we can call "shift" to get an argument for the program
+    ## we can call shift to get an argument to the subroutine
+    my $sequence = shift;
+
+    ## reverse the sequence - "scalar" tells reverse to flip the characters
+    ## of the string rather than the order of the elements of a list
+    my $reverse_complement_sequence = scalar reverse $sequence;
+
+    ## complement the reversed string
+    ## we can use tr/// to transliterate characters
+    ## lower case bases are listed too - otherwise a sequence such as
+    ## "acgt" would be reversed but never complemented
+    $reverse_complement_sequence =~ tr/ACGTacgt/TGCAtgca/;
+
+    ## return the reverse complement
+    ## code that calls this subroutine will be able to use this returned value
+    return $reverse_complement_sequence;
+
+}
+
+## we need to end our module with "1;"
+1;
diff --git a/lecture_scripts/problem_solutions/set_8/fasta_utilities_test.pl b/lecture_scripts/problem_solutions/set_8/fasta_utilities_test.pl
new file mode 100755
index 0000000..dc31857
--- /dev/null
+++ b/lecture_scripts/problem_solutions/set_8/fasta_utilities_test.pl
@@ -0,0 +1,75 @@
+#!/usr/bin/perl
+## fasta_utilities_test.pl
+
+## fasta sequence
+my $fasta = ">gi|170016048|ref|NM_001122723.1| Xenopus (Silurana) tropicalis microcephalin 1 (mcph1), mRNA
+CGGGTTCTGAAAGTAATGTGTGTGGAGTATGGAGTCTCCTGTAACAGCGTTGTCTTCTCTGTATGGACAT +GTTCTTTCAGATGTTACTGCATATGTTGAGGTGTGGTCATCCAACAGAAGAGAAAATTATTCTAAAACAT +TTTCACAACAGCTGGTTAATTTAGGTGCAAAGGTTTCGAAAACCTTTAATAAACACATTACTCATGTTGT +TTTTAAAGATGGCCATCAAGGTACCTGGGACAAGGCTGTCAAGGCTGGTGTGAAACTGGTGTCTGTACTT +TGGGTGGAAAGATGCAGAGAGGCTGCGGCACATGTAGATGAATCTGAATTTCCAGCAATCAATACAAATG +ATGGATTGCCACAGATTGTTAAAAAGAAACACAAGTGTATGCAACCCAAGGATTTTGTTGAGAAAACACC +AGAAAATGATAGACGAATGCAAAGAAAATTTGATAAAATGTCTAAAGAACTGAAAGTGCAAAAAGCTGGT +GTAGATGATCCTGTTTTATTATTTGATCAATATGGAACATTAATGTACAGCCCAAAGGCTGTGGCTGCTG +ATTGTTGCAATGCAATGGAAAAAAGAATTAAAGAAATGAAAAACAAGAGAGAGAATCTTTCTCCTACAGC +CTCCCAAATTTCACAGACATTTGATTTTCCCCAACTTAAGCCATCACTTGGATATAGTCCTTCTGCCTTG +GGGAGCTCTCCACTTGACAATGGCTGTGACGATCTTGACACTAGTTATGATGAGCTGTGGGGCAGTCTCG +AAAAGAAACGGAACATTTCTAACTACTCAAAGAACAAAGTAACAAGTAAAAAATCAGTGATACGTGCAAC +AGAACTAGAAGATCCAAATTGCCTTTTAACATCTGGCAAGTCATCTAATAATTTGACTCCTCAGCAGATT +AAAGACAAATTACGGAGGAATAGTCATGCAGTCACGTCTTTTCAAAAGAAAAGCAGAAAGTCACTTGCAA +GTAAAACAGTTGAGGAGCCAGTTCAATCCTTTATGAATTGTGGTACACAGAGTGCATTGGATTCCAATGT +GGATTATGAAGCCCTGCCAAACAAACTTACTGTGTGCGAAGAAAATGAAAGTACTAAGAAAGTAAGATCA +AATTGCCGGAAATCCAAAACTCCGACATCCAATCTGCACCTCTCAACTGAAAACATTGTGATGCCTGAGA +TACCAAAACTTAGTGAATTAAGAAATGGCTCTGATTCAGAAGGAGCATGTTCTAGTTTTGAAGACTTTTT +TTCAGCAGATATAAAAAGCCAGAAAAGACCATTTGCTAGATTTTCCTTAGGAACTCTGCCTCCAGAGTCA +CCAACCTCACCTTTATTTATCAAAAACAAAAAGGGTTCATCTCGAAAAAGGAGGAGGTCAGTTCAAGATT +TGGAAGAGTGTAATTCTAGTGGTAAAAGAAGAAGAAAGTCAATATGTAGTAAGGATAATCTGGTAAATTC +AGAGTCCAAACATGATGCCAAGACAATGTCACCCGTGGCCGACTGTGTTGAAAGGAATCCTATAAAAACT +AAAATAAATAAAACTGCTAAGTCTGATCAAGTAAACAATAAAGGAACTACATCAAGCAGTTTACTATCTT +CTGAAACAAATTTAAATACAAAAGATACATTAGCAACATCTTCTGAATTAAATCAAAAGAAAGATATAGG +TCTGGTAAAAGGTATGGAAAAAATAACTGAATTGACAACATGCAAAAAATCACCAAATCAGCCTGTGATG +AAAAATGGTGCAGAGTCTGAAAAGCAAGAAGATGAGCCAAAAAGCTTTCAGAAATGTATGTGAACAATTA +TCCATTATTAGCTGATCCATTATTAACTGATTTTTCTTTGTTGGTAATACAACTTTGTCTTATAGCTAAT 
+AATAGTACTGTCCAATTTGGTTAGGTTTCTCATATGTCTGACAAAAAAAGGCAATTCACAGATTTTTTTC
+TTTTGCTGTGACCGACACGTTTACTGCATAACATTGTTTTGAATATTTTAAATTATGCAACCTTAGTTTT
+TTGTTTTTTTTTATTTAAAAATAAAATTGTTAGAATGGCAGGTGTCAATAAATGTGCTATTAGAAGGATG
+CTTTCAGATATACAAGTAATCTTGCATGATCTTGTGTCGAGACAAGCTAAAATATTGCAGTCTGAAATCT
+ATGATTCTTTCACTTAATTTTATATGCTGGTGGTTTTAACCTTAGCTTTCTGACATATTTTTTTTTTTTA
+TGGAAATTTTCATGTGTGGCCCAAGAATTTCATATTTTTTTGGGAATCTGTAAAAAAAAAATGGAGGGAT
+TCTAATGTACAATGCTTCATGCATAGTGGCGCTGTATTATTAGAATATACATACATTGCAAGATAGACTG
+ATCGGTGTTGTTTTATCTTGCCTTCTCTGGTCATGCTTTAGAATCAGGTAAAGATTTTGATTTTTTGCAT
+TGTCAAATAAAGCTGCTTGATAAGAAAAAAAAAAAAAAAAAAAAAAAAAA";
+
+## make FastaUtilities available to our program (this imports get_id, get_description, get_sequence and reformat_sequence)
+use FastaUtilities;
+
+## get the id
+my $id = get_id($fasta);
+
+## print id
+print "Id:\n";
+print "$id\n";
+print "\n";
+
+## get the description
+my $description = get_description($fasta);
+
+## print description
+print "Description:\n";
+print "$description\n";
+print "\n";
+
+## get the sequence
+my $sequence = get_sequence($fasta);
+
+## print sequence
+print "Sequence:\n";
+print "$sequence\n";
+print "\n";
+
+## reformat the sequence with 60 characters per line - the subroutine that FastaUtilities exports for this is reformat_sequence
+my $new_fasta = reformat_sequence($fasta, 60);
+
+## print the reformatted sequence (reformat_sequence returns the sequence only, without the defline)
+print "New fasta:\n";
+print "$new_fasta\n";
diff --git a/lecture_scripts/problem_solutions/set_8/reverse_complement.pl b/lecture_scripts/problem_solutions/set_8/reverse_complement.pl
new file mode 100755
index 0000000..ba205a2
--- /dev/null
+++ b/lecture_scripts/problem_solutions/set_8/reverse_complement.pl
@@ -0,0 +1,18 @@
+#!/usr/bin/perl
+## reverse_complement.pl
+
+use strict;
+use warnings;
+
+## make SequenceUtilities module available to our program
+use SequenceUtilities;
+
+## get the sequence from the arguments to the program
+my $input_sequence = shift;
+
+## call the reverse_complement subroutine from the SequenceUtilities module
+## and assign the returned value from the subroutine to a new variable
+my $reverse_complement_input_sequence =
reverse_complement($input_sequence);
+
+## print the reverse complement
+print "$reverse_complement_input_sequence\n";
diff --git a/lecture_scripts/problem_solutions/set_8/round.pl b/lecture_scripts/problem_solutions/set_8/round.pl
new file mode 100755
index 0000000..ea82ce3
--- /dev/null
+++ b/lecture_scripts/problem_solutions/set_8/round.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+## round.pl
+## usage: round.pl <number>
+
+use strict;
+use warnings;
+
+## we need to make the Math::Round module available to our script
+use Math::Round;
+
+## looks_like_number() tells us whether a string can be used as a number
+use Scalar::Util qw(looks_like_number);
+
+## get a number argument from the command line
+my $number = shift;
+
+## stop with a helpful message if the argument is missing or not a number
+## instead of handing a value that is not a number to round()
+if (!defined $number || !looks_like_number($number)) {
+    die "Usage: round.pl <number>\n";
+}
+
+## call the round() subroutine to round the number
+my $rounded_number = round($number);
+
+## print the rounded number
+print "$rounded_number\n";