Skip to content

Commit

Permalink
add util/plot_distribution.py, and some changes
Browse files: browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Dec 3, 2014
1 parent 4172c3e commit e5b0fec
Show file tree
Hide file tree
Showing 17 changed files with 264 additions and 93 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -7,7 +7,7 @@ See more utilities in BioUtil module from [CPAN](http://search.cpan.org/search?qu

-------

Copyright (c) 2013, Wei Shen (shenwei356@gmail.com)
Copyright (c) 2014, Wei Shen (shenwei356@gmail.com)


[MIT License](https://github.com/shenwei356/bio_scripts/blob/master/LICENSE)
4 changes: 0 additions & 4 deletions blast/.directory

This file was deleted.

4 changes: 0 additions & 4 deletions enzyme/.directory

This file was deleted.

4 changes: 0 additions & 4 deletions for_education/.directory

This file was deleted.

File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions protein/.directory

This file was deleted.

10 changes: 1 addition & 9 deletions protein/protein_batch_compute_pI.pl
Expand Up @@ -7,15 +7,7 @@
# Update : 2014-07-29

use strict;

# try to use BioUtil::Seq
if ( eval { require BioUtil::Seq; 1; } ne 1 ) {
die "\nPlease install BioUtil::Seq by CPAN:\n"
. " cpan install BioUtil\n\n";
}
else {
BioUtil::Seq->import();
}
use BioUtil::Seq;

my $usage = <<"USAGE";
Expand Down
4 changes: 0 additions & 4 deletions sequence/.directory

This file was deleted.

37 changes: 22 additions & 15 deletions sequence/fasta2tab
Expand Up @@ -4,6 +4,7 @@
use strict;
use Getopt::Long;
use BioUtil::Seq;
use BioUtil::Util;

my $usage = q(
fasta2tab - transfrom the fasta fromat to two-column table
Expand All @@ -19,8 +20,10 @@ Options:
-lc, --lowercase Lowercase
-uc, --uppercase Uppercase
-l, --length Ouput sequence length in another column
-gc, --gc Ouput GC content in another column
-l, --length Ouput sequence length at another column
-l2, --length2 Ouput number of latin-letter in sequence
at another column
-gc, --gc Ouput GC content at another column
-h, --help Show this help information
Expand All @@ -33,7 +36,10 @@ Examples:
2. extract sequence longer than 1000 bp
cat seq.fa | fasta2tab -t -l | awk -F'\t' '$3 >= 1000' | tab2fasta -l 70
3. reverse complement sequence, uppercase, and trim gaps
3. extract aligned sequence of which the original sequence is longer than 1000 bp
cat seq.fa | fasta2tab -l2 | awk -F'\t' '$3 >= 1000' | tab2fasta -l 70
4. reverse complement sequence, uppercase, and trim gaps
zcat seq.fa.gz | fasta2tab -uc -rc -t | tab2fasta
This script is usually used in pair with tab2fasta.
Expand All @@ -53,23 +59,15 @@ GetOptions(
'lowercase|lc' => \$$para{lc},
'uppercase|uc' => \$$para{uc},

'length|l' => \$$para{len},
'gc' => \$$para{gc},
'length|l' => \$$para{len},
'length2|l2' => \$$para{len2},
'gc' => \$$para{gc},

) or die $usage;

die $usage if $$para{help};

# get the file list
my @files = ();
for my $file (@ARGV) {
for my $f ( glob $file ) {
push @files, $f;
}
}
if ( @files == 0 ) {
push @files, 'STDIN';
}
my @files = file_list_from_argv(@ARGV);

for my $file (@files) {
my $next_seq = FastaReader($file);
Expand Down Expand Up @@ -101,6 +99,15 @@ for my $file (@files) {

print "$header\t$seq";
print "\t", length $seq if $$para{len};
if ($$para{len2}){
if ($$para{trim}){
print "\t", length $seq;
} else {
my $seq2 = $seq;
$seq2 =~ s/[^a-zA-Z]+//g;
print "\t", length $seq2;
}
}
print "\t", base_content( 'gc', $seq ) if $$para{gc};
print "\n";
}
Expand Down
10 changes: 1 addition & 9 deletions sequence/fasta_extract_by_pattern.pl
Expand Up @@ -59,15 +59,7 @@
die "no patterns given. Type \"$0 -h\" for help.\n" if @patterns == 0;

# get the file list
my @files = ();
for my $file (@ARGV) {
for my $f ( glob $file ) {
push @files, $f;
}
}
if ( @files == 0 ) {
push @files, 'STDIN';
}
my @files = file_list_from_argv(@ARGV);

# patterns_map for rapid matching with full pattern
my %patterns_map = ();
Expand Down
12 changes: 2 additions & 10 deletions sequence/fasta_extract_randomly.pl
Expand Up @@ -5,6 +5,7 @@

use File::Basename;
use BioUtil::Seq;
use BioUtil::Util;

$0 = basename($0);
my $usage = <<USAGE;
Expand All @@ -31,16 +32,7 @@

srand();

my @files = ();

for my $file (@ARGV) {
for my $f ( glob $file ) {
push @files, $f;
}
}
if ( @files == 0 ) {
push @files, 'STDIN';
}
my @files = file_list_from_argv(@ARGV);

my $n = 0;
for my $file (@files) {
Expand Down
14 changes: 0 additions & 14 deletions sequence/fasta_format.pl

This file was deleted.

13 changes: 2 additions & 11 deletions sequence/tab2fasta
Expand Up @@ -3,7 +3,7 @@

use strict;
use Getopt::Long;
use BioUtil::Seq;
use BioUtil::Util;

my $usage = q(
tab2fasta - transfrom column table to fasta fromat
Expand All @@ -28,16 +28,7 @@ GetOptions(

die $usage if $$para{help};

# get the file list
my @files = ();
for my $file (@ARGV) {
for my $f ( glob $file ) {
push @files, $f;
}
}
if ( @files == 0 ) {
push @files, 'STDIN';
}
my @files = file_list_from_argv(@ARGV);

for my $file (@files) {
my $fh = undef;
Expand Down

0 comments on commit e5b0fec

Please sign in to comment.