Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2caebe2
commit 9fb7b1d
Showing
3 changed files
with
125 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/usr/bin/perl | ||
|
||
use strict; | ||
use File::Basename; | ||
use BioUtil::Seq; | ||
|
||
# M13 | ||
my $prefix = "AGCGGCCGCGAATTGCCCTT"; | ||
my $suffix = "AAGGGCAATTCGTTTAAACCT"; | ||
|
||
$0 = basename $0; | ||
my $usage = <<USAGE; | ||
usage: $0 <forward seq> <reverse seq> | ||
USAGE | ||
|
||
die $usage unless @ARGV == 2; | ||
|
||
my $seqf = get_the_one_seq( shift @ARGV ); | ||
my $seqr = revcom (get_the_one_seq( shift @ARGV ) ); | ||
|
||
my $sf = extract_insert( $prefix, $suffix, $seqf ); | ||
my $sr = extract_insert( $prefix, $suffix, $seqr ); | ||
|
||
if ( $sf ne $sr ) { | ||
print "forward: $sf\nreverse: $sr\n"; | ||
die "forward and reverse sequences are not equal!"; | ||
} | ||
|
||
print $sf, "\n"; | ||
|
||
|
||
|
||
sub extract_insert { | ||
my ( $prefix, $suffix, $seq ) = @_; | ||
die "prefix and suffix do not match sequence!\n" | ||
unless $seq =~ /$prefix(.+)$suffix/; | ||
return $1; | ||
} | ||
|
||
sub get_the_one_seq { | ||
my ($file) = @_; | ||
my $seqs = read_sequence_from_fasta_file($file); | ||
die "only one sequence should be in $file. Please check it.\n" | ||
unless keys %$seqs == 1; | ||
return ( values %$seqs )[0]; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import csv | ||
import sys | ||
import logging | ||
|
||
args = None | ||
|
||
|
||
def parse_args(): | ||
parser = argparse.ArgumentParser(description="Merge csvfile1 to csvfile2", | ||
epilog="https://github.com/shenwei356/bio_scripts") | ||
|
||
parser.add_argument('csvfile1', type=str, help='cvsfile1') | ||
parser.add_argument("key1", type=int, help='Column number of key in csvfile1') | ||
parser.add_argument('csvfile2', type=str, help='cvsfile2') | ||
parser.add_argument("key2", type=int, help='Column number of key in csvfile2') | ||
parser.add_argument("-H1", help="not containing head in cvsfile1", action="store_true") | ||
parser.add_argument("-H2", help="not containing head in cvsfile2", action="store_true") | ||
parser.add_argument("-F", '--fs', type=str, default="\t", | ||
help='Field separator [\\t]') | ||
parser.add_argument("-Q", '--qc', type=str, default='"', | ||
help='Quote char["]') | ||
|
||
args = parser.parse_args() | ||
|
||
return args | ||
|
||
|
||
def read_csv_file(file, key): | ||
data, indexdict = [], {} | ||
index = 0 | ||
|
||
with open(file, newline='') as fh: | ||
reader = csv.reader(fh, delimiter=args.fs, quotechar=args.qc) | ||
for row in reader: | ||
ncolumn = len(row) | ||
if ncolumn == 0: | ||
continue | ||
if ncolumn < key: | ||
logging.error( | ||
"key ({}) is beyond number of column ({})".format(key, ncolumn)) | ||
print(row) | ||
sys.exit(1) | ||
elif key < 1: | ||
args.pk = 1 | ||
|
||
data.append(row) | ||
indexdict[row[key - 1].strip()] = index | ||
index += 1 | ||
|
||
return data, indexdict | ||
|
||
|
||
if __name__ == '__main__': | ||
logging.basicConfig(level=logging.DEBUG, format="[%(levelname)s] %(message)s") | ||
|
||
args = parse_args() | ||
|
||
data1, map1 = read_csv_file(args.csvfile1, args.key1) | ||
data2, map2 = read_csv_file(args.csvfile2, args.key2) | ||
|
||
if not args.H2: | ||
print('\t'.join(data2[0]), end='') | ||
data2 = data2[1:] | ||
if not args.H1: | ||
print('\t', end='') | ||
print('\t'.join(data1[0]), end='') | ||
print() | ||
|
||
for row in data2: | ||
print('\t'.join(row), '\t'.join(data1[map1[row[args.key2 - 1]]]), sep='\t') |