Skip to content

Commit

Permalink
add util/csv_merge
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Apr 1, 2015
1 parent 2caebe2 commit 9fb7b1d
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 4 deletions.
48 changes: 48 additions & 0 deletions biology/bio_plasmid_get_insert.pl
@@ -0,0 +1,48 @@
#!/usr/bin/perl

# Print the insert found between the flanking sequences $prefix and $suffix
# in a forward read and in a reverse read. Each input file must hold exactly
# one sequence. The insert is printed only when both reads yield the same
# insert; otherwise both candidates are printed and the script dies.

use strict;
use warnings;
use File::Basename;
use BioUtil::Seq;

# M13
my $prefix = "AGCGGCCGCGAATTGCCCTT";
my $suffix = "AAGGGCAATTCGTTTAAACCT";

# Keep only the script's base name so the usage text stays short.
$0 = basename $0;
my $usage = <<USAGE;
usage: $0 <forward seq> <reverse seq>
USAGE

die $usage unless @ARGV == 2;

my $seqf = get_the_one_seq( shift @ARGV );

# revcom is provided by BioUtil::Seq; presumably it returns the reverse
# complement so that the reverse read is oriented like the forward one
# (TODO confirm against the module documentation).
my $seqr = revcom( get_the_one_seq( shift @ARGV ) );

# The same flanks are used for both reads.
my $sf = extract_insert( $prefix, $suffix, $seqf );
my $sr = extract_insert( $prefix, $suffix, $seqr );

# Show both candidates before giving up so the mismatch can be inspected.
if ( $sf ne $sr ) {
    print "forward: $sf\nreverse: $sr\n";
    die "forward and reverse sequences are not equal!";
}

print $sf, "\n";



# Return the part of $seq that lies between $prefix and $suffix.
#
# Both flanks are interpolated into the pattern as given, and the capture is
# greedy, so when $suffix occurs more than once the match extends to its last
# occurrence. At least one character must sit between the flanks.
# Dies when the flanked region cannot be found in $seq.
sub extract_insert {
    my ( $prefix, $suffix, $seq ) = @_;

    if ( $seq =~ /$prefix(.+)$suffix/ ) {
        return $1;
    }

    die "prefix and suffix do not match sequence!\n";
}

# Read $file with read_sequence_from_fasta_file (from BioUtil::Seq) and
# return the single value stored in the hash reference it hands back.
# Dies unless that hash holds exactly one entry.
sub get_the_one_seq {
    my ($file) = @_;

    my $seqs    = read_sequence_from_fasta_file($file);
    my @records = values %$seqs;

    if ( @records != 1 ) {
        die "only one sequence should be in $file. Please check it.\n";
    }

    return $records[0];
}
8 changes: 4 additions & 4 deletions util/csv_grep
Expand Up @@ -75,12 +75,12 @@ if args.patternfile:
with open(args.patternfile, newline='') as patternfile:
reader = csv.reader(patternfile, delimiter=args.fs, quotechar=args.qc)
for row in reader:
nrow = len(row)
if nrow == 0:
ncolumn = len(row)
if ncolumn == 0:
continue
if nrow < args.pk:
if ncolumn < args.pk:
logging.error(
"-pk ({}) is beyond number of column ({})".format(args.pk, nrow))
"-pk ({}) is beyond number of column ({})".format(args.pk, ncolumn))
sys.exit(1)
elif args.pk < 1:
args.pk = 1
Expand Down
73 changes: 73 additions & 0 deletions util/csv_merge
@@ -0,0 +1,73 @@
#!/usr/bin/env python3

import argparse
import csv
import sys
import logging

args = None


def parse_args():
    """Parse the command line of csv_merge.

    Positional arguments are, in order: csvfile1, key1, csvfile2, key2.
    The key arguments are column numbers and are converted to int.

    Returns:
        argparse.Namespace with the attributes csvfile1, key1, csvfile2,
        key2, H1, H2, fs and qc.

    Raises:
        SystemExit: raised by argparse on invalid arguments or on -h.
    """
    parser = argparse.ArgumentParser(description="Merge csvfile1 to csvfile2",
                                     epilog="https://github.com/shenwei356/bio_scripts")

    parser.add_argument('csvfile1', type=str, help='csvfile1')
    parser.add_argument("key1", type=int, help='Column number of key in csvfile1')
    parser.add_argument('csvfile2', type=str, help='csvfile2')
    parser.add_argument("key2", type=int, help='Column number of key in csvfile2')
    parser.add_argument("-H1", help="not containing head in csvfile1", action="store_true")
    parser.add_argument("-H2", help="not containing head in csvfile2", action="store_true")
    parser.add_argument("-F", '--fs', type=str, default="\t",
                        help='Field separator [\\t]')
    parser.add_argument("-Q", '--qc', type=str, default='"',
                        help='Quote char["]')

    # Return the namespace directly instead of binding a local that shadows
    # the module-level ``args``.
    return parser.parse_args()


def read_csv_file(file, key):
    """Load a delimited text file and index its rows by one column.

    The delimiter and quote character are taken from the module-level
    ``args`` (``args.fs`` and ``args.qc``).

    Args:
        file: path of the file to read.
        key: 1-based number of the key column. Values below 1 are treated
            as 1.

    Returns:
        A tuple ``(data, indexdict)``. ``data`` is the list of non-empty
        rows in file order. ``indexdict`` maps the whitespace-stripped key
        of a row to that row's position in ``data``; when a key occurs more
        than once, the last occurrence wins.

    Raises:
        SystemExit: exit status 1 when a row has fewer than ``key`` columns.
    """
    # Column numbers are 1-based. Without this clamp, key 0 or a negative
    # key would silently index from the end of the row.
    if key < 1:
        key = 1

    data, indexdict = [], {}

    with open(file, newline='') as fh:
        reader = csv.reader(fh, delimiter=args.fs, quotechar=args.qc)
        for row in reader:
            ncolumn = len(row)
            if ncolumn == 0:
                # Blank line.
                continue
            if ncolumn < key:
                # Report the offending row through logging rather than
                # stdout, which carries the merged output.
                logging.error(
                    "key ({}) is beyond number of column ({}): {}".format(key, ncolumn, row))
                sys.exit(1)

            indexdict[row[key - 1].strip()] = len(data)
            data.append(row)

    return data, indexdict


if __name__ == '__main__':
    # Driver: for every row of csvfile2, print that row followed by the row
    # of csvfile1 that has the same key. Output fields are tab-separated.
    logging.basicConfig(level=logging.DEBUG, format="[%(levelname)s] %(message)s")

    args = parse_args()

    data1, map1 = read_csv_file(args.csvfile1, args.key1)
    data2, map2 = read_csv_file(args.csvfile2, args.key2)

    # Header line: header of csvfile2 followed by header of csvfile1. The
    # line is only emitted when at least one header exists, and empty input
    # no longer raises IndexError.
    header_parts = []
    if not args.H2 and data2:
        header_parts.append('\t'.join(data2[0]))
        data2 = data2[1:]
    if not args.H1 and data1:
        # The separator is written even when csvfile2 contributed no header.
        header_parts.append('\t')
        header_parts.append('\t'.join(data1[0]))
    if header_parts:
        print(''.join(header_parts))

    # Column numbers are 1-based; anything below 1 means the first column.
    key2_index = max(args.key2, 1) - 1

    for row in data2:
        # The index of csvfile1 is built from stripped keys, so the lookup
        # key has to be stripped as well.
        key = row[key2_index].strip()
        if key not in map1:
            logging.error(
                "key ({}) from {} not found in {}".format(key, args.csvfile2, args.csvfile1))
            sys.exit(1)
        print('\t'.join(row), '\t'.join(data1[map1[key]]), sep='\t')

0 comments on commit 9fb7b1d

Please sign in to comment.