This repository has been archived by the owner on May 3, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from mhalagan-nmdp/hackathon-updates
Hackathon updates
- Loading branch information
Showing
43 changed files
with
791 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
#!/usr/bin/env perl | ||
=head1 NAME | ||
breakup-fasta | ||
=head1 SYNOPSIS | ||
=head1 AUTHOR Mike Halagan <mhalagan@nmdp.org> | ||
Bioinformatics Scientist | ||
3001 Broadway Street NE | ||
Minneapolis, MN 55413 | ||
ext. 8225 | ||
=head1 DESCRIPTION | ||
=head1 CAVEATS | ||
=head1 LICENSE | ||
Copyright (c) 2016 National Marrow Donor Program (NMDP) | ||
This library is free software; you can redistribute it and/or modify it | ||
under the terms of the GNU Lesser General Public License as published | ||
by the Free Software Foundation; either version 3 of the License, or (at | ||
your option) any later version. | ||
This library is distributed in the hope that it will be useful, but WITHOUT | ||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | ||
License for more details. | ||
You should have received a copy of the GNU Lesser General Public License | ||
along with this library; if not, write to the Free Software Foundation, | ||
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | ||
> http://www.gnu.org/licenses/lgpl.html | ||
=head1 VERSIONS | ||
Version Description Date | ||
=head1 TODO | ||
=head1 SUBROUTINES | ||
=cut | ||
use strict; # always | ||
use warnings; # or else | ||
use FindBin; | ||
use Data::Dumper; | ||
use vars qw($USAGE); | ||
# Compile-time setup: enable autoflush and define the usage text.
BEGIN{ | ||
|
||
# A non-zero $| makes the currently selected output handle flush after every write.
$| = 1; | ||
|
||
# Usage text kept in the package variable $USAGE (declared with `use vars`).
# NOTE(review): nothing in the visible script parses these options or prints
# $USAGE; input is read through the diamond operator instead - confirm whether
# option handling is still planned.
$USAGE = | ||
qq{./breakup-fasta [--fasta] [--uri] [--verbose] [--help] | ||
-f/--fasta Fasta file | ||
-u/--uri URI of feature service | ||
-l/--locus HLA-Locus | ||
-v/--verbose Flag for running in verbose | ||
-h/--help | ||
}; | ||
|
||
} | ||
use Cwd; | ||
use Math::Round; | ||
# Split a multi-record FASTA stream into one file per record.
#
# Input is read through the diamond operator (files named in @ARGV, otherwise
# STDIN).  Every record that carries sequence data is written to "<id>.txt" in
# the current directory, where <id> is obtained from getID().  Records are
# written as soon as they are complete, so nothing but the current record is
# held in memory.
my $n_id = getID();

my $s_header;
my $s_seq = "";
while (my $s_line = <>) {
    chomp $s_line;
    if ($s_line =~ /^>/) {    # only a leading ">" marks a header line
        # A new header terminates the previous record; flush it first.
        if (length $s_seq) {
            _write_record($n_id, $s_header, $s_seq);
            $n_id = getID();  # previous file now exists, so a fresh ID is needed
        }
        $s_header = $s_line;
        $s_header =~ s/^>//;      # remove ">"
        $s_header =~ s/\s+$//;    # remove trailing whitespace
        # A header containing a space-delimited allele name (letters, optional
        # digit, "*", then non-space characters) is collapsed to "HLA-<allele>".
        if ($s_header =~ m/ (\D{1,3}\d{0,1}\*\S+) /) {
            $s_header = "HLA-".$1;
        }
        $s_seq = "";              # clear out old sequence
    }
    else {
        $s_line =~ s/\s+//g;      # remove whitespace
        $s_seq .= $s_line;        # add sequence
    }
}

# handle last sequence
_write_record($n_id, $s_header, $s_seq) if length $s_seq;

# Write one FASTA record to "<id>.txt": a ">header" line followed by the whole
# sequence on a single line.
#
#   $id     - identifier used to build the output file name
#   $header - header text without the leading ">" (may be undef)
#   $seq    - sequence with all whitespace already removed
#
# Dies if the file cannot be opened or if buffered output fails at close.
sub _write_record {
    my ($id, $header, $seq) = @_;
    $header = "" unless defined $header;    # sequence data seen before any header
    my $s_out_file = $id.".txt";
    open(my $fh, ">", $s_out_file) or die "CANT OPEN FILE $! $0";
    print $fh ">".$header."\n";
    print $fh $seq."\n";
    close $fh or die "CANT CLOSE FILE $s_out_file $! $0";
    return;
}
|
||
|
||
# Return an integer ID in the range 0..9999 for which "<id>.txt" does not yet
# exist in the current directory.
#
# Random probing is tried first so IDs stay spread out; if that finds nothing,
# a linear scan guarantees termination.  Dies when all 10000 candidate file
# names are already taken (the previous recursive version never returned in
# that situation).
#
# NOTE: the existence check and the later file creation are separate steps, so
# two processes sharing a directory can still pick the same ID.
sub getID{
    my $n_max = 10000;

    for my $n_try (1 .. $n_max) {
        my $id = int(rand($n_max));
        return $id if !-e $id.".txt";
    }

    # Directory is (almost) full: look at every candidate exactly once.
    for my $id (0 .. $n_max - 1) {
        return $id if !-e $id.".txt";
    }

    die "NO FREE ID: all $n_max candidate files exist $0";
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
params.fasta = "" | ||
params.output = "" | ||
params.name = "" | ||
outputDir = file("${params.output}") | ||
|
||
// Extracting consensus sequences | ||
// Process: run `breakup-fasta` on the file given by params.fasta and publish
// every '*.txt' file the command leaves in the task directory.
process extractConsensus{ | ||
|
||
// Label each task with the input file so it can be identified in the log.
tag{ expected } | ||
|
||
||
// The single input is the file named by params.fasta.
input: | ||
set file(expected) from file("${params.fasta}") | ||
|
||
||
// `mode flatten` emits each matched file as its own item on fastaFiles.
output: | ||
set file('*.txt') into fastaFiles mode flatten | ||
|
||
||
// The input file is fed to breakup-fasta on standard input.
""" | ||
breakup-fasta < ${expected} | ||
""" | ||
} | ||
|
||
|
||
//Get GFE For each sequence | ||
// Process: pipe one file from fastaFiles through `fasta2structure` and publish
// the '*.txt' files the command produces.
process getGFE{ | ||
// A failing task is ignored rather than aborting the run, so its input
// contributes nothing to the final result.
errorStrategy 'ignore' | ||
|
||
// Label each task with the file it is processing.
tag{ fastafile } | ||
|
||
||
// One task is run per item received from fastaFiles.
input: | ||
set file(fastafile) from fastaFiles | ||
|
||
||
// `mode flatten` emits each matched file as its own item on gfeResults.
output: | ||
set file {"*.txt"} into gfeResults mode flatten | ||
|
||
||
// The file content is sent to fasta2structure on standard input.
""" | ||
cat ${fastafile} | fasta2structure | ||
""" | ||
} | ||
|
||
// Gather the text of every getGFE result into a file named
// "<params.name>.txt", then hand each collected file to copyToFailedDir,
// which copies it into the output directory.
gfeResults | ||
.collectFile() { gfe -> | ||
[ "${params.name}.txt", gfe.text ] | ||
} | ||
.subscribe { file -> copyToFailedDir(file) } | ||
|
||
|
||
// Copy a file into the output directory and log whether the copy arrived.
//
// file - the file to copy; it must provide `name` and `copyTo(target)`.
//
// Uses the script-level `outputDir` as the copy target and `params.output` to
// build the path that is checked afterwards. A missing copy is reported as a
// warning; nothing is thrown by this function itself.
def copyToFailedDir (file) {
    log.info "Copying ${file.name} into: $outputDir"
    file.copyTo(outputDir)
    def copiedFile = new File( "${params.output}/${file.name}" )
    log.info copiedFile.name
    if( !copiedFile.exists() ) {
        // The old message printed the literal text "copiedFile.name" instead of a value.
        log.warn "Failed to copy file ${file.name} into: $outputDir"
    }else{
        log.info "Copied $copiedFile ${file.name} into: $outputDir"
    }
}
|
||
// Derive the subject id from a path: the part of the file name that precedes
// the first underscore (the whole file name when it has no underscore).
def subjectId(Path path) {
    String baseName = path.getFileName().toString()
    return baseName.split('_')[0]
}
|
||
// Derive the subject id from a result file path: the part of the file name
// that precedes the first literal ".txt" (the whole file name when ".txt"
// does not occur).
//
// The previous implementation passed '.txt' to String.split, which treats it
// as a regular expression where '.' matches any character, so a name such as
// "mytxtfile.txt" was cut at "ytxt". It also threw on a name that was exactly
// ".txt"; that case now yields an empty string.
def blastSubjectId(Path path) {
    def fileName = path.getFileName().toString()
    int cut = fileName.indexOf('.txt')
    return cut < 0 ? fileName : fileName.substring(0, cut)
}
|
||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.