diff --git a/README.md b/README.md
index 211f6c6..35f3f4c 100644
--- a/README.md
+++ b/README.md
@@ -452,3 +452,4 @@ For any of the listed reasons, or anything else, please leave us a <a href="http
 
 <a href="https://github.com/srobb1/RelocaTE/issues?page=1&sort=comments&state=open">Leave a message here</a>
 
+
diff --git a/sample_relocaTE_run/sample_readme.txt b/sample_relocaTE_run/sample_readme.txt
index c2ac506..cba62e4 100644
--- a/sample_relocaTE_run/sample_readme.txt
+++ b/sample_relocaTE_run/sample_readme.txt
@@ -9,6 +9,7 @@ These scripts expect that the following programs are installed and included in y
 	3. BWA
 	4. BioPerl
    	5. Blat
+	6. BLAST (legacy NCBI BLAST, which provides formatdb and fastacmd)
 
 STEPS A-C
 A. RelocaTE
@@ -32,32 +33,32 @@ A. To find TE insertions in the included fastq reads run RelocaTE:
         or
         sh run_relocaTE.sh
 
-   These scritps will run relocaTE directly or through a series of shell scripts. 
-        If you are running
+  These scripts will run relocaTE directly or through a series of shell scripts. 
+	If you are running
         - run_relocaTE_qsub.sh:
-                1) follow the instructions that are printed to the screen (run run_these_jobs.sh)
-                2) once complete, view the results in 02052012_sample/mping/results
+		1) follow the instructions that are printed to the screen (run run_these_jobs.sh)
+      		2) once complete, view the results in 02052012_sample/mping/results
         or
         - run_relocaTE_shell.sh
-                1) follow the instructions that are printed to the screen (run run_these_jobs.sh)
-                2) once complete, view the results in 02052012_sample/mping/results
+		1) follow the instructions that are printed to the screen (run run_these_jobs.sh)
+      		2) once complete, view the results in 02052012_sample/mping/results
         or
         - run_relocaTE.sh
-                1) once complete, view the results in 02052012_sample/mping/results
+      		1) once complete, view the results in 02052012_sample/mping/results
 
 
-B. Find Spanners to help classify the insertions (homozygous, heterozygous, etc) by generating a BAM file of the reads not trimmed of TE to the referenc
-e.
+B. Find Spanners to help classify the insertions (homozygous, heterozygous, etc.) by generating a BAM file of the reads (not trimmed of TE) aligned to the reference.
   A BAM file is included in the sample data set, but one can be genreted by running the included script:
-        create_bam.sh by:
-        - changing directory into the directory of this script
-        - and typing "sh create_bam.sh"
+	create_bam.sh by:
+	- changing directory into the directory of this script
+ 	- and typing "sh create_bam.sh"
 
 
 C. To classify the insertions (homozygous, heterozygous, etc) run_characTErizer.sh 
-        - change directory into the directory of this script
-        - type "sh run_characTErizer.sh"
+	- change directory into the directory of this script
+	- type "sh run_characTErizer.sh"
         - once complete, view the resulting files in the directory you ran characTErizer.pl
-                1) sample.inserts_characTErized.gff: GFF file of the classified insertions including excisions 
-                2) sample.inserts_characTErized.txt: Text file of the classified insertions including excisions
-                3) excisions_with_footprint.vcfinfo: additional information on the insertions that have been classified as exicision events 
+		1) sample.inserts_characTErized.gff: GFF file of the classified insertions including excisions 
+		2) sample.inserts_characTErized.txt: Text file of the classified insertions including excisions
+		3) excisions_with_footprint.vcfinfo: additional information on the insertions that have been classified as excision events
+ 
diff --git a/scripts/characterizer.pl b/scripts/characterizer.pl
index 028cc7b..dd79d20 100755
--- a/scripts/characterizer.pl
+++ b/scripts/characterizer.pl
@@ -310,3 +310,4 @@ sub getHelp {
     }
   }
 }
+
diff --git a/scripts/characterizer.pl~ b/scripts/characterizer.pl~
deleted file mode 100755
index a16273c..0000000
--- a/scripts/characterizer.pl~
+++ /dev/null
@@ -1,312 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-use Data::Dumper;
-use Cwd;
-use Getopt::Long;
-
-if ( !defined @ARGV ) {
-  &getHelp;
-}
-
-my $cwd = getcwd();
-my $sites_file;
-my $bam_dir;
-my $genome_fasta;
-## can be a single .bam file or a direcotory containing .bam files
-my @bam_files;    
-my $excision = 0;
-
-GetOptions(
-  's|sites_file:s'   => \$sites_file,
-  'b|bam_dir:s'      => \$bam_dir,
-  'g|genome_fasta:s' => \$genome_fasta,
-  'x|excision:i'     => \$excision,
-);
-
-sub getHelp {
-  print ' 
-usage:
-./characterizer.pl [-s relocaTE table output file][-b bam file or dir of bam files][-g reference genome fasta file][-h] 
-
-options:
--s file		relocaTE output file: YOURSAMPLENAME.mping.all_nonref.txt [no default]
--b dir/file	bam file of the orginal reads aligned to reference (before TE was trimmed) or directory of bam files [no default]
--g file		reference genome fasta file [no default]
--x int		find excision events that leave a footprint, yes or no(1|0) [0]
--h 		this help message
-
-For more information see documentation: http://srobb1.github.com/RelocaTE/
-
-';
-  exit 1;
-}
-
-if ( -d $bam_dir ) {
-
-  #remove trailing '/'
-  $bam_dir =~ s/\/$//;
-  @bam_files = <$bam_dir/*bam>;
-}
-elsif ( -f $bam_dir or -l $bam_dir ) {
-  push @bam_files, $bam_dir;
-}
-open INSITES, "$sites_file" or die "cannot open $sites_file $!\n";
-my @dir_path = split '/', $sites_file;
-my $filename = pop @dir_path;
-my $file     = $filename;
-$file =~ s/\..+?$//;
-$cwd  =~ s/\/$//;      #remove trailing /
-open OUTGFF, ">$cwd/$file.inserts_characTErized.gff";
-open OUT,    ">$cwd/$file.inserts_characTErized.txt";
-print OUT
-  "strain\tTE\tTSD\tchromosome.pos\tstrand\tavg_flankers\tspanners\tstatus\n";
-print OUTGFF "##gff-version 3\n";
-my %matches;
-my %TSDs;
-my %toPrint;
-
-while ( my $line = <INSITES> ) {
-  next if $line =~ /TE.TSD.Exper.chromosome.insertion_site/;
-  next if $line =~ /^\s*$/;
-  chomp $line;
-
-  # mping TTA A119 Chr1 1446..1448 + T:1 R:0 L:1
-  my (
-    $te,        $TSD,          $exp,          $chromosome, $coor,
-    $TE_orient, $total_string, $right_string, $left_string
-  ) = split /\t/, $line;
-  my ($pos) = $coor =~ /\d+\.\.(\d+)/;
-  $TSDs{$chromosome}{$pos} = $TSD;
-  my ($total_count) = $total_string =~ /T:(\d+)/;
-  my ($left_count)  = $left_string  =~ /L:(\d+)/;
-  my ($right_count) = $right_string =~ /R:(\d+)/;
-
-  my $Mmatch = 0;
-  my $cigar_all;
-  if ( $left_count >= 1 and $right_count >= 1 and $total_count >= 2 ) {
-    my @sam_all;
-    foreach my $bam_file (@bam_files) {
-      ## get any alignments that overlap the insertion site
-      my @sam_out = `samtools view $bam_file \'$chromosome:$pos-$pos\'`;
-      push @sam_all, @sam_out;
-    }
-
-    #remove redundant lines in sam file
-    my %sorted_sam;
-    my $order;
-    foreach my $line (@sam_all) {
-      $order++;
-      if ( !exists $sorted_sam{$line} ) {
-        $sorted_sam{$line} = $order;
-      }
-    }
-
-    #make new sorted sam array by sorting on the value of the sort hash
-    my @sorted_sam =
-      sort { $sorted_sam{$a} <=> $sorted_sam{$b} } keys %sorted_sam;
-
-    foreach my $sam_line (@sorted_sam) {
-      chomp $sam_line;
-      my @sam_line = split /\t/, $sam_line;
-      my $cigar    = $sam_line[5];
-      my $flag     = $sam_line[1];
-      my $seqLen   = length $sam_line[9];
-      my $start    = $sam_line[3];
-      my $end      = $start + $seqLen - 1;
-      next unless $end >= $pos + 5;
-      next unless $start <= $pos - 5;
-      ## must be a all M match no soft clipping
-      if ( $cigar =~ /^\d+M$/ ) {
-        my ($NM) = $sam_line =~ /NM:i:(\d+)/;    ## edit distance used
-        ## bwa specific: mismatch count, often the same as NM, have not seen a XM>0 and NM==0
-        my ($XM) = $sam_line =~ /XM:i:(\d+)/;
-        if ( defined $XM and $XM == 0 ) {
-          $Mmatch++;
-        }
-        elsif ( defined $NM and $NM == 0 ) {
-          $Mmatch++;
-        }
-        elsif ( !defined $NM and !defined $XM ) {
-          $Mmatch++;
-        }
-      }
-      elsif ( $cigar =~ /[IND]/ ) {
-        $matches{"$chromosome.$pos"}{sam}{$sam_line} = 1;
-      }
-    }
-    my $spanners         = $Mmatch;
-    my $average_flankers = $total_count / 2;
-    my $status           = 0;
-
-    if ( $spanners == 0 ) {
-      $status = 'homozygous';
-    }
-    elsif ( $average_flankers >= 5 and $spanners < 5 ) {
-      $status = 'homozygous/excision_no_footprint';
-    }
-    elsif ( $spanners < ( $average_flankers * .2 ) and $spanners <= 10 ) {
-      $status = 'homozygous/excision_no_footprint';
-    }
-    elsif ( $average_flankers <= 2 and $spanners > 10 ) {
-      $status = 'new_insertion';
-    }
-    elsif ( abs( $average_flankers - $spanners ) <= 5 ) {
-      $status = 'heterozygous';
-    }
-    elsif (
-      abs( $average_flankers - $spanners ) -
-      ( ( $average_flankers + $spanners ) / 2 ) <= 10 )
-    {
-      $status = 'heterozygous?';
-    }
-    elsif ( $average_flankers > 10 and $spanners > 10 ) {
-      $status = 'heterozygous';
-    }
-    elsif (
-      (
-        ( $spanners - $average_flankers ) >
-        ( $spanners + $average_flankers ) / 2
-      )
-      and ( $average_flankers <= 10 )
-      )
-    {
-      $status = 'new_insertion';
-    }
-    else {
-      $status = 'other';
-    }
-
-    $matches{"$chromosome.$pos"}{status}         = $status;
-    $toPrint{$chromosome}{$pos}{$TSD}{TE}        = $te;
-    $toPrint{$chromosome}{$pos}{$TSD}{flank}     = $average_flankers;
-    $toPrint{$chromosome}{$pos}{$TSD}{span}      = $spanners;
-    $toPrint{$chromosome}{$pos}{$TSD}{status}    = $status;
-    $toPrint{$chromosome}{$pos}{$TSD}{strain}    = $exp;
-    $toPrint{$chromosome}{$pos}{$TSD}{coor}      = $coor;
-    $toPrint{$chromosome}{$pos}{$TSD}{TE_orient} = $TE_orient;
-  }
-}
-
-if ($excision) {
-
-##generate vcf of spanners looking for excision events
-  my @unlink_files;
-  my @vcfs;
-  foreach my $pos ( keys %matches ) {
-    my ( $target, $loc ) = split /\./, $pos;
-    next unless exists $toPrint{$target}{$loc};
-    my $range      = "$target:$pos";
-    my $sam        = "$cwd/$pos.sam";
-    my $bam        = "$cwd/$pos.bam";
-    my $sorted_bam = "$cwd/$pos.sorted";
-    my @sam_lines  = keys %{ $matches{$pos}{sam} };
-    if ( @sam_lines > 1 ) {
-      my $pos_sam = join "\n", @sam_lines;
-      open POSSAM, ">$sam";
-      print POSSAM $pos_sam;
-      `samtools view -bT $genome_fasta $sam > $bam`;
-      `samtools sort $bam $sorted_bam`;
-      `samtools index $sorted_bam.bam`;
-
-`samtools mpileup -C50 -ugf $genome_fasta -r $range  $sorted_bam.bam | bcftools view -bvcg - > $cwd/$pos.var.raw.bcf`;
-`bcftools view $cwd/$pos.var.raw.bcf | vcfutils.pl varFilter -D 100 > $cwd/$pos.var.flt.vcf`;
-      push @vcfs, "$cwd/$pos.var.flt.vcf";
-      push @unlink_files, $sam, $bam, "$sorted_bam.bam.bai", "$sorted_bam.bam",
-        "$cwd/$pos.var.raw.bcf", "$cwd/$pos.var.flt.vcf";
-      close POSSAM;
-    }
-  }
-##Chr1	16327633	.	GAGTACTACAATTAGTA	GAGTA	1930.0%	.	INDEL;DP=2;AF1=1;CI95=0.5,1;DP4=0,0,1,1;MQ=29;FQ=-40.5	GT:PL:GQ	1/1:58,6,0:10
-  open EXCISION, ">>$cwd/excisions_with_footprint.vcfinfo"
-    or die "can't open $cwd/excisions_with_footprint.vcfinfo for writing $!";
-  foreach my $vcf (@vcfs) {
-    ##Chr2.30902247.var.flt.vcf
-    my @path = split /\//, $vcf;
-    my $file = pop @path;
-    my ( $insert_ref, $insert_pos ) = $file =~ /(.+)\.(\d+)\.var\.flt\.vcf/;
-    my $TSD     = $TSDs{$insert_ref}{$insert_pos};
-    my $TSD_len = length $TSD;
-    open VCF, $vcf;
-    while ( my $line = <VCF> ) {
-      next unless $line !~ /^#/;
-      chomp $line;
-      my ( $ref, $first_base, $col_3, $ref_seq, $strain_seq ) = split /\t/,
-        $line;
-      my $aln_start      = $first_base - $insert_pos - 1;
-      my $aln_end_ref    = $first_base - length($ref_seq) - $insert_pos - 1;
-      my $aln_end_strain = $first_base - length($strain_seq) - $insert_pos - 1;
-      my $aln_end_ref_near_insert_pos    = 0;
-      my $aln_end_strain_near_insert_pos = 0;
-      my $aln_start_near_insert_pos      = 0;
-      my $insert_bwt_ends                = 0;
-      my $all_values_after_insertion     = 0;
-
-      if (  ( $aln_start <= $TSD_len + 1 )
-        and ( ( $aln_start * -1 <= $TSD_len + 1 ) ) )
-      {
-        $aln_start_near_insert_pos = 1;
-      }
-      if (  ( $aln_end_ref <= $TSD_len + 1 )
-        and ( ( $aln_end_ref * -1 <= $TSD_len + 1 ) ) )
-      {
-        $aln_end_ref_near_insert_pos = 1;
-      }
-      if (  ( $aln_end_strain <= $TSD_len + 1 )
-        and ( ( $aln_end_strain * -1 <= $TSD_len + 1 ) ) )
-      {
-        $aln_end_strain_near_insert_pos = 1;
-      }
-      my ( $end_ref, $end_strain ) = (
-        ( $first_base + length($ref_seq) - 1 ),
-        ( $first_base + length($strain_seq) + 1 )
-      );
-      if ( ( $end_ref < $insert_pos and $end_strain > $insert_pos )
-        or ( $end_strain < $insert_pos and $end_ref > $insert_pos ) )
-      {
-        $insert_bwt_ends = 1;
-      }
-      if (  ( ( $first_base - $TSD_len + 1 ) > $insert_pos )
-        and ( $end_ref > $insert_pos )
-        and ( $end_strain > $insert_pos ) )
-      {
-        $all_values_after_insertion = 1;
-      }
-      ## if the alignment end in ref or in strain is close to the insertion postion,
-      ## or if one is before and one is after the insertion postion,
-      ## it is a potential excision with footprint
-      if ( ( $aln_end_ref_near_insert_pos or $aln_end_strain_near_insert_pos )
-        or ($insert_bwt_ends) )
-      {
-        ##make sure all values are not after the insertion
-        if ( !$all_values_after_insertion ) {
-          print EXCISION "$insert_ref.$insert_pos\t$line\n";
-          my $status = $toPrint{$insert_ref}{$insert_pos}{$TSD}{status};
-          ##only append if it already isnt there
-          $toPrint{$insert_ref}{$insert_pos}{$TSD}{status} =
-            $status . "/excision_with_footprint"
-            if $status !~ /\/excision_with_footprint/;
-        }
-      }
-    }
-  }
-  unlink @unlink_files;
-}
-
-foreach my $chr ( sort keys %toPrint ) {
-  foreach my $pos ( sort { $a <=> $b } keys %{ $toPrint{$chr} } ) {
-    foreach my $tsd ( sort keys %{ $toPrint{$chr}{$pos} } ) {
-      my $TE        = $toPrint{$chr}{$pos}{$tsd}{TE};
-      my $flankers  = $toPrint{$chr}{$pos}{$tsd}{flank};
-      my $spanners  = $toPrint{$chr}{$pos}{$tsd}{span};
-      my $status    = $toPrint{$chr}{$pos}{$tsd}{status};
-      my $strain    = $toPrint{$chr}{$pos}{$tsd}{strain};
-      my $coor      = $toPrint{$chr}{$pos}{$tsd}{coor};
-      my $TE_orient = $toPrint{$chr}{$pos}{$tsd}{TE_orient};
-      my ($start) = $coor =~ /(\d+)\.\.\d+/;
-      print OUT
-"$strain\t$TE\t$tsd\t$chr:$coor\t$TE_orient\t$flankers\t$spanners\t$status\n";
-      print OUTGFF
-"$chr\t$strain\ttransposable_element_attribute\t$start\t$pos\t$TE_orient\t.\t.\tID=$chr.$pos.spanners;avg_flankers=$flankers;spanners=$spanners;type=$status;TE=$TE;TSD=$tsd\n";
-    }
-  }
-}
diff --git a/scripts/relocaTE.pl b/scripts/relocaTE.pl
index 283164a..04c479a 100755
--- a/scripts/relocaTE.pl
+++ b/scripts/relocaTE.pl
@@ -147,8 +147,8 @@
 }
 else {
   my $fq_path = File::Spec->rel2abs($fq_dir);
-  @fq_files = <$fq_path/*fq>;
-  my @fastq_files = <$fq_path/*fastq>;
+  @fq_files = <$fq_path/*.fq>;
+  my @fastq_files = <$fq_path/*.fastq>;
   push @fq_files, @fastq_files;
   if ( scalar @fq_files == 0 ) {
     print "Must provide at least 1 short read file\n";
diff --git a/scripts/relocaTE.pl~ b/scripts/relocaTE.pl~
deleted file mode 100755
index 133cd8a..0000000
--- a/scripts/relocaTE.pl~
+++ /dev/null
@@ -1,862 +0,0 @@
-#!/usr/bin/perl -w
-use File::Spec;
-use Getopt::Long;
-use Cwd;
-use FindBin qw($RealBin);
-use File::Path qw(make_path);
-use strict;
-
-my $scripts = $RealBin;
-
-if ( !defined @ARGV ) {
-  &getHelp();
-}
-my $genome_fasta = 'NONE';
-my $te_fasta;
-my $target             = 'NONE';
-my $len_cutoff         = 10;
-my $mismatch_allowance = 0;
-my $fq_dir;
-my $exper              = 'not.given';
-my $mate_file_1        = '_p1';
-my $mate_file_2        = '_p2';
-my $mate_file_unpaired = '.unPaired';
-my $workingdir;
-my $outdir     = 'outdir_teSearch';
-my $parallel   = 0;
-my $qsub_array = 0;
-my $qsub_q;
-my ( $blat_minScore, $blat_tileSize ) = ( 10, 7 );
-my $flanking_seq_len = 100;
-my $existing_TE      = 'NONE';
-my $bowtie2          = 0;
-GetOptions(
-  'p|parallel:i'         => \$parallel,
-  'a|qsub_array:i'       => \$qsub_array,
-  'q|qsub_q:s'           => \$qsub_q,
-  'e|exper:s'            => \$exper,
-  'w|workingdir:s'       => \$workingdir,
-  'o|outdir:s'           => \$outdir,
-  'd|fq_dir:s'           => \$fq_dir,
-  'g|genome_fasta:s'     => \$genome_fasta,
-  't|te_fasta:s'         => \$te_fasta,
-  'l|len_cutoff:i'       => \$len_cutoff,
-  'm|mismatch:f'         => \$mismatch_allowance,
-  '1|mate_1_id:s'        => \$mate_file_1,
-  '2|mate_2_id:s'        => \$mate_file_2,
-  'u|unpaired_id:s'      => \$mate_file_unpaired,
-  'bm|blat_minScore:i'   => \$blat_minScore,
-  'bt|blat_tileSize:i'   => \$blat_tileSize,
-  'f|flanking_seq_len:i' => \$flanking_seq_len,
-  '-r|reference_ins:s'   => \$existing_TE,
-##  '-b2|bowtie2:i'        => \$bowtie2,
-  'h|help' => \&getHelp,
-);
-my $current_dir;
-
-$parallel = 1 if $qsub_array == 1;
-if ( defined $qsub_q ) {
-  $qsub_q = "-q $qsub_q";
-}
-else {
-  $qsub_q = '';
-}
-
-if ( defined $workingdir and -d $workingdir ) {
-  $current_dir = File::Spec->rel2abs($workingdir);
-  $current_dir =~ s/\/$//;
-}
-else {
-  $current_dir = cwd();
-}
-my $mapping = 1;
-
-if ( !defined $genome_fasta ) {
-  print "\n\nPlease provide reference genome by using -g Genome fasta path\n";
-  die "\nuse -h option to get help\n";
-}
-elsif ( $genome_fasta eq 'NONE' ) {
-  print
-"A reference genome fasta was NOT provided. Proceeding without a reference will result in only the reads containing the TE being identified, no mapping of insertions will be performed\n";
-  print "Proceed without mapping? (y|n) \n";
-  my $answer = <STDIN>;
-  if ( $answer =~ /n/i ) {
-    &getHelp();
-  }
-  elsif ( $answer =~ /y/i ) {
-    $mapping = 0;
-  }
-  print "Great, proceeding without aligning to a reference genome.\n";
-}
-elsif ( !-e $genome_fasta ) {
-  print "$genome_fasta does not exist. Check file name.\n";
-  die "\nuse -h option to get help\n";
-}
-my $genome_path;
-if ( -e $genome_fasta ) {
-  $genome_path = File::Spec->rel2abs($genome_fasta);
-}
-if ( !defined $te_fasta ) {
-  print
-"\nPlease provide fasta file containing transposable elements by using -t TE fasta path
-
-SAMPLE TE FASTA:
->mping TSD=TTA
-GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGAATAAAAAATG
-ATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGGTGAAACTCGTCAGCGTCGTT
-TCCAAGTCCTCGGTAACAGAGTGAAACCCCCGTTGAGGCCGATTCGTTTCATTCACCGGATCTCTTGCGT
-CCGCCTCCGCCGTGCGACCTCCGCATTCTCCCGCGCCGCGCCGGATTTTGGGTACAAATGATCCCAGCAA
-CTTGTATCAATTAAATGCTTTGCTTAGTCTTGGAAACGTCAAAGTGAAACCCCTCCACTGTGGGGATTGT
-TTCATAAAAGATTTCATTTGAGAGAAGATGGTATAATATTTTGGGTAGCCGTGCAATGACACTAGCCATT
-GTGACTGGCC
-
-FASTA header must contain \"TSD=\", can be a Perl regular expression.  
-  Example: these exact characters TTA: TSD=TTA 
-  Example: any 4 characters: TSD=....
-  Example: A or T followed by GCC: TSD=(A|T)GCC 
-  Example: CGA followed by any character then an A then CT or G: TSD=CGA.A(CT|G) 
-\n";
-  die "\nuse -h option to get help\n";
-}
-elsif ( !-e $te_fasta ) {
-  print "$te_fasta does not exist. Check file name.\n";
-  die "\nuse -h option to get help\n";
-}
-else {
-  open INFILE, $te_fasta or die "Can't open $te_fasta\n";
-  my $first_line = <INFILE>;
-  close INFILE;
-  if ( $first_line !~ /^>\S+\s+TSD=\S+/ ) {
-    die
-"The TE_fasta:$te_fasta does not have the proper format:\n>TE_NAME TSD=TSD\nSEQUENCE\n";
-  }
-}
-my @fq_files;
-my %fq_files;
-if ( !defined $fq_dir ) {
-  print "\n\nPlease provide a directory of paired fastq files\n";
-  die "\nuse -h option to get help\n";
-}
-elsif ( $fq_dir eq 'SKIP' ) {
-  ##skip all other steps for processing the raw fq files
-}
-elsif ( !-d $fq_dir ) {
-  print
-"\n\nCheck the spelling or location of $fq_dir, Please provide a directory of paired fastq files\n";
-  die "\nuse -h option to get help\n";
-}
-else {
-  my $fq_path = File::Spec->rel2abs($fq_dir);
-  @fq_files = <$fq_path/*fq>;
-  my @fastq_files = <$fq_path/*fastq>;
-  push @fq_files, @fastq_files;
-  if ( scalar @fq_files == 0 ) {
-    print "Must provide at least 1 short read file\n";
-    die "\nuse -h option to get help\n";
-  }
-}
-my $existing_TE_path = 'NONE';
-my $existing_blat    = 0;
-if ( $existing_TE ne 'NONE' ) {
-  if ( $existing_TE eq '1' ) {
-    ##run blat
-    $existing_blat = 1;
-  }
-  elsif ( !-e $existing_TE ) {
-    print "$existing_TE does not exist\n";
-    print "Please use -r 1 or provide a file that exists\n";
-    die "\nuse -h option to get help\n";
-  }
-  else {
-    $existing_TE_path = File::Spec->rel2abs($existing_TE);
-    open INFILE, $existing_TE or die "Can't open $existing_TE\n";
-    my $first_line = <INFILE>;
-    close INFILE;
-    if ( $first_line !~ /\S+\t\S+:\d+\.\.\d+/ ) {
-      print "The existing_TE file is not in the appropriate format:
-   
-SAMPLE Reference TEs (the two columns are tab-delimited):
-mping   Chr12:839604..840033
-mping   Chr11:23200534..23200105
-
-TE_name<tab>ref_seqname:first_Base_Of_TIR1..Last_base_of_TIR2
-
-or (recommended) use \'-r 1\' for RelocaTE to find your TE in the reference
-   ";
-      die "\nuse -h option to get help\n";
-    }
-  }
-}
-
-sub getHelp {
-  print ' 
-usage:
-./relocaTE.pl [-t TE_fasta_file][-g chromosome_genome_fasta][-d dir_of_fq][-e short_sample_name][-h] 
-
-options:
-
-**required:
--t |--te_fasta		file		fasta containing nucleotide sequences of transposable elements with 
-					TSD=xxx in the desc. [no default]
--d |--fq_dir		dir		directory of paired and unpaired fastq files (paired _p1.fq & _p2.fq)
-					(.fq or .fastq is acceptable)  [no default]
-
-**recommended: 
--g |--genome_fasta	file		genome (reference) fasta file path. If not provided will only align 
-					reads to TE and remove TE seq from short reads. [no default]
--e |--exper 		STR		Short sample name, will be used in the output files to create IDs for
-					the insert (ex. A123) [not.given]
--o |--outdir 		STR		name for directory to contain output directories and files, will be
-					created for the run (ex. 04222012_A123) [outdir_teSearch]
-
-**optional:
--p |--parallel 		INT		Break down the single big job of relocaTE into as many smaller jobs as
-					possible. The alternative (0) would be to run one after the other
-					(int, 0=false or 1=true) [0] 
--q |--qsub_q 		STR		same as qsub -q option, not required [no default]
--a |--qsus_array	INT		if \'-a 1\' , create qsub PBS array jobs to run the many shell scripts
-					created in the \'-a 1\' option. (see: man qsub option -t).(
-					0=false or 1=true) [0] 
--w |--workingdir	dir		base working directory, needs to exist, will not be creates, full path
-					required [cwd] 
--l |--len		INT		len cutoff for the TE trimmed reads to be aligned [10] 
--m |--mismatch		FRACTION	mismatch allowance for alignment to TE (ex 0.1) [0] 
--1 |--mate_1_id		STR		string to uniquely identify mate 1 paired files ex: file_p1.fq [_p1]
--2 |--mate_2_id		STR		pattern to uniquely identify mate 2 paired files ex: file_p2.fq [_p2]
--u |--unpaired_id	STR		pattern to uniquely identify unpaired files ex: file.unPaired.fq [.unPaired] 
--bm|--blat_minScore	INT		blat minScore value, used by blat in the comparison of reads to TE sequence [10]
--bt|--blat_tileSize	INT		blat tileSize value, used by blat in the comparison of reads to TE sequence  [7]
--f |--flanking_seq	INT		length of the sequence flanking the found insertion to be returned. This
-					sequence is taken from the reference genome [100]
--r |--reference_ins	STR		To identify reference and shared insertions (reference and reads)
-					choose option-1 or option-2. 
-					option-1) (recommended) use \'-r 1\' to have RelocaTE find the location of your TE in the 
-					reference.
-					option-2) input the file name of a tab-delimited file containing the coordinates
-					of TE insertions pre-existing in the reference sequence. [no default]
--h |--help				this message
-
-
-See documentation for more information. http://srobb1.github.com/RelocaTE/
-
-';
-## use in V2
-## -b2 |--bowtie2	        INT             to use bowtie2 use \'-b2 1\' else for bowtie use \'-b2 0\' [0]
-  exit 1;
-}
-if ( $outdir eq '' or $outdir =~ /^\s+$/ or !defined $outdir ) {
-  die "your -o option has an incorrect value, it needs to be something
-\nuse -h option to get help\n";
-}
-else {
-  $outdir =~ s/\/$//;
-}
-my $te_path = File::Spec->rel2abs($te_fasta);
-my @outdir = split /\//, $outdir;
-$outdir = pop @outdir;
-my $top_dir = $outdir;
-my @depend;
-my $shellscripts = "$current_dir/$top_dir/shellscripts";
-if ($qsub_array) {
-  mkdir "$current_dir/$top_dir";
-  mkdir "$shellscripts";
-  open QSUBARRAY, ">$current_dir/$top_dir/run_these_jobs.sh"
-    or die "Can't open $current_dir/$top_dir/run_these_jobs.sh\n";
-}
-elsif ($parallel) {
-  mkdir "$current_dir/$top_dir";
-  mkdir "$shellscripts";
-  open PARALLEL, ">$current_dir/$top_dir/run_these_jobs.sh"
-    or die "Can't open $current_dir/$top_dir/run_these_jobs.sh\n";
-}
-else {
-  mkdir "$current_dir/$top_dir";
-}
-my $qsub_formatGenome_cmd = 0;
-## get names of each ref sequecne
-my @genome_seqs;
-if ( $mapping > 0 ) {
-  open( INFASTA, "$genome_path" ) || die "$!\n";
-  while ( my $line = <INFASTA> ) {
-    next unless $line =~ /^>(\S+)/;
-    if ( $line =~ /^>(\S+)/ ) {
-      my $id = $1;
-      if ( $id =~ /\|/ ) {
-        $id =~ s/\|/_/g;
-      }
-      push @genome_seqs, $id;
-    }
-    else {
-      die "Your genome FASTA file is in a unexpected format. 
->SEQNAME
-SEQUENCE
->SEQNAME2
-SEQUENCE2\n";
-    }
-  }
-  close(INFASTA);
-
-  #create bowtie index
-  my $cmd;
-  if ( !$bowtie2 and !-e "$genome_path.bowtie_build_index.1.ebwt" ) {
-    $cmd =
-"bowtie-build -f $genome_path $genome_path.bowtie_build_index 12> $current_dir/$top_dir/bowtie-build.out";
-    $qsub_formatGenome_cmd = 1;
-  }
-  elsif ( $bowtie2 and !-e "$genome_path.bowtie2_build_index.1.bt2" ) {
-    $cmd =
-"bowtie2-build -f $genome_path $genome_path.bowtie2_build_index 12> $current_dir/$top_dir/bowtie-build2.out";
-    $qsub_formatGenome_cmd = 1;
-  }
-  my $ref = 'ref';
-  if ( $genome_path =~ /(?:.+\/)?(.+)\.(fa|fasta)$/ ) {
-    $ref = $1;
-  }
-  if ( $parallel and defined $cmd ) {
-    my $shell_dir = "$shellscripts/step_1";
-    mkdir $shell_dir;
-    open OUTSH, ">$shell_dir/step_1.$ref.formatGenome.sh";
-    print OUTSH "$cmd\n";
-    close OUTSH;
-    chmod 0755, "$shell_dir/step_1.$ref.formatGenome.sh";
-    print PARALLEL "sh $shell_dir/step_1.$ref.formatGenome.sh\n"
-      if !$qsub_array;
-  }
-  elsif ( $parallel and !defined $cmd ) {
-    my $step1_file =
-      "$shellscripts/step_1_not_needed_genome_fasta_already_formatted";
-    my $shell_dir = "$shellscripts";
-    mkdir $shell_dir;
-    if ($parallel) {
-      open STEP1, ">$step1_file" or die "Can't Open $step1_file\n";
-      print STEP1 '';
-      close STEP1;
-    }
-  }
-  elsif ( defined $cmd ) {
-    ##run it now
-    print "Formatting the reference genome: $genome_path\n";
-    system($cmd);
-  }
-  if ($qsub_array) {
-    if ( !-e "$shellscripts/step_1_not_needed_genome_fasta_already_formatted" )
-    {
-      my $job = "$shellscripts/step_1/step_1.$ref.formatGenome.sh";
-      print QSUBARRAY
-        "STEP1=\`qsub -e $shellscripts -o $shellscripts $qsub_q $job\`
-echo \$STEP1\n";
-
-    }
-  }
-}    ##end if($mapping)
-
-##run existing TE blat against ref if the file does not exsit
-my $qsub_existingTE_cmd = 0;
-my $existing_blat_cmd =
-"blat $genome_path $te_path $current_dir/$top_dir/existingTE.blatout 1> $current_dir/$top_dir/existingTE.blat.stdout";
-if ($existing_blat) {
-  ##if running blat set existing_TE_path to blatout
-  $existing_TE_path = "$current_dir/$top_dir/existingTE.blatout";
-  if ( $parallel
-    and !-e "$current_dir/$top_dir/existingTE.blatout" )
-  {
-    my $shell_dir = "$shellscripts";
-    if ( !-d $shell_dir ) {
-      mkdir $shell_dir;
-    }
-    open OUTSH, ">$shell_dir/step_0.existingTE_blat.sh"
-      or die "Can't open $shell_dir/step_0.existingTE_blat.sh for writing $!\n";
-    print PARALLEL "sh $shell_dir/step_0.existingTE_blat.sh\n" if !$qsub_array;
-    print OUTSH "$existing_blat_cmd\n";
-    if ($qsub_array) {
-      $qsub_existingTE_cmd = 1;
-      print QSUBARRAY
-"EXISTINGTE=`qsub -e $shellscripts -o $shellscripts $qsub_q $shellscripts/step_0.existingTE_blat.sh`
-echo \$EXISTINGTE\n";
-    }
-    close OUTSH;
-  }
-  elsif ( !-e "$current_dir/$top_dir/existingTE.blatout" ) {
-    ## do it now
-    print "finding TEs ($te_path) in the reference genome ($genome_path)\n";
-    system($existing_blat_cmd);
-  }
-}
-
-my @fq;
-my @fa;
-
-#convert fq files to fa for blat
-open QSUBARRAY2, ">$shellscripts/step_2.fq2fa.sh"
-  if $qsub_array;
-my $fq_count = 0;
-if ( $fq_dir ne 'SKIP' ) {
-  foreach my $fq (@fq_files) {
-    my $fq_path = File::Spec->rel2abs($fq);
-    push @fq, $fq_path;
-    my $fa = $fq;
-    if ( $fa =~ s/\.(fq|fastq)$/.fa/ ) {
-      push @fa, $fa;
-      if ( !-e $fa ) {
-        my $cmd = "$scripts/relocaTE_fq2fa.pl $fq_path $fa";
-        if ($parallel) {
-          my @fq_path   = split '/', $fq_path;
-          my $fq_name   = pop @fq_path;
-          my $shell_dir = "$shellscripts/step_2";
-
-          mkdir $shell_dir;
-          my $outsh = "$shell_dir/$fq_count." . "fq2fa.sh";
-          open OUTSH, ">$outsh";
-          print PARALLEL "sh $outsh\n" if !$qsub_array;
-          print OUTSH "$cmd\n";
-        }
-        else {
-          ##run it now
-          print "Converting $fq_path to fasta for blat\n";
-          system($cmd);
-        }
-      }
-      else {
-        my $shell_dir = "$shellscripts";
-
-        mkdir $shell_dir;
-        my $step2_file =
-          "$shellscripts/step_2_not_needed_fq_already_converted_2_fa";
-
-        if ($parallel) {
-          open STEP2, ">$step2_file" or die "Can't Open $step2_file\n";
-          print STEP2 '';
-          close STEP2;
-        }
-      }
-    }
-    else {
-      print
-"$fq does not seem to be a fastq based on the file extension. It should be fq or fastq\n";
-      &getHelp();
-    }
-    $fq_count++;
-  }
-  if ( !-e "$shellscripts/step_2_not_needed_fq_already_converted_2_fa"
-    and $qsub_array )
-  {
-    my $end = $fq_count - 1;
-    my $job = "$shellscripts/step_2.fq2fa.sh";
-    if ( !@depend ) {
-      print QSUBARRAY
-        "STEP2=\`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end $job\`
-echo \$STEP2\n";
-      @depend = ( "STEP2", "afterokarray" );
-    }
-    else {
-      my ( $last_job, $afterok ) = @depend;
-      @depend = ( "STEP2", "afterokarray" );
-      my $jobName = $depend[0];
-      print QSUBARRAY
-"$jobName=`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end -W depend=$afterok:\$$last_job $job`
-echo \$$jobName\n";
-    }
-    print QSUBARRAY2 "sh $shellscripts/step_2/\$PBS_ARRAYID.fq2fa.sh";
-  }
-  elsif ($qsub_array) {
-    unlink "$shellscripts/step_2.fq2fa.sh";
-  }
-}    ##end if $fq_dir ne 'SKIP'
-close QSUBARRAY2;
-
-##split the TE fasta of many seqs into individual files
-my @te_fastas;
-my %TSD;
-open( INFASTA, "$te_fasta" ) || die "$!\n";
-my $i = 0;
-while ( my $line = <INFASTA> ) {
-  if ( $line =~ /^>(\S+)\s+TSD=(\S+)/ ) {
-    my $id = $1;
-    $TSD{$id} = $2;
-    if ( $i > 0 ) {
-      close(OUTFASTA);
-      $i = 0;
-    }
-    my $te_file = "$id.fa";
-    $te_file =~ s/\|/_/g;
-    ##create new dir for files: workingDir/outdir/TE/
-    my $te_dir = "$current_dir/$top_dir/$id";
-    push @te_fastas, "$te_dir/$te_file";
-
-    mkdir $te_dir;
-    open( OUTFASTA, ">$te_dir/$te_file" ) or die "$!\n";
-    print OUTFASTA $line;
-    $i++;
-  }
-  elsif ( $line =~ /^>/ and $line !~ /TSD=/ ) {
-    die
-"The TE_fasta:$te_fasta does not have the proper format:\n>TE_NAME TSD=TSD\nSEQUENCE\n";
-  }
-  else {    ##should be sequence
-    print OUTFASTA $line;
-  }
-}
-close(INFASTA);
-close(OUTFASTA);
-
-#foreach TE fasta blat against target chromosome and parse and find insertion sites
-my $depend = 1 if @depend;
-foreach my $te_path (@te_fastas) {
-  if ($depend) {
-    @depend = ( "STEP2", "afterokarray" );
-  }
-  else {
-    @depend = ();
-  }
-  my @path     = split '/', $te_path;
-  my $te_fasta = pop @path;
-  my $path     = join '/', @path;
-  my $TE       = $te_fasta;
-  $TE =~ s/\.fa//;
-  mkdir "$path/blat_output";
-  mkdir "$path/flanking_seq";
-  mkdir "$path/te_containing_fq";
-  mkdir "$path/te_only_read_portions_fa";
-
-  #blat fa files against te.fa
-  my @flanking_fq;
-  my $fq_file_count = scalar @fq;
-
-  open QSUBARRAY3, ">$shellscripts/step_3.$TE.blat.sh"
-    if $qsub_array;
-  open QSUBARRAY4, ">$shellscripts/step_5.$TE.finder.sh"
-    if $qsub_array;
-  for ( my $i = 0 ; $i < $fq_file_count ; $i++ ) {
-    my $fa = $fa[$i];
-    my $fq = $fq[$i];
-
-    #remove and save filename part of path
-    my @fa_path = split '/', $fa;
-    my $fa_name = pop @fa_path;
-    $fa_name =~ s/\.fa$//;
-    my $shell_dir = "$shellscripts/step_3/$TE";
-    if ($parallel) {
-      make_path( $shell_dir, { mode => 0755 } );
-      open OUTSH, ">$shell_dir/$i.$TE.blat.sh"
-        or die "Can't open $shell_dir/$i.$TE.blat.sh $!\n";
-      print PARALLEL "sh $shell_dir/$i.$TE.blat.sh\n" if !$qsub_array;
-    }
-
-    #use pre-existing blatout files
-    if ( !-e "$path/blat_output/$fa_name.te_$TE.blatout" ) {
-      my $cmd =
-"blat -minScore=$blat_minScore -tileSize=$blat_tileSize $te_path $fa $path/blat_output/$fa_name.te_$TE.blatout 1>> $path/blat_output/blat.out";
-      print OUTSH "$cmd\n" if $parallel;
-      print "Finding reads in $fa_name that contain sequence of $TE\n"
-        if !$parallel;
-      system($cmd) if !$parallel;
-    }
-
-    #use pre-existing te_containing_fq files
-    my $te_Containing_fq =
-      "$path/te_containing_fq/$fa_name.te_$TE.ContainingReads.fq";
-    if ( -e $te_Containing_fq ) {
-      $fq = $te_Containing_fq;
-    }
-    my $cmd =
-"perl $scripts/relocaTE_trim.pl $path/blat_output/$fa_name.te_$TE.blatout $fq $len_cutoff $mismatch_allowance > $path/flanking_seq/$fa_name.te_$TE.flankingReads.fq";
-    if ($parallel) {
-      print OUTSH "$cmd\n";
-      close OUTSH;
-      chmod 0755, "$shell_dir/*blat.sh";
-    }
-    else {
-      ##run it now
-      print "Trimming $fq reads of $TE sequence\n" if !$parallel;
-      system($cmd) if !$parallel;
-    }
-  }
-  if ($qsub_array) {
-    my $end  = $fq_file_count - 1;
-    my $job  = "$shellscripts/step_3.$TE.blat.sh";
-    my $desc = $TE;
-    $desc =~ s/\W/_/;
-    if ( !@depend ) {
-      print QSUBARRAY
-"STEP_3_$desc=\`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end $job\`
-echo \$STEP_3_$desc\n";
-      @depend = ( "STEP_3_$desc", "afterokarray" );
-    }
-    else {
-      my ( $last_job, $afterok ) = @depend;
-      @depend = ( "STEP_3_$desc", "afterokarray" );
-      my $jobName = $depend[0];
-      print QSUBARRAY
-"$jobName=`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end -W depend=$afterok:\$$last_job $job`
-echo \$$jobName\n";
-
-    }
-    print QSUBARRAY3 "sh $shellscripts/step_3/$TE/\$PBS_ARRAYID.$TE.blat.sh";
-  }
-  ##if a genome file was provided, align seqs to genome
-  ##if no genome file was provided, will only blat and trim reads of te seq
-  if ($mapping) {
-    my $param_path = "$current_dir/$top_dir/$TE";
-    my $outregex   = "$param_path/regex.txt";
-    open OUTREGEX, ">$outregex" or die $!;
-    print OUTREGEX "$mate_file_1\t$mate_file_2\t$mate_file_unpaired\t$TSD{$TE}";
-    my $cmd =
-"$scripts/relocaTE_align.pl $scripts $param_path $genome_path $outregex $TE $exper $bowtie2";
-    if ( !$parallel ) {
-      ## run now
-      print "Aligning $TE trimmed reads to the reference ($genome_path)\n";
-      system($cmd);
-    }
-    else {
-      my $shell_dir = "$shellscripts/step_4/$TE";
-      $genome_path =~ /.+\/(.+)\.(fa|fasta)$/;
-      my $ref = $1;
-
-      #`mkdir -p $shell_dir`;
-      make_path( $shell_dir, { mode => 0755 } );
-
-      #mkdir $shell_dir;
-      open OUTSH, ">$shell_dir/step_4.$ref.$TE.align.sh";
-      print OUTSH "$cmd\n";
-      print PARALLEL "sh $shell_dir/step_4.$ref.$TE.align.sh\n" if !$qsub_array;
-      close OUTSH;
-
-      #`chmod +x $shell_dir/step_4.$ref.$TE.align.sh`;
-      chmod 0755, "$shell_dir/step_4.$ref.$TE.align.sh";
-      if ($qsub_array) {
-        my $existing_depend = '';
-        if ( $qsub_formatGenome_cmd ) {
-          $existing_depend = "-W depend=afterok:\$STEP1" if !@depend;
-          $existing_depend = ",depend=afterok:\$STEP1" if @depend;
-        }
-        my $job  = "$shell_dir/step_4.$ref.$TE.align.sh";
-        my $desc = $TE;
-        $desc =~ s/\W/_/;
-        if ( !@depend ) {
-          print QSUBARRAY
-"STEP_4_$desc=\`qsub -e $shellscripts -o $shellscripts $qsub_q $existing_depend $job\`
-echo \$STEP_4_$desc\n";
-          @depend = ( "STEP_4_$desc", "afterok" );
-        }
-        else {
-          my ( $last_job, $afterok ) = @depend;
-          @depend = ( "STEP_4_$desc", "afterok" );
-          my $jobName = $depend[0];
-          print QSUBARRAY
-"$jobName=`qsub -e $shellscripts -o $shellscripts $qsub_q -W depend=$afterok:\$$last_job","$existing_depend $job`
-echo \$$jobName\n";
-        }
-      }
-    }
-
-    my $genome_count = 0;
-    foreach my $seq_id (@genome_seqs) {
-      $genome_path =~ /.+\/(.+)\.(fa|fasta)$/;
-      my $ref           = $1;
-      my $merged_bowtie = "$path/bowtie_aln/$ref.$TE.bowtie.out";
-      my $cmd =
-"$scripts/relocaTE_insertionFinder.pl $merged_bowtie $seq_id $genome_path $TE $outregex $exper $flanking_seq_len $existing_TE_path $mismatch_allowance $bowtie2";
-      if ( !$parallel ) {
-        ##run it now
-        print "Finding $TE insertions in $seq_id\n";
-        system($cmd);
-      }
-      else {
-        my $shell_dir = "$shellscripts/step_5/$TE";
-        make_path( $shell_dir, { mode => 0755 } );
-        open OUTSH, ">$shell_dir/$genome_count.$TE.findSites.sh";
-        print OUTSH "$cmd\n";
-        close OUTSH;
-        print PARALLEL "sh $shell_dir/$genome_count.$TE.findSites.sh\n"
-          if !$qsub_array;
-        chmod 0755, "$shell_dir/$genome_count.$TE.findSites.sh";
-      }
-      $genome_count++;
-    }
-    if ($qsub_array) {
-      my $end             = $genome_count - 1;
-      my $job             = "$shellscripts/step_5.$TE.finder.sh";
-      my $existing_depend = '';
-      if ($qsub_existingTE_cmd) {
-        $existing_depend = "-W depend=afterok:\$EXISTINGTE" if !@depend;
-        $existing_depend = ":\$EXISTINGTE" if @depend;
-      }
-      #if ( $qsub_formatGenome_cmd and $existing_depend eq '' ) {
-      #  $existing_depend = "-W depend=afterok:\$STEP1" if !@depend;
-      #}
-      #elsif ( $qsub_formatGenome_cmd and ( $existing_depend ne '' or @depend ) )
-      #{
-      #  $existing_depend .= ":\$STEP1";
-      #}
-      my $desc = $TE;
-      $desc =~ s/\W/_/;
-      if ( !@depend ) {
-        print QSUBARRAY
-"STEP_5_$desc=\`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end $existing_depend $job\`
-echo \$STEP_5_$desc\n";
-        @depend = ( "STEP_5_$desc", "afterokarray" );
-      }
-      else {
-        my ( $last_job, $afterok ) = @depend;
-        @depend = ( "STEP_5_$desc", "afterokarray" );
-        my $jobName = $depend[0];
-        print QSUBARRAY
-"$jobName=`qsub -e $shellscripts -o $shellscripts $qsub_q -t 0-$end -W depend=$afterok:\$$last_job",
-          "$existing_depend $job`
-echo \$$jobName\n";
-      }
-      print QSUBARRAY4
-        "sh $shellscripts/step_5/$TE/\$PBS_ARRAYID.$TE.findSites.sh";
-    }
-  }
-  if ($qsub_array) {
-    close QSUBARRAY3;
-    close QSUBARRAY4;
-  }
-}
-## Finished, clean up, cat files
-##cat all '.te_insertion_sites.table.txt' results into one file
-foreach my $te_path (@te_fastas) {
-  my @path     = split '/', $te_path;
-  my $te_fasta = pop @path;
-  my $path     = join '/', @path;
-  my $TE       = $te_fasta;
-  $TE =~ s/\.fa//;
-  if ($parallel) {
-    my $shell_dir = "$shellscripts/step_6/$TE";
-    make_path( $shell_dir, { mode => 0755 } );
-    open FINISH, ">$shellscripts/step_6/$TE/step_6.$TE.finishing.sh";
-    print PARALLEL "sh $shellscripts/step_6/$TE/step_6.$TE.finishing.sh\n"
-      if !$qsub_array;
-## toDo: Need to put all of this in a perl script then have the
-## finishing shell script execute that perl script
-    print FINISH "
-`mkdir -p $path/results/all_files`
-
-#combine confident insertions to one file
-echo \"TE\tTSD\tExper\tchromosome\tinsertion_site\tstrand\tleft_flanking_read_count\tright_flanking_read_count\tleft_flanking_seq\tright_flanking_seq\tTE_orientation\" > $path/results/temp
-for i in \`ls $path/results/*.$TE.confident_nonref_insert.txt\` ; do grep -v flanking_read_count \$i >> $path/results/temp ; done
-mv $path/results/temp $path/results/$exper.$TE.confident_nonref.txt
-mv $path/results/*.$TE.confident_nonref_insert.txt $path/results/all_files
-
-#combine all insertions to one file
-echo \"TE\tTSD\tExper\tchromosome\tinsertion_site\tstrand\tcombined_read_count\tright_flanking_read_count\tleft_flanking_read_count\" > $path/results/temp2
-for i in \`ls $path/results/*.$TE.all_nonref_insert.txt\` ; do grep -v total \$i | grep -v Note >> $path/results/temp2 ; done
-mv $path/results/temp2 $path/results/$exper.$TE.all_nonref.txt
-mv $path/results/*.$TE.all_nonref_insert.txt $path/results/all_files
-
-#combine confident insertions ref seqs to one file
-for i in \`ls $path/results/*.$TE.confident_nonref_genomeflank.fa\` ; do cat \$i  >> $path/results/temp3 ; done
-mv $path/results/temp3 $path/results/$exper.$TE.confident_nonref_genomeflanks.fa
-mv $path/results/*.$TE.confident_nonref_genomeflank.fa $path/results/all_files
-
-#combine confident insertions gff to one file
-echo \"##gff-version 3\" > $path/results/temp4
-for i in \`ls $path/results/*.$TE.all_insert.gff\` ; do grep -v gff \$i  >> $path/results/temp4 ; done        
-mv $path/results/temp4 $path/results/$exper.$TE.all_inserts.gff
-mv $path/results/*.$TE.all_insert.gff $path/results/all_files
-
-#combine confident insertions reads to one file
-for i in \`ls $path/results/*.$TE.confident_nonref_insert_reads_list.txt\` ; do cat \$i  >> $path/results/temp5 ; done
-mv $path/results/temp5 $path/results/$exper.$TE.confident_nonref_reads_list.txt
-mv $path/results/*.$TE.confident_nonref_insert_reads_list.txt $path/results/all_files
-
-";
-    `chmod +x $shellscripts/step_6/$TE/step_6.$TE.finishing.sh`;
-  }
-  if ($qsub_array) {
-    my $job  = "$shellscripts/step_6/$TE/step_6.$TE.finishing.sh";
-    my $desc = $TE;
-    $desc =~ s/\W/_/;
-    if ( !@depend ) {
-      my $jobName = "STEP_6_$desc";
-      print QSUBARRAY
-        "$jobName=\`qsub -e $shellscripts -o $shellscripts $qsub_q $job\`
-echo \$$jobName\n";
-      @depend = ( "STEP_6_$desc", "afterok" );
-    }
-    else {
-      my ( $last_job, $afterok ) = ( "STEP_5_$desc", "afterokarray" );
-      @depend = ( "STEP_6_$desc", "afterok" );
-      my $jobName = $depend[0];
-      print QSUBARRAY
-"$jobName=`qsub -e $shellscripts -o $shellscripts $qsub_q -W depend=$afterok:\$$last_job $job`
-echo \$$jobName\n";
-    }
-  }
-  if ( !$parallel and !$qsub_array ) {
-    ##do it now
-    ##combine and delete individual chr files for confident sites
-    print "Finishing and cleaning up\n";
-`echo \"TE\tTSD\tEper\tchromosome\tinsertion_site\tstrand\tleft_flanking_read_count\tright_flanking_read_count\tleft_flanking_seq\tright_flanking_seq\tTE_orientation\" > $path/results/temp`;
-    my @files = `ls $path/results/*.$TE.confident_nonref_insert.txt`;
-    foreach my $file (@files) {
-      chomp $file;
-      `grep -v flanking_read_count $file  >> $path/results/temp`;
-      unlink $file;
-    }
-    `mv $path/results/temp $path/results/$exper.$TE.confident_nonref.txt`;
-
-    ##combine and delete individual chr files for all sites
-`echo \"TE\tTSD\tExper\tchromosome\tinsertion_site\tstrand\tcombined_read_count\tright_flanking_read_count\tleft_flanking_read_count\" > $path/results/temp2`;
-    @files = `ls $path/results/*.$TE.all_nonref_insert.txt`;
-    foreach my $file (@files) {
-      chomp $file;
-      `grep -v total $file | grep -v Note  >> $path/results/temp2`;
-      unlink $file;
-    }
-    `mv $path/results/temp2 $path/results/$exper.$TE.all_nonref.txt`;
-
-    ##combine and delete individual chr fasta files
-    @files = `ls $path/results/*.$TE.confident_nonref_genomeflank.fa`;
-    foreach my $file (@files) {
-      chomp $file;
-      `cat $file >> $path/results/temp3`;
-      unlink $file;
-    }
-`mv $path/results/temp3 $path/results/$exper.$TE.confident_nonref_genomeflanks.fa`;
-
-    ##combine and delete individual chr gff files
-    `echo \"##gff-version 3\" > $path/results/temp4`;
-    @files = `ls $path/results/*.$TE.all_insert.gff`;
-    foreach my $file (@files) {
-      chomp $file;
-      `grep -v gff $file >> $path/results/temp4`;
-      unlink $file;
-    }
-    `mv $path/results/temp4 $path/results/$exper.$TE.all_inserts.gff`;
-
-    ##combine and delete individual chr reads list
-    @files = `ls $path/results/*.$TE.confident_nonref_insert_reads_list.txt`;
-    foreach my $file (@files) {
-      chomp $file;
-      `cat $file >> $path/results/temp5`;
-      unlink $file;
-    }
-`mv $path/results/temp5 $path/results/$exper.$TE.confident_nonref_reads_list.txt`;
-    print "$TE results are found in $path/results\n";
-  }
-  close FINISH;
-}
-
-if ($qsub_array) {
-  close QSUBARRAY;
-## this would happen before IO was finished on the file
-  #  system ("qsub $qsub_q $current_dir/$top_dir/run_these_jobs.sh");
-  print "$current_dir/$top_dir/run_these_jobs.sh was created
--- Run this script, \'sh $current_dir/$top_dir/run_these_jobs.sh\' 
--- This script will submit all jobs to the queue in the appropriate order.
--- Be sure to check the error files in $current_dir/$top_dir/shellscripts. They should all be file size 0.
-\n";
-}
-elsif ($parallel) {
-
-  #system (sort "$current_dir/$top_dir/run_these_jobs.sh");
-  print
-    "Run each command line statement in $current_dir/$top_dir/run_these_jobs.sh.
---Run these in order (step_1,step_2,step_3, so on) for each TE.
---For example, all the step_3 scripts for a specific TE should be successfully completed (finished without errors) 
-before running a step_4 script of the same TE.
---All scripts of the same step can be run in parallel (at the same time).
-\n";
-}
diff --git a/scripts/relocaTE_insertionFinder.pl b/scripts/relocaTE_insertionFinder.pl
index ce3b46e..8d6381d 100755
--- a/scripts/relocaTE_insertionFinder.pl
+++ b/scripts/relocaTE_insertionFinder.pl
@@ -444,3 +444,4 @@ sub TSD_check {
     }
   }
 }
+