Skip to content

Commit

Permalink
Fixing intronsize filtering step when creating the negative training …
Browse files Browse the repository at this point in the history
…set. Also added functionality to save out layers.
  • Loading branch information
maplesondctx committed Aug 4, 2018
1 parent fd384ac commit 5ac7173
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
1 change: 0 additions & 1 deletion deps/boost/tools/build/src/engine/bootstrap/.gitignore

This file was deleted.

7 changes: 4 additions & 3 deletions scripts/portcullis/portcullis/rule_filter.py
Expand Up @@ -276,9 +276,10 @@ def create_training_sets(args):
neg_juncs.to_csv(args.prefix + ".neg_layer_" + str(i) + ".tab", sep='\t')


neg_length_limit = int(L95 * 20)
print("Intron size L95 =", L95, "negative set will use junctions with intron size over L95 x 20:", neg_length_limit)
neg_length_limit = int(L95 * 8)
print("Intron size L95 =", L95, "negative set will use junctions with intron size over L95 x 8:", neg_length_limit, "and with maxmmes < 12")
neg_juncs = other_juncs.loc[other_juncs["size"] > neg_length_limit]
neg_juncs = neg_juncs.loc[neg_juncs["maxmmes"] < 12]
neg_set = pd.concat([neg_set, neg_juncs])
if args.genuine:
print(str(i+1) + "\t" + calcPerformance(neg_juncs, df).longStr())
Expand Down Expand Up @@ -388,4 +389,4 @@ def main():



if __name__=='__main__': main()
if __name__=='__main__': main()
8 changes: 8 additions & 0 deletions src/junction_filter.cc
Expand Up @@ -244,6 +244,10 @@ void portcullis::JunctionFilter::filter() {
args.push_back(ruleset + "/selftrain_initial_neg.layer7.json");

args.push_back("--prefix=" + output.string() + ".selftrain.initialset");

if (this->saveLayers) {
args.push_back("--save_layers");
}
args.push_back(junctionFile.string());

char* char_args[50];
Expand Down Expand Up @@ -648,6 +652,7 @@ int portcullis::JunctionFilter::main(int argc, char *argv[]) {
bool no_ml;
bool saveBad;
bool save_features;
bool save_layers;
bool exongff;
bool introngff;
uint32_t max_length;
Expand Down Expand Up @@ -719,6 +724,8 @@ int portcullis::JunctionFilter::main(int argc, char *argv[]) {
"If you wish to use a custom random forest model to filter the junctions file, rather than self-training on the input dataset use this option to. See manual for more details.")
("save_features", po::bool_switch(&save_features)->default_value(false),
"Use this flag to save features (both for training set and again for all junctions) to disk.")
("save_layers", po::bool_switch(&save_layers)->default_value(false),
"Use this flag to save to disk each layer produced when creating the training set.")
;
// Positional option for the input bam file
po::positional_options_description p;
Expand Down Expand Up @@ -768,6 +775,7 @@ int portcullis::JunctionFilter::main(int argc, char *argv[]) {
}
}
filter.setSaveFeatures(save_features);
filter.setSaveLayers(save_layers);
filter.setReferenceFile(referenceFile);
filter.setThreshold(threshold);
filter.setSmote(!no_smote);
Expand Down
11 changes: 10 additions & 1 deletion src/junction_filter.hpp
Expand Up @@ -97,6 +97,7 @@ namespace portcullis {
uint16_t threads;
bool saveBad;
bool saveFeatures;
bool saveLayers;
bool outputExonGFF;
bool outputIntronGFF;
uint32_t maxLength;
Expand Down Expand Up @@ -192,14 +193,22 @@ namespace portcullis {
this->train = train;
}

bool isSaveBad() const {
bool doSaveBad() const {
return saveBad;
}

// Stores the given value in this object's saveBad flag.
// The parameter shadows the member, hence the explicit this->.
void setSaveBad(bool saveBad) {
this->saveBad = saveBad;
}

// Returns the current value of the saveLayers flag.
// NOTE(review): the flag appears to control whether "--save_layers" is
// forwarded to the training-set creation step in filter() — confirm there.
bool doSaveLayers() const {
return saveLayers;
}

// Stores the given value in this object's saveLayers flag.
// The parameter shadows the member, hence the explicit this->.
void setSaveLayers(bool saveLayers) {
this->saveLayers = saveLayers;
}

// Returns the current value of the outputExonGFF flag.
bool isOutputExonGFF() const {
return outputExonGFF;
}
Expand Down
8 changes: 8 additions & 0 deletions src/portcullis.cc
Expand Up @@ -185,6 +185,8 @@ int mainFull(int argc, char *argv[]) {
uint32_t max_length;
uint32_t mincov;
string canonical;
bool save_layers;
bool save_features;
bool balanced;
bool verbose;
bool help;
Expand Down Expand Up @@ -258,6 +260,10 @@ int mainFull(int argc, char *argv[]) {
hidden_options.add_options()
("bam-files", po::value< std::vector<path> >(&bamFiles), "Path to the BAM files to process.")
("genome-file", po::value<path>(&genomeFile), "Path to the genome file to process.")
("save_features", po::bool_switch(&save_features)->default_value(false),
"Use this flag to save features (both for training set and again for all junctions) to disk.")
("save_layers", po::bool_switch(&save_layers)->default_value(false),
"Use this flag to save to disk each layer produced when creating the training set.")
;
// Positional option for the input bam file
po::positional_options_description p;
Expand Down Expand Up @@ -359,6 +365,8 @@ int mainFull(int argc, char *argv[]) {
filter.setOutputExonGFF(exongff);
filter.setOutputIntronGFF(introngff);
filter.setSaveBad(saveBad);
filter.setSaveLayers(save_layers);
filter.setSaveFeatures(save_features);
filter.filter();

// *********** BAM filter *********
Expand Down

0 comments on commit 5ac7173

Please sign in to comment.