Skip to content

Commit

Permalink
Merge pull request #18 from iskoldt-X/main: fixing paths
Browse files Browse the repository at this point in the history
fixing paths
  • Loading branch information
iskoldt-X committed Apr 20, 2023
2 parents ce533fe + fb6b1fc commit 81da6f7
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
6 changes: 3 additions & 3 deletions scripts/Plasmer
Expand Up @@ -121,7 +121,7 @@ if [ ! -s ${outpath}/intermediate/${prefix}.seqkit ];then
fi

echo "Checking sequence length..."
python3 ${script_Dir}/check_length_by_input.py ${genome} ${outpath}/intermediate/${prefix}.seqkit $length $minimum_length ${outpath}/intermediate/${prefix}.plasmer.length.class ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta
python3 ${script_Dir}/check_length_by_input.py ${genome} ${outpath}/intermediate/${prefix}.seqkit $length $minimum_length ${outpath}/intermediate/${prefix}.plasmer.length.class ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta ${outpath}/intermediate/${prefix}.plasmer.shorterM.class ${outpath}/results/${prefix}.plasmer.shorterM.fasta

echo ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta > ${outpath}/intermediate/${prefix}.sample.list

Expand Down Expand Up @@ -171,7 +171,7 @@ perl ${script_Dir}/getFeatDomTbl.pl ${outpath}/intermediate/${prefix}.plasmer.le
perl ${script_Dir}/getFeatDomTbl.pl ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta ${outpath}/intermediate/${prefix}.replication.domtblout > ${outpath}/intermediate/${prefix}.replication.domtblout.feature

#MPS - RDS score
perl ${script_Dir}/getFeatMDS.pl ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta ${outpath}/intermediate/${prefix}.mps.dmnd.out /share/pasteur/zhuqh/lung_microbiome/chromosome_plasmid/platon_db/mps.tsv > ${outpath}/intermediate/${prefix}.mps.rds.feature
perl ${script_Dir}/getFeatMDS.pl ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta ${outpath}/intermediate/${prefix}.mps.dmnd.out ${db}/platon_db/mps.tsv > ${outpath}/intermediate/${prefix}.mps.rds.feature

perl ${script_Dir}/getFeatBlastn.pl ${outpath}/intermediate/${prefix}.plasmer.length.unclass.fasta ${outpath}/intermediate/${prefix}.orit.blastn.out > ${outpath}/intermediate/${prefix}.orit.feature

Expand Down Expand Up @@ -201,7 +201,7 @@ echo "Predicting..."
Rscript ${script_Dir}/predict_by_rpmodel.R ${db}/All.rk18pk25.rp.all2class.per0.8OR10w.traindata.rfmodel.RData ${outpath}/intermediate/${prefix}.allFeatures
#Rscript ${script_Dir}/predict_by_rpmodel.R ${db}/All.sample6w.filter.features.rmGC.2class.rfmodel.RData ${outpath}/intermediate/${prefix}.allFeatures

cat ${outpath}/intermediate/${prefix}.allFeatures.plasmer.predClass.tsv ${outpath}/intermediate/${prefix}.plasmer.length.class > ${outpath}/results/${prefix}.plasmer.predClass.tsv
cat ${outpath}/intermediate/${prefix}.allFeatures.plasmer.predClass.tsv ${outpath}/intermediate/${prefix}.plasmer.length.class ${outpath}/intermediate/${prefix}.plasmer.shorterM.class > ${outpath}/results/${prefix}.plasmer.predClass.tsv

#extract plasmids sequences
python3 ${script_Dir}/extract_predPlasmids_seqs.py ${genome} ${outpath}/results/${prefix}.plasmer.predClass.tsv ${outpath}/results/${prefix}.plasmer.predPlasmids.fa
Expand Down
22 changes: 20 additions & 2 deletions scripts/check_length_by_input.py
Expand Up @@ -9,11 +9,14 @@
min_length = sys.argv[4].strip()
classfile = sys.argv[5].strip()
unclassfasta = sys.argv[6].strip()
labelshortfile = sys.argv[7].strip()
labelshortfasta = sys.argv[8].strip()


w1 = open(classfile,"w")

unclassID = []

shortID = []
with open(seqkit) as r:
for line in r:
line = line.strip().split("\t")
Expand All @@ -23,19 +26,34 @@
unclassID.append(line[0])
elif float(line[1]) > float(min_length) and float(max_length) == 0:
unclassID.append(line[0])
elif float(line[1]) <= float(min_length):
shortID.append(line[0])

w1.close()

w2 = open(unclassfasta,'w')
w3 = open(labelshortfasta,"w")

if genome.endswith('gz'):
with gzip.open(genome,"rt") as f:
for record in SeqIO.parse(f,"fasta"):
if record.id in unclassID:
SeqIO.write(record,w2,'fasta')
elif record.id in shortID:
SeqIO.write(record,w3,'fasta')

elif genome.endswith('fasta') or genome.endswith('fa') or genome.endswith('fna'):
for record in SeqIO.parse(genome,"fasta"):
if record.id in unclassID:
SeqIO.write(record,w2,'fasta')

elif record.id in shortID:
SeqIO.write(record,w3,'fasta')
w2.close()
w3.close()

w4 = open(labelshortfile,"w")
for i in shortID:
w4.write(i + "\t" + "shorter_than_" + str(min_length) + "\n")
w4.close()


0 comments on commit 81da6f7

Please sign in to comment.