minor patch - it works!

maxplanck-ie · Apr 17, 2023 · 7c4b41b · 7c4b41b
1 parent 3beb258
commit 7c4b41b
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 7 deletions.
diff --git a/snakePipes/shared/rules/three_prime_seq.snakefile b/snakePipes/shared/rules/three_prime_seq.snakefile
@@ -150,7 +150,7 @@ rule preprocess_cluster_pas:
     input: 
         find_replicates_cluster_pas
     output: 
-        temp("three_prime_seq/tmp/condition-{condition}_preprocessed.txt")
+        "three_prime_seq/tmp/condition-{condition}_preprocessed.txt"
     shell:        
         "cat {input} | "
         "sed '/^[ ]*Chrom/ d' | " 
@@ -162,7 +162,7 @@ rule clusterPAS:
     input: 
         "three_prime_seq/tmp/condition-{condition}_preprocessed.txt"
     output: 
-        temp("three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt")
+        "three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt"
     conda:
         CONDA_SHARED_ENV
     params: 
@@ -175,6 +175,7 @@ rule clusterPAS:
 # awk command: remove entries with multiple genes in 4th column (must be unambiguous)
 # python script: add "_1", "_2", to each cluster label (4th column) to make each 
 # unique for each genomic position
+# also strip header of "bedlike" file
 rule postprocess_cluster_pas:
     input: 
         "three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt"

diff --git a/snakePipes/shared/tools/three_prime_seq/dedup_clusterPAS.py b/snakePipes/shared/tools/three_prime_seq/dedup_clusterPAS.py
@@ -28,11 +28,9 @@ def dedup(sub_df):
 
 
 def main():
-    df = pd.read_table(sys.stdin, index_col=None, header=None)
-    df.columns = HEADERS
-    df = df.groupby("Gene").apply(dedup)
-    df.to_csv(sys.stdout, sep="\t", index=False)
-
+    df = pd.read_table(sys.stdin, index_col=None, header=0)
+    df = df.groupby("Gene", group_keys=False).apply(dedup).reset_index(drop=True)
+    df.to_csv(sys.stdout, sep="\t", index=False, header=False)
 
 if __name__ == "__main__":
     main()