Skip to content

Commit

Permalink
minor patch - it works!
Browse files Browse the repository at this point in the history
  • Loading branch information
David Koppstein committed Apr 17, 2023
1 parent 3beb258 commit 7c4b41b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
5 changes: 3 additions & 2 deletions snakePipes/shared/rules/three_prime_seq.snakefile
Expand Up @@ -150,7 +150,7 @@ rule preprocess_cluster_pas:
input:
find_replicates_cluster_pas
output:
temp("three_prime_seq/tmp/condition-{condition}_preprocessed.txt")
"three_prime_seq/tmp/condition-{condition}_preprocessed.txt"
shell:
"cat {input} | "
"sed '/^[ ]*Chrom/ d' | "
Expand All @@ -162,7 +162,7 @@ rule clusterPAS:
input:
"three_prime_seq/tmp/condition-{condition}_preprocessed.txt"
output:
temp("three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt")
"three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt"
conda:
CONDA_SHARED_ENV
params:
Expand All @@ -175,6 +175,7 @@ rule clusterPAS:
# awk command: remove entries with multiple genes in 4th column (must be unambiguous)
# python script: add "_1", "_2", etc. to each cluster label (4th column) so that each
# label is unique for each genomic position
# also strip header of "bedlike" file
rule postprocess_cluster_pas:
input:
"three_prime_seq/tmp/condition-{condition}_clusterPAS_tmpdb.txt"
Expand Down
8 changes: 3 additions & 5 deletions snakePipes/shared/tools/three_prime_seq/dedup_clusterPAS.py
Expand Up @@ -28,11 +28,9 @@ def dedup(sub_df):


# Entry point: read a tab-separated table from stdin, apply dedup() to each
# "Gene" group, and write the result to stdout as tab-separated text.
# NOTE(review): this span is a commit diff rendered without +/- markers. Going by
# the hunk header (-28,11 +28,9) and the "3 additions & 5 deletions" summary, the
# first group of body lines looks like the pre-commit version and the second
# group like the post-commit version -- confirm against the repository.
def main():
# Pre-commit body (presumably removed by this commit): the input is read with
# no header row (header=None) and column names are assigned from HEADERS,
# which is defined outside this view.
df = pd.read_table(sys.stdin, index_col=None, header=None)
df.columns = HEADERS
df = df.groupby("Gene").apply(dedup)
df.to_csv(sys.stdout, sep="\t", index=False)

# Post-commit body (presumably added by this commit): the first input line is
# used as the column header (header=0); group_keys=False keeps the group keys
# out of the index and reset_index(drop=True) replaces the index with a fresh
# positional one; header=False writes the rows without a column-name line.
df = pd.read_table(sys.stdin, index_col=None, header=0)
df = df.groupby("Gene", group_keys=False).apply(dedup).reset_index(drop=True)
df.to_csv(sys.stdout, sep="\t", index=False, header=False)

if __name__ == "__main__":
main()

0 comments on commit 7c4b41b

Please sign in to comment.