Skip to content

Commit

Permalink
ignore three_prime_seq old scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
David Koppstein committed Apr 18, 2023
1 parent fbe4796 commit f032cb8
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linux.yml
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: flake
run: |
micromamba activate snakePipes_CI
flake8 --ignore=E501,E722,E402 --exclude docs/conf.py .
flake8 --ignore=E501,E722,E402 --exclude docs/conf.py snakePipes/shared/tools/three_prime_seq/* .
CI:
runs-on: ubuntu-latest
steps:
Expand Down
32 changes: 16 additions & 16 deletions snakePipes/shared/tools/three_prime_seq/clusterPAS.py
Expand Up @@ -31,22 +31,22 @@ def cluster_pas(args):
#cluster start, cluster end, max count, gene, annotation, summit
clusters[gene][pas] = [pas, pas + 1, count, gene, annotation, pas, strand, chrom]
else:
newCluster = True
for c in clusters[gene]:
if pas > clusters[gene][c][0] - window and pas < clusters[gene][c][1] + window:
clusters[gene][c][0] = min(pas, clusters[gene][c][0])
clusters[gene][c][1] = max(pas + 1, clusters[gene][c][1])
clusters[gene][c][2] += count
if count > clusters[gene][c][2]:
#clusters[gene][c][2] = count
clusters[gene][c][4] = annotation
clusters[gene][c][5] = pas
newCluster = False
break

#not close to any existing cluster => new cluster
if newCluster:
clusters[gene][pas] = [pas, pas + 1, count, gene, annotation, pas, strand, chrom]
newCluster = True
for c in clusters[gene]:
if pas > clusters[gene][c][0] - window and pas < clusters[gene][c][1] + window:
clusters[gene][c][0] = min(pas, clusters[gene][c][0])
clusters[gene][c][1] = max(pas + 1, clusters[gene][c][1])
clusters[gene][c][2] += count
if count > clusters[gene][c][2]:
#clusters[gene][c][2] = count
clusters[gene][c][4] = annotation
clusters[gene][c][5] = pas
newCluster = False
break

#not close to any existing cluster => new cluster
if newCluster:
clusters[gene][pas] = [pas, pas + 1, count, gene, annotation, pas, strand, chrom]

line = In.readline()

Expand Down
16 changes: 11 additions & 5 deletions snakePipes/shared/tools/three_prime_seq/dedup_clusterPAS.py
Expand Up @@ -5,7 +5,7 @@

"""
Read in post-processed output of clusterPAS from STDIN, add header,
deduplicate the Gene column (4th), write to STDOUT.
deduplicate the Gene column (4th), write to STDOUT.
"""


Expand All @@ -22,15 +22,21 @@


def dedup(sub_df):
    """Make the ``Gene`` values of a frame unique by appending a running index.

    Every entry of the ``Gene`` column is replaced by ``"<gene>_<i>"``, where
    ``i`` is the zero-based position of the row within ``sub_df``.

    Parameters
    ----------
    sub_df : pandas.DataFrame
        Frame containing a ``Gene`` column. ``main`` passes one
        ``groupby("Gene")`` group at a time.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and column order as ``sub_df`` and the
        rewritten ``Gene`` column. The input frame is left unmodified.
    """
    new_ids = [f"{gene!s}_{i}" for i, gene in enumerate(sub_df["Gene"])]
    # ``assign`` returns a copy, so the group slice handed in by
    # ``groupby(...).apply`` is not written to in place.
    return sub_df.assign(Gene=new_ids)


def main():
    """Read a tab-separated table from STDIN, de-duplicate ``Gene``, write to STDOUT.

    The first input line is parsed as the column header (``header=0``). Rows
    are grouped on the ``Gene`` column and each group is passed through
    ``dedup``; the index is then reset. The result is written tab-separated,
    without the index and without a header line.
    """
    df = pd.read_table(sys.stdin, index_col=None, header=0)
    df = (
        df.groupby("Gene", group_keys=False)
        .apply(dedup)
        .reset_index(drop=True)
    )
    df.to_csv(sys.stdout, sep="\t", index=False, header=False)


if __name__ == "__main__":
    # Run only when executed as a script, not when imported as a module.
    main()

0 comments on commit f032cb8

Please sign in to comment.