Skip to content

Commit

Permalink
Add expression-level filter
Browse files Browse the repository at this point in the history
  • Loading branch information
kpj committed Feb 14, 2016
1 parent 04ba0ed commit 2d2fd0d
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions scripts/rare_codon_positions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from fasta_parser import FastaParser
from sequence_analyzer import DNAAnalyzer
from utils import find_all_positions
from gene_expression_analysis import extract_expression_levels, group_expression_levels


def get_rare_codons(codu):
Expand All @@ -37,7 +38,7 @@ def get_codon_positions(codons, genes):
res[c].extend([p/len(g.seq) for p in pos])
return res

def plot_positions(positions):
def plot_positions(positions, label):
""" Plot given positions as histogram
"""
pos = list(itertools.chain(*positions))
Expand All @@ -46,7 +47,7 @@ def plot_positions(positions):
pos, np.arange(0, 1.0001, 0.01),
facecolor='khaki')

plt.title('Rare codon position overview')
plt.title('Rare codon position overview (%s)' % label)
plt.xlabel('relative position in gene')
plt.ylabel('count')

Expand All @@ -55,24 +56,34 @@ def plot_positions(positions):
plt.savefig('rarest_codon_positions.pdf')
#plt.show()

def get_codu(genes, group):
""" Compute codon usage for all genes or only for certain expression group if file is given
"""
exprs = extract_expression_levels(sys.argv[2]) if len(sys.argv) == 3 else None
groups = {'all': genes} if exprs is None else group_expression_levels(genes, exprs)
select = 'all' if exprs is None else group

dnana = DNAAnalyzer(strict=False)
codu = dnana.get_avg_codon_usage(groups[select])

return codu, select

def main():
""" Generate overview
"""
farser = FastaParser(sys.argv[1])
genes = farser.parse()

dnana = DNAAnalyzer(strict=False)
codu = dnana.get_avg_codon_usage(genes)

codu, label = get_codu(genes, 'weak')
rarest = get_rare_codons(codu)
pos = get_codon_positions(rarest.values(), genes)

plot_positions(pos.values())
plot_positions(pos.values(), label)


if __name__ == '__main__':
if len(sys.argv) != 2:
print('Usage: %s <fasta file>' % sys.argv[0])
if len(sys.argv) != 2 and len(sys.argv) != 3:
print('Usage: %s <fasta file> [expression file]' % sys.argv[0])
sys.exit(1)

main()

0 comments on commit 2d2fd0d

Please sign in to comment.