Skip to content

Commit

Permalink
Minor: added summary of mapping & percentage of broken sites
Browse files Browse the repository at this point in the history
  • Loading branch information
krassowski committed Jan 12, 2018
1 parent 2c143df commit d69bd78
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
3 changes: 2 additions & 1 deletion website/imports/sites/site_importer.py
Expand Up @@ -198,7 +198,8 @@ def map_sites_to_isoforms(self, sites: DataFrame) -> DataFrame:

old_len = len(sites)
sites.dropna(axis=0, inplace=True, subset=['sequence', 'residue'])
print(f'Dropped {old_len - len(sites)} sites due to lack of sequence or residue')
diff = old_len - len(sites)
print(f'Dropped {diff} ({diff/old_len * 100}%) sites due to lack of sequence or residue')

# nothing to map
if sites.empty:
Expand Down
14 changes: 11 additions & 3 deletions website/imports/sites/site_mapper.py
Expand Up @@ -100,12 +100,14 @@ def map_sites_by_sequence(self, sites: DataFrame) -> DataFrame:
"""
print('Mapping sites to isoforms')

mapped_cnt = 0
mapped_sites = []
self.already_warned = set()
self.has_gene_names = 'gene' in sites.columns

for site in tqdm(sites.itertuples(index=False), total=len(sites)):

was_mapped = False
protein = None
positions = {}

Expand All @@ -122,9 +124,6 @@ def map_sites_by_sequence(self, sites: DataFrame) -> DataFrame:
# create rows with sites
for isoform, matched_positions in positions.items():

if not matched_positions:
continue

for position in matched_positions:

# _replace() returns new namedtuple with replaced values;
Expand All @@ -134,6 +133,15 @@ def map_sites_by_sequence(self, sites: DataFrame) -> DataFrame:
position=position
)
mapped_sites.append(new_site)
was_mapped = True

if was_mapped:
mapped_cnt += 1

print(
f'Successfully mapped {mapped_cnt} '
f'({mapped_cnt / len(sites) * 100}%) sites'
)

return DataFrame(mapped_sites)

Expand Down

0 comments on commit d69bd78

Please sign in to comment.