Skip to content
This repository has been archived by the owner on Aug 24, 2022. It is now read-only.

Commit

Permalink
Merge pull request #59 from lgoldbach/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
lgoldbach committed Mar 11, 2020
2 parents 8229e0a + 3d7b295 commit 6cab7a0
Show file tree
Hide file tree
Showing 25 changed files with 1,232 additions and 45 deletions.
78 changes: 50 additions & 28 deletions index_generator/index_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ def fetch_indices(self, index_type):
self.indices[index_type.pk]['i7'] = self.to_list(
index_type.format,
index_type.pk,
index_type.read_type,
index_type.indices_i7.all(),
)

if self.mode == 'dual':
self.indices[index_type.pk]['i5'] = self.to_list(
index_type.format,
index_type.pk,
index_type.read_type,
index_type.indices_i5.all(),
)

Expand Down Expand Up @@ -93,14 +95,14 @@ def fetch_pairs(self, index_type, char_coord, num_coord, direction):

for pair in index_pairs:
index1 = self.create_index_dict(
index_type.format, index_type.pk,
index_type.format, index_type.pk, index_type.read_type,
pair.index1.prefix, pair.index1.number,
pair.index1.index, pair.coordinate,
)

if self.mode == 'dual':
index2 = self.create_index_dict(
index_type.format, index_type.pk,
index_type.format, index_type.pk, index_type.read_type,
pair.index2.prefix, pair.index2.number,
pair.index2.index, pair.coordinate,
)
Expand Down Expand Up @@ -149,18 +151,19 @@ def get_pairs(self, index_type_id):
""" Return a list of index pairs for a given index type id. """
return self.pairs.get(index_type_id, [])

def to_list(self, format, index_type, read_type, indices):
    """Return a list of index dicts, one per entry of ``indices``.

    Each entry of ``indices`` is read for its ``prefix``, ``number`` and
    ``index`` attributes; those values are passed, together with
    ``format``, ``index_type`` and ``read_type``, to
    ``self.create_index_dict``. The order of ``indices`` is preserved.
    """
    # Only the post-change signature and call are kept; the superseded
    # variant without ``read_type`` is dropped.
    return [
        self.create_index_dict(
            format, index_type, read_type, x.prefix, x.number, x.index)
        for x in indices
    ]

@staticmethod
def create_index_dict(format='', index_type='', prefix='',
def create_index_dict(format='', index_type='', read_type='', prefix='',
number='', index='', coordinate='',
is_library=False):
return {
'format': format,
'index_type': index_type,
'read_type' : read_type,
'prefix': prefix,
'number': number,
'index': index,
Expand Down Expand Up @@ -271,10 +274,15 @@ def validate_index_types(self, records):
raise ValueError('Mixed single/dual indices are not allowed.')
self.mode = 'dual' if is_dual[0] else 'single'

index_read_type = [x.read_type for x in index_types]
if len(set(index_read_type)) != 1:
raise ValueError('Mixed long-read and short-read indices are not allowed')

index_lengths = [x.index_length for x in index_types]
if len(set(index_lengths)) != 1:
raise ValueError('Index Types with mixed index lengths ' +
'are not allowed.')

self.index_length = int(index_lengths[0])

return index_types
Expand Down Expand Up @@ -315,9 +323,10 @@ def generate(self):
tube_samples.append(sample)

# If the number of samples with index type 'plate' is large enough,
# or read_type is "long"
# take pairs in the selected order (don't actually generate them)
if len(plate_samples) > self.MAX_RANDOM_SAMPLES:
pairs = self.find_pairs_fixed(plate_samples)
if len(plate_samples) > self.MAX_RANDOM_SAMPLES or self.samples[0].index_type.read_type == 'long':
pairs = self.find_pairs_fixed(plate_samples, init_index_pairs)
for pair in pairs:
init_index_pairs.append(pair)
init_indices_i7.append(pair[0])
Expand Down Expand Up @@ -347,7 +356,6 @@ def generate(self):

# Find index pairs
pairs = self.find_pairs(plate_samples, depths, init_pairs)

# Extract indices from the pairs
# for pair in pairs[1:]:
for pair in pairs[len(init_pairs):]:
Expand Down Expand Up @@ -398,7 +406,7 @@ def idx_dict(class_model, index, index_type):
index=index, index_type=index_type)
if idx:
idx = self.index_registry.create_index_dict(
index_type.format, index_type.pk, idx[0].prefix,
index_type.format, index_type.pk, index_type.read_type, idx[0].prefix,
idx[0].number, idx[0].index, is_library=True)
else:
idx = self.index_registry.create_index_dict(
Expand Down Expand Up @@ -469,10 +477,10 @@ def find_indices(self, samples, depths, index_group, init_indices):
'single' and not x['is_library']]
return library_indices + plate_indices + \
self.sort_indices(tube_indices)

raise ValueError(f'Could not generate indices "{index_group}" ' +
'for the selected samples.')


def find_index(self, sample, index_group, current_indices, depths):
""" Helper function for `find_indices()`. """
indices_in_result = [x['index'] for x in current_indices]
Expand All @@ -494,20 +502,22 @@ def find_index(self, sample, index_group, current_indices, depths):
indices_in_result, depths, sample)

for index in indices:
converted_index = self.convert_index(index['index'])
scores = self.calculate_scores(
sample, converted_index, color_distribution, total_depth)
avg_score = sum(scores) / self.index_length
if avg_score < result_index['avg_score']:
result_index = {'avg_score': avg_score, 'index': index}
if sample.index_type.read_type == 'long':
result_index = {'avg_score': 999, 'index': index} # don't need to check score
else:
converted_index = self.convert_index(index['index'])
scores = self.calculate_scores(
sample, converted_index, color_distribution, total_depth)
avg_score = sum(scores) / self.index_length
if avg_score < result_index['avg_score']:
result_index = {'avg_score': avg_score, 'index': index}

return result_index

def find_pairs(self, samples, depths, init_pairs):
""" Generate index pairs for given samples. """
if not any(samples):
return init_pairs

pairs = list(init_pairs)

for sample in samples:
Expand Down Expand Up @@ -555,26 +565,32 @@ def find_pair(self, sample, depths, current_pairs):
indices_in_result, depths, sample)

for pair in pairs:
converted_index = self.convert_index(
self._concat_index_pair(pair))
scores = self.calculate_scores(
sample, converted_index, color_distribution, total_depth)
avg_score = sum(scores) / index_length
if avg_score < result_pair['avg_score']:
if sample.index_type.read_type == 'long':
result_pair = {
'avg_score': avg_score,
'avg_score': 999,
'pair': (pair.index1, pair.index2),
}
else:
converted_index = self.convert_index(
self._concat_index_pair(pair))
scores = self.calculate_scores(
sample, converted_index, color_distribution, total_depth)
avg_score = sum(scores) / index_length
if avg_score < result_pair['avg_score']:
result_pair = {
'avg_score': avg_score,
'pair': (pair.index1, pair.index2),
}

return result_pair

def find_pairs_fixed(self, plate_samples):
def find_pairs_fixed(self, plate_samples, init_index_pairs):
"""
Return subsequent index pairs from the Index Registry
starting from the first one.
"""
result = []

indices_in_result = [(x[0]['index'], x[1]['index']) for x in init_index_pairs]
# Group by index type
samples_dict = OrderedDict()
for sample in plate_samples:
Expand All @@ -584,6 +600,14 @@ def find_pairs_fixed(self, plate_samples):

for index_type_id, samples in samples_dict.items():
pairs = self.index_registry.get_pairs(index_type_id)
# ensure uniqueness
if self.mode == 'single':
pairs = [
x for x in pairs
if (x.index1['index'], x.index2['index']) not in indices_in_result
]
if len(samples) > len(pairs):
raise IndexError(f'Not enough indices of type {sample.index_type} for given number of samples')
for i, sample in enumerate(samples):
pair = pairs[i]
result.append((pair.index1, pair.index2))
Expand All @@ -594,15 +618,13 @@ def calculate_color_distribution(self, indices, sequencing_depths, sample):
total_depth = 0
index_length = len(indices[0])
color_distribution = [{'G': 0, 'R': 0} for _ in range(index_length)]

for i, index in enumerate(indices):
idx = self.convert_index(index)
for cycle in range(index_length):
color = idx[cycle]
color_distribution[cycle][color] += sequencing_depths[i]
total_depth += sequencing_depths[i]
total_depth += sample.sequencing_depth

return color_distribution, total_depth

def calculate_scores(self, current_sample, current_converted_index,
Expand Down
25 changes: 25 additions & 0 deletions library/migrations/0002_auto_20200227_1634.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-02-27 15:34
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine the two index fields of the ``library`` model.

    ``index_i5`` and ``index_i7`` each become an optional ``CharField``
    (``blank=True``, ``null=True``) with ``max_length=24``.
    """

    dependencies = [
        ('library', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='library',
            name='index_i5',
            field=models.CharField(
                blank=True,
                max_length=24,
                null=True,
                verbose_name='Index I5',
            ),
        ),
        migrations.AlterField(
            model_name='library',
            name='index_i7',
            field=models.CharField(
                blank=True,
                max_length=24,
                null=True,
                verbose_name='Index I7',
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion library_sample_shared/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class IndexTypeAdmin(admin.ModelAdmin):

fieldsets = (
(None, {
'fields': ('name', 'index_length', 'format', 'is_dual',
'fields': ('name', 'read_type', 'index_length', 'format', 'is_dual',
'indices_i7', 'indices_i5',),
}),
)
Expand Down
20 changes: 20 additions & 0 deletions library_sample_shared/migrations/0007_auto_20200207_1056.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-02-07 09:56
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine ``BarcodeCounter.year`` as a unique small positive integer.

    The field default recorded here is the literal ``2020``.
    """

    dependencies = [
        ('library_sample_shared', '0006_auto_20190109_1432'),
    ]

    operations = [
        migrations.AlterField(
            model_name='barcodecounter',
            name='year',
            # NOTE(review): presumably the model computes its default from
            # the current year and the value was frozen when this migration
            # was generated -- confirm against the model.
            field=models.PositiveSmallIntegerField(
                default=2020,
                unique=True,
            ),
        ),
    ]
20 changes: 20 additions & 0 deletions library_sample_shared/migrations/0008_auto_20200220_1152.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-02-20 10:52
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine ``IndexType.index_length`` with the choices 6, 8 and 24.

    The value is stored as a string of at most two characters and
    defaults to ``'8'``.
    """

    dependencies = [
        ('library_sample_shared', '0007_auto_20200207_1056'),
    ]

    operations = [
        migrations.AlterField(
            model_name='indextype',
            name='index_length',
            field=models.CharField(
                choices=[
                    ('6', '6'),
                    ('8', '8'),
                    ('24', '24'),
                ],
                default='8',
                max_length=2,
                verbose_name='Index Length',
            ),
        ),
    ]
30 changes: 30 additions & 0 deletions library_sample_shared/migrations/0009_auto_20200220_1547.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-02-20 14:47
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add ``IndexType.read_type`` and redefine the index sequence fields.

    * ``read_type`` is a new ``CharField`` with the choices ``'short'``
      and ``'long'``; it defaults to ``'short'``.
    * ``IndexI5.index`` and ``IndexI7.index`` become ``CharField`` with
      ``max_length=24``.
    """

    dependencies = [
        ('library_sample_shared', '0008_auto_20200220_1152'),
    ]

    operations = [
        migrations.AddField(
            model_name='indextype',
            name='read_type',
            field=models.CharField(
                choices=[
                    ('short', 'short read'),
                    ('long', 'long read'),
                ],
                default='short',
                max_length=11,
                verbose_name='Read Type',
            ),
        ),
        migrations.AlterField(
            model_name='indexi5',
            name='index',
            field=models.CharField(
                max_length=24,
                verbose_name='Index',
            ),
        ),
        migrations.AlterField(
            model_name='indexi7',
            name='index',
            field=models.CharField(
                max_length=24,
                verbose_name='Index',
            ),
        ),
    ]
20 changes: 20 additions & 0 deletions library_sample_shared/migrations/0010_auto_20200306_1606.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-03-06 15:06
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine ``IndexType.index_length`` with every even length 6..24.

    The value is stored as a string of at most two characters and
    defaults to ``'8'``.
    """

    dependencies = [
        ('library_sample_shared', '0009_auto_20200220_1547'),
    ]

    operations = [
        migrations.AlterField(
            model_name='indextype',
            name='index_length',
            field=models.CharField(
                choices=[
                    ('6', '6'),
                    ('8', '8'),
                    ('10', '10'),
                    ('12', '12'),
                    ('14', '14'),
                    ('16', '16'),
                    ('18', '18'),
                    ('20', '20'),
                    ('22', '22'),
                    ('24', '24'),
                ],
                default='8',
                max_length=2,
                verbose_name='Index Length',
            ),
        ),
    ]
20 changes: 20 additions & 0 deletions library_sample_shared/migrations/0011_auto_20200309_1644.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-03-09 15:44
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine ``IndexType.index_length`` with choices 6, 8, 10, 12, 24.

    Compared with migration 0010 of this app, the choices 14 through 22
    are no longer listed. The default stays ``'8'``.
    """

    dependencies = [
        ('library_sample_shared', '0010_auto_20200306_1606'),
    ]

    operations = [
        migrations.AlterField(
            model_name='indextype',
            name='index_length',
            field=models.CharField(
                choices=[
                    ('6', '6'),
                    ('8', '8'),
                    ('10', '10'),
                    ('12', '12'),
                    ('24', '24'),
                ],
                default='8',
                max_length=2,
                verbose_name='Index Length',
            ),
        ),
    ]
25 changes: 25 additions & 0 deletions library_sample_shared/migrations/0012_auto_20200310_1107.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.28 on 2020-03-10 10:07
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Redefine ``IndexI5.index`` and ``IndexI7.index`` with ``max_length=4``.

    NOTE(review): migration 0009 of this app set both fields to
    ``max_length=24`` and ``IndexType.index_length`` still offers lengths
    up to 24, so a limit of 4 looks unintended (possibly a typo for 24 in
    the model). Shrinking the column may fail or lose data if longer
    index sequences are already stored -- confirm against the model
    before applying.
    """

    dependencies = [
        ('library_sample_shared', '0011_auto_20200309_1644'),
    ]

    operations = [
        migrations.AlterField(
            model_name='indexi5',
            name='index',
            field=models.CharField(
                max_length=4,
                verbose_name='Index',
            ),
        ),
        migrations.AlterField(
            model_name='indexi7',
            name='index',
            field=models.CharField(
                max_length=4,
                verbose_name='Index',
            ),
        ),
    ]

0 comments on commit 6cab7a0

Please sign in to comment.