-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue 11 #20
Issue 11 #20
Changes from 2 commits
9558159
69fda54
67743a3
8470760
22e05bb
ee0e066
f80736e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,28 +27,40 @@ | |
TEMPLATES = pkg_resources.resource_filename('q2_intervention', 'assets') | ||
|
||
|
||
def _check_inputs(state_1, state_2): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you rename this, maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I intend to validate more inputs in a future PR so will use |
||
if state_1 == state_2: | ||
raise ValueError(( | ||
'You have chosen the same value for state_1 and state_2. These ' | ||
'parameters must be given different values.')) | ||
|
||
|
||
def _get_group_pairs(df, group_value, individual_id_column='SubjectID', | ||
group_column='Group', state_column='time_point', | ||
state_values=['1', '2'], drop_duplicates=True): | ||
state_values=['1', '2'], drop_replicates='error'): | ||
results = [] | ||
group_members = df[group_column] == group_value | ||
group_md = df[group_members] | ||
for individual_id in set(group_md[individual_id_column]): | ||
result = [] | ||
for state_value in state_values: | ||
state_value = df[state_column].dtype.type(state_value) | ||
individual_id = \ | ||
df[individual_id_column].dtype.type(individual_id) | ||
individual_id = df[individual_id_column].dtype.type(individual_id) | ||
_state = df[state_column] == state_value | ||
_ind = df[individual_id_column] == individual_id | ||
individual_at_state_idx = group_md[_state & _ind].index | ||
if len(individual_at_state_idx) > 1: | ||
print("Multiple values for {0} {1} at {2} {3} ({4})".format( | ||
individual_id_column, individual_id, state_column, | ||
state_value, ' '.join(map(str, individual_at_state_idx)))) | ||
if drop_duplicates: | ||
if drop_replicates == 'error': | ||
raise ValueError(( | ||
'Replicate values for individual {0} at state {1}. ' | ||
'Remove replicate values from input files or set ' | ||
'drop_replicates parameter to select how replicates ' | ||
'are handled.')) | ||
elif drop_replicates == 'drop': | ||
break | ||
else: | ||
elif drop_replicates == 'random': | ||
individual_at_state_idx = [choice(individual_at_state_idx)] | ||
elif len(individual_at_state_idx) == 0: | ||
print("No values for {0} {1} at {2} {3}".format( | ||
|
@@ -355,7 +367,7 @@ def _visualize(output_dir, multiple_group_test=False, pairwise_tests=False, | |
def _stats_and_visuals(output_dir, pairs, metric, group_column, | ||
state_column, state_1, state_2, | ||
individual_id_column, parametric, palette, | ||
drop_duplicates, | ||
drop_replicates, | ||
multiple_group_test=True, pairwise_tests=True, | ||
paired_difference_tests=True, boxplot=True): | ||
# kruskal test or ANOVA between groups | ||
|
@@ -381,10 +393,10 @@ def _stats_and_visuals(output_dir, pairs, metric, group_column, | |
|
||
summary = pd.Series( | ||
[metric, group_column, state_column, state_1, state_2, | ||
individual_id_column, parametric, drop_duplicates], | ||
individual_id_column, parametric, drop_replicates], | ||
index=['Metric', 'Group column', 'State column', 'State 1', | ||
'State 2', 'Individual ID column', 'Parametric', | ||
'Drop duplicates'], | ||
'Drop replicates'], | ||
name='Paired difference tests') | ||
|
||
_visualize(output_dir, multiple_group_test, pairwise_tests, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,8 @@ | |
'state_1': Str, | ||
'state_2': Str, | ||
'parametric': Bool, | ||
'drop_duplicates': Bool, | ||
'drop_replicates': Str % Choices( | ||
['error', 'random', 'drop', 'mean', 'median']), | ||
} | ||
|
||
base_parameter_descriptions = { | ||
|
@@ -55,9 +56,9 @@ | |
'across which samples are paired.'), | ||
'individual_id_column': ( | ||
'Metadata column containing subject IDs to use for pairing ' | ||
'samples. WARNING: if duplicates exist for an individual ID at ' | ||
'samples. WARNING: if replicates exist for an individual ID at ' | ||
'either state_1 or state_2, that subject will be dropped and ' | ||
'reported in standard output by default. Set duplicates="ignore" ' | ||
'reported in standard output by default. Set replicates="ignore" ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
'to instead randomly select one member, and use --verbose to list ' | ||
'conflicts.'), | ||
'palette': 'Color palette to use for generating boxplots.', | ||
|
@@ -72,10 +73,13 @@ | |
'parametric': ('Perform parametric (ANOVA and t-tests) or non-' | ||
'parametric (Kruskal-Wallis, Wilcoxon, and Mann-' | ||
'Whitney U tests) statistical tests.'), | ||
'drop_duplicates': ( | ||
'If True, will discard all subject IDs with duplicate samples ' | ||
'at either state_1 or state_2. If False, will instead ' | ||
'choose one representative at random from among duplicates.') | ||
'drop_replicates': ( | ||
'Choose how replicate samples are handled. If replicates are ' | ||
'detected, "error" causes method to fail; "drop" will discard all ' | ||
'subject IDs with replicate samples at either state_1 or state_2; ' | ||
'"random" chooses one representative at random from among ' | ||
'replicates; "mean" and "median" compute average values across ' | ||
'replicates.') | ||
} | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this should be called `drop_replicates` - maybe `replicate_handling`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍