Skip to content

Commit

Permalink
Merge b7b1868 into 7d07db8
Browse files (browse the repository at this point in the history)
  • Loading branch information
jvivian committed May 30, 2019
2 parents 7d07db8 + b7b1868 commit d044bdf
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 11 deletions.
13 changes: 4 additions & 9 deletions gene_outlier_detection/lib.py
Expand Up @@ -94,7 +94,7 @@ def anova_distances(
df: pd.DataFrame,
genes: List[str],
group: str = "tissue",
n_genes=2000,
percent_genes=0.10,
):
"""
Calculates distance to each group via pairwise distance using top N ANOVA genes
Expand All @@ -104,19 +104,14 @@ def anova_distances(
df: background dataset
genes: genes to use for pairwise distance
group: Column to use as class discriminator
n_genes: Number of ANOVA genes to use
percent_genes: Percent of ANOVA genes to use for pairwise distance
Returns:
DataFrame of pairwise distances
"""
click.echo(f"Ranking background datasets by {group} via ANOVA")
if n_genes >= len(genes):
click.secho(
f"# of ANOVA genes {n_genes} greater than {len(genes)}", fg="yellow"
)
skb_genes = genes
else:
skb_genes = select_k_best_genes(df, genes, n=n_genes)
n_genes = int(percent_genes * len(genes))
skb_genes = select_k_best_genes(df, genes, n=n_genes)
dist = pairwise_distances(np.array(sample[skb_genes]).reshape(1, -1), df[skb_genes])
dist = pd.DataFrame([dist.ravel(), df["tissue"]]).T
dist.columns = ["Distance", "Group"]
Expand Down
3 changes: 1 addition & 2 deletions tests/test_gene_outlier_detection.py
Expand Up @@ -106,9 +106,8 @@ def test_anova_distances(load_data):

sample, df, genes = load_data
dist = anova_distances(sample, df, genes)
print(dist)
assert list(dist.Group) == ["Thyroid", "Brain"]
assert [int(x) for x in dist.MedianDistance] == [57, 131]
assert [int(x) for x in dist.MedianDistance] == [63, 142]


def test_run_model(model_output):
Expand Down

0 comments on commit d044bdf

Please sign in to comment.