Skip to content

Commit

Permalink
[MRG] Document sourmash.fig usage and behavior (#859)
Browse files Browse the repository at this point in the history
* add an initial notebook showing sourmash.fig stuff

* add reordered matrix output option to sourmash plot with --csv

* add reference to new notebook

* update notebook

* add test for plot --csv
  • Loading branch information
ctb committed Jul 22, 2020
1 parent 468e02c commit f8d0262
Show file tree
Hide file tree
Showing 7 changed files with 396 additions and 7 deletions.
6 changes: 6 additions & 0 deletions doc/api-example.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ data/GCF_000783305.1 0.0 0.0 1.0
Note that the comparisons are quite quick; most of the time is spent in
making the minhashes, which can be saved and loaded easily.

## Plotting dendrograms and matrices

If you're interested in building comparison matrices and dendrograms,
please see the notebook
[Building plots from `sourmash compare` output](plotting-compare.md).

## Saving and loading signature files

Signature files encapsulate MinHashes in JSON, and provide a way to
Expand Down
330 changes: 330 additions & 0 deletions doc/plotting-compare.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions doc/tutorials.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ If you are a Python programmer, you might also be interested in our [API example

If you prefer R, we have [a short guide to using sourmash output with R](other-languages.md).

## Customizing matrix and dendrogram plots in Python

If you're interested in customizing the output of `sourmash plot`,
which produces comparison matrices and dendrograms, please see
[Building plots from `sourmash compare` output](plotting-compare.md).

## Contents:

```{toctree}
Expand Down
5 changes: 5 additions & 0 deletions sourmash/cli/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def subparser(subparsers):
subparser.add_argument(
'--output-dir', metavar='DIR', help='directory for output plots'
)
subparser.add_argument(
'--csv', metavar='F',
help='write clustered matrix and labels out in CSV format (with column'
' headers) to this file'
)


def main(args):
Expand Down
15 changes: 14 additions & 1 deletion sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def plot(args):
notify('wrote dendrogram to: {}', dendrogram_out)

### make the dendrogram+matrix:
fig = sourmash_fig.plot_composite_matrix(D, labeltext,
(fig, rlabels, rmat) = sourmash_fig.plot_composite_matrix(D, labeltext,
show_labels=args.labels,
show_indices=args.indices,
vmin=args.vmin,
Expand All @@ -263,6 +263,19 @@ def plot(args):
for i, name in enumerate(labeltext):
print_results('{}\t{}', i, name)

# write out re-ordered matrix and labels
if args.csv:
with FileOutput(args.csv, 'wt') as csv_fp:
w = csv.writer(csv_fp)
w.writerow(rlabels)

for i in range(len(rlabels)):
y = []
for j in range(len(rlabels)):
y.append('{}'.format(rmat[i][j]))
w.writerow(y)
notify('Wrote clustered matrix and labels out to {}', args.csv)


def import_csv(args):
"Import a CSV file full of signatures/hashes."
Expand Down
14 changes: 8 additions & 6 deletions sourmash/fig.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,
dendrolabels = [str(i) for i in range(len(labeltext))]

Z1 = sch.dendrogram(Y, orientation='left', labels=dendrolabels,
no_labels=not show_indices)
no_labels=not show_indices, get_leaves=True)
ax1.set_xticks([])

xstart = 0.45
Expand All @@ -57,15 +57,17 @@ def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,
xstart = 0.315
scale_xstart = xstart + width + 0.01

# plot matrix
axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])

# (this reorders D by the clustering in Z1)
# re-order labels along rows, top to bottom
idx1 = Z1['leaves']
reordered_labels = [ labeltext[i] for i in reversed(idx1) ]

# reorder D by the clustering in the dendrogram
D = D[idx1, :]
D = D[:, idx1]

# show matrix
axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])

im = axmatrix.matshow(D, aspect='auto', origin='lower',
cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax)
axmatrix.set_xticks([])
Expand All @@ -75,4 +77,4 @@ def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,
axcolor = fig.add_axes([scale_xstart, 0.1, 0.02, 0.6])
pylab.colorbar(im, cax=axcolor)

return fig
return fig, reordered_labels, D
27 changes: 27 additions & 0 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,33 @@ def test_plot_override_labeltext_fail():
assert '3 labels != matrix size, exiting' in err


@utils.in_tempdir
def test_plot_reordered_labels_csv(c):
    """Check the header row written by `sourmash plot --csv`.

    Runs `sourmash compare` on the seven demo signatures, then
    `sourmash plot --csv`, and verifies that the first CSV line contains
    the same labels as the inputs but in a different order.
    """
    sig_paths = sorted(glob.glob(utils.get_test_data('demo/*.sig')))
    assert len(sig_paths) == 7

    c.run_sourmash('compare', '-o', 'cmp', *sig_paths)
    c.run_sourmash('plot', 'cmp', '--csv', 'neworder.csv')

    with open(c.output('neworder.csv'), 'rt') as csv_fp:
        csv_lines = csv_fp.readlines()

    # The exact output order is hard to guarantee, so only require that
    # the header labels come out in a different order than the inputs.
    header = csv_lines[0].strip().split(',')

    # Swap the trailing '.sig' for '.fastq.gz' -- presumably the names
    # recorded inside the demo signatures; verify against the test data.
    expected = [
        os.path.basename(path)[:-4] + '.fastq.gz' for path in sig_paths
    ]

    print(expected)
    print(header)

    assert set(expected) == set(header)  # same file names...
    assert expected != header  # ...different order.


def test_plot_subsample_1():
with utils.TempDirectory() as location:
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
Expand Down

0 comments on commit f8d0262

Please sign in to comment.