In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Needed to call snakemake from a Jupyter notebook: the build workflow is
# launched from inside the notebook kernel further down, and
# nest_asyncio.apply() patches asyncio so that nested event loops are allowed.
import nest_asyncio
nest_asyncio.apply()

In [3]:
from metagraph_workflows import cli
from pathlib import Path

from metagraph_workflows import workflow_configs

## Building Index

In [4]:
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import SeqIO

In [5]:
# Quick look at the SeqRecord repr (positional arguments: sequence, then id).
# NOTE(review): the sequence is passed as a plain str here, whereas the records
# actually used below wrap it in Seq(...) -- confirm a raw str is intended.
SeqRecord("AGTACACTGGTAGTACACTGGTAGTACACTGGT", '1')

SeqRecord(seq='AGTACACTGGTAGTACACTGGTAGTACACTGGT', id='1', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [6]:
# Three example records used to build and later query the index:
# a short repetitive sequence, a 40-base poly-A run and a longer sequence.
seqs = [
    SeqRecord(Seq(bases), id=record_id)
    for record_id, bases in (
        ('seq1', "AGTACACTGGTAGTACACTGGTAGTACACTGGT"),
        ('seq2', "A"*40),
        ('seq3', "TTTCACTCTTTGATAGCAGCATGCTTAGTACTAAGCTAAGTCTCCAAGATTGTCGAGTCAGTCGCTTCATTTCTTCCTACCTGATACTAGTATGACTTGATCCTCCCGCTGCACGTAAAACCACAAAAGATACACTACTTAATTACCAGTAGAAATATACAATCAATGCAGTCATAGAATCGGAGGACAATACTTTGCCAAGCAGGGTTT"),
    )
]

In [7]:
# Path of the example FASTA file (written and displayed by the next cells).
fasta_file = Path('/tmp/my.fasta')

In [8]:
# Serialise the example records to disk in FASTA format.
with fasta_file.open("w") as fasta_handle:
    SeqIO.write(seqs, fasta_handle, 'fasta')

In [9]:
# Show the FASTA file as it was written to disk.
!cat {fasta_file}

>seq1 <unknown description>
AGTACACTGGTAGTACACTGGTAGTACACTGGT
>seq2 <unknown description>
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
>seq3 <unknown description>
TTTCACTCTTTGATAGCAGCATGCTTAGTACTAAGCTAAGTCTCCAAGATTGTCGAGTCA
GTCGCTTCATTTCTTCCTACCTGATACTAGTATGACTTGATCCTCCCGCTGCACGTAAAA
CCACAAAAGATACACTACTTAATTACCAGTAGAAATATACAATCAATGCAGTCATAGAAT
CGGAGGACAATACTTTGCCAAGCAGGGTTT


In [10]:
# File holding the list of input paths (one per line) that is handed to the
# build workflow.
sample_list_path = Path('/tmp/paths.txt')

In [11]:
def create_sample_list_path(path_list, out_path):
    """Write the given paths to ``out_path``, one per line.

    Each entry of ``path_list`` is converted with ``str``. Entries are
    separated by a newline and no trailing newline is written. The file at
    ``out_path`` is opened in write mode, so existing content is replaced.
    """
    with open(out_path, "w") as sample_list:
        sample_list.write('\n'.join(map(str, path_list)))

# Write a one-entry sample list that points at the example FASTA file.
create_sample_list_path([fasta_file], sample_list_path)

In [12]:
# Workflow output directories. output_dir_primary receives the primary-graph
# build run below.
# NOTE(review): no cell in this notebook runs a build into output_dir.
output_dir = Path('/tmp/output_test')
output_dir_primary =  Path('/tmp/output_test_primary')

In [13]:
# Show the sample list that will be passed to the build workflow.
!cat {sample_list_path}

/tmp/my.fasta

In [14]:
# Check which metagraph binary/version is available: the help banner includes
# the version string. stderr is merged into stdout so that `head` truncates
# the whole help text to its first lines.
!metagraph --help 2>&1 | head

#############################
### Welcome to MetaGraph! ###
#############################

Metagraph: comprehensive metagenome graph representation -- Version 0.1

Usage: metagraph <command> [command specific options]

Available commands:
	build		construct a graph object from input sequence


In [15]:
# Build a primary graph and its annotations from the sample list into
# output_dir_primary; annotation labels are taken from the sequence headers.
# k=5 is presumably the k-mer length (tiny on purpose for this toy example).
cli.run_build_workflow(
    output_dir_primary,
    sample_list_path,
    build_primary_graph=True,
    force=True,
    k=5,
    verbose=False,
    annotation_labels_source=workflow_configs.AnnotationLabelsSource.SEQUENCE_HEADERS,
)

Building DAG of jobs...
Using shell: /usr/local/bin/bash
Provided cores: 1 (use --cores to define parallelism)
Rules claiming more threads will be scaled down.
Job counts:
	count	jobs
	1	all
	1	annotate
	1	annotate_row_diff_brwt
	1	build_joint_graph
	1	build_joint_primary
	1	generate_column_list
	1	primarize_joint_graph
	1	relax_row_diff_brwt
	1	transform_rd_stage0
	1	transform_rd_stage1
	1	transform_rd_stage2
	11
	count	jobs
	1	all
	1	annotate
	1	annotate_row_diff_brwt
	1	build_joint_graph
	1	build_joint_primary
	1	generate_column_list
	1	primarize_joint_graph
	1	relax_row_diff_brwt
	1	transform_rd_stage0
	1	transform_rd_stage1
	1	transform_rd_stage2
	11

2021-10-19 11:50:40,274 - INFO: 
[Tue Oct 19 11:50:40 2021]
2021-10-19 11:50:40,276 - INFO: [Tue Oct 19 11:50:40 2021]
rule build_joint_graph:
    input: /tmp/paths.txt
    output: /tmp/output_test_primary/graph_canonical.dbg
    log: /tmp/output_test_primary/logs/build_joint_graph.log
    jobid: 3
    resources: mem_mb=4048, disk_mb


2021-10-19 11:50:42,797 - INFO: 
[Tue Oct 19 11:50:43 2021]
2021-10-19 11:50:43,098 - INFO: [Tue Oct 19 11:50:43 2021]
Finished job 9.
2021-10-19 11:50:43,100 - INFO: Finished job 9.
7 of 11 steps (64%) done
2021-10-19 11:50:43,101 - INFO: 7 of 11 steps (64%) done

2021-10-19 11:50:43,105 - INFO: 
[Tue Oct 19 11:50:43 2021]
2021-10-19 11:50:43,108 - INFO: [Tue Oct 19 11:50:43 2021]
rule transform_rd_stage2:
    input: /tmp/output_test_primary/graph.dbg, /tmp/output_test_primary/columns.txt, /tmp/output_test_primary/rd_cols/vectors.row_reduction
    output: /tmp/output_test_primary/graph.dbg.anchors, /tmp/output_test_primary/rd_cols/DONE
    log: /tmp/output_test_primary/logs/transform_rd_stage2.log
    jobid: 6
    resources: mem_mb=4048
2021-10-19 11:50:43,111 - INFO: rule transform_rd_stage2:
    input: /tmp/output_test_primary/graph.dbg, /tmp/output_test_primary/columns.txt, /tmp/output_test_primary/rd_cols/vectors.row_reduction
    output: /tmp/output_test_primary/graph.dbg.anchor

True

## Querying Index

In [16]:
!ls {output_dir}

[34mcolumns[m[m                             graph.dbg.succ
columns.txt                         graph.dbg.succ_boundary
graph.dbg                           graph.relax.row_diff_brwt.annodbg
graph.dbg.anchors                   graph.row_diff_brwt.annodbg
graph.dbg.pred                      graph.row_diff_brwt.annodbg.linkage
graph.dbg.pred_boundary             [34mlogs[m[m
graph.dbg.rd_succ                   [34mrd_cols[m[m


In [17]:
# Graph and annotation files served to the query client. They are taken from
# output_dir_primary, the only directory this notebook actually builds, so the
# query section also works after "Restart Kernel -> Run All" on a clean machine.
# NOTE(review): the annotation file name is assumed to match the output of the
# relax_row_diff_brwt step of the build -- confirm against the directory listing.
graph_path = output_dir_primary / 'graph.dbg'
annotations = output_dir_primary / 'graph.relax.row_diff_brwt.annodbg'

In [18]:
# Start a metagraph query server in the background for the query cells below.
import asyncio  # retained: not used in this cell, but other cells may rely on it
import atexit
import subprocess

# The command is passed as an argument list instead of a shell string:
#  * paths containing spaces or shell metacharacters are passed through intact;
#  * `process` is the metagraph process itself rather than a wrapping shell,
#    so kill() reaches the server;
#  * no event loop is involved, so the handle stays usable for the whole
#    kernel lifetime.
# NOTE(review): the server may need a moment before it accepts requests.
process = subprocess.Popen(
    ["metagraph", "server_query", "-i", str(graph_path), "-a", str(annotations)]
)

def kill_server():
    """Kill the metagraph server process if it is still running."""
    if process.poll() is None:
        process.kill()

atexit.register(kill_server) # shutting down server, when jupyter kernel exits

<function __main__.kill_server()>

In [19]:
# Python client for the metagraph server started above.
# API docs: https://metagraph.ethz.ch/static/docs/api.html#install-api
# NOTE(review): port 5555 is presumably the default port of
# `metagraph server_query` (the server cell does not set one) -- confirm.
from metagraph import client
cl = client.GraphClient('localhost', '5555')

In [20]:
# The sequence of the first example record; it is used as the query below.
seqs[0].seq

Seq('AGTACACTGGTAGTACACTGGTAGTACACTGGT')

In [21]:
# Sanity check: query the first example sequence and its reverse complement.
# The record the sequence came from (seq1) should show up among the hits.
cl.search([
    str(seqs[0].seq),
    str(seqs[0].seq.reverse_complement()),
])

Unnamed: 0,kmer_count,sample,seq_description
0,29,seq1,0
1,11,seq3,0
2,17,seq3,1
