In [7]:
import synthetic;
import json
# Motif definitions are read from a file in the current working directory.
# NOTE(review): pseudocountProb is presumably a smoothing probability applied
# to the motif matrices -- confirm against the synthetic module.
pathToMotifs = "motifs.txt"
loadedMotifs = synthetic.LoadedEncodeMotifs(
    pathToMotifs,
    pseudocountProb=0.001,
)

# Substring generator backed by the CTCF_known1 motif.
ctcfSampler = synthetic.PwmSamplerFromLoadedMotifs(
    loadedMotifs,
    motifName="CTCF_known1",
)


In [8]:
# Substring generator for SPI1_known1 using the "logOdds" best-hit mode.
# In the recorded output of this notebook the SPI1 substring is always the
# same string, whereas the CTCF substrings vary from sequence to sequence.
spi1Sampler = synthetic.BestHitPwmFromLoadedMotifs(
    loadedMotifs,
    motifName="SPI1_known1",
    bestHitMode="logOdds",
)

# Embeds one CTCF substring per call, at a position chosen by
# UniformPositionGenerator.
singleCtcfEmbedder = synthetic.SubstringEmbedder(
    substringGenerator=ctcfSampler,
    positionGenerator=synthetic.UniformPositionGenerator(),
)

In [9]:
# Embeds one SPI1 substring per call, with the position drawn from
# OutsideCentralBp(400).
# NOTE(review): presumably this excludes the central 400 bp of the sequence;
# the recorded SPI1 start positions all lie near the sequence ends -- confirm.
singleSpi1Embedder = synthetic.SubstringEmbedder(
    substringGenerator=spi1Sampler,
    positionGenerator=synthetic.OutsideCentralBp(400),
)

# Quantity generator: Poisson(3) wrapped so that values stay within [1, 5].
# NOTE(review): whether MinMaxWrapper clips or resamples out-of-range draws
# is not visible here -- confirm in the synthetic module.
truncatedPoissonGenerator = synthetic.MinMaxWrapper(
    quantityGenerator=synthetic.PoissonQuantityGenerator(3),
    theMin=1,
    theMax=5,
)

In [10]:
# Poisson(5) quantity generator wrapped in a ZeroInflater with zeroProb=0.5.
# This object is not referenced by any other cell visible in this notebook.
zeroInflater = synthetic.ZeroInflater(
    quantityGenerator=synthetic.PoissonQuantityGenerator(5),
    zeroProb=0.5,
)

In [12]:
# Embeds one IRF_known1 substring ("logOdds" best-hit mode) per call, at a
# position chosen by UniformPositionGenerator.
singleIRFembedder = synthetic.SubstringEmbedder(
    substringGenerator=synthetic.BestHitPwmFromLoadedMotifs(
        loadedMotifs,
        motifName="IRF_known1",
        bestHitMode="logOdds",
    ),
    positionGenerator=synthetic.UniformPositionGenerator(),
)

# Single-sequence generator: a 500-long zero-order background with two
# embedders applied to it.
#   1. CTCF, repeated a number of times drawn from truncatedPoissonGenerator.
#   2. An XOR of the SPI1 and IRF embedders with probOfFirst=0.5.
# NOTE(review): XOREmbedder presumably applies exactly one of its two
# embedders per sequence; the recorded output shows either the SPI1 or the
# IRF substring in each sequence, never both -- confirm.
embedInBackground = synthetic.EmbedInABackground(
    backgroundGenerator=synthetic.ZeroOrderBackgroundGenerator(seqLength=500),
    embedders=[
        synthetic.RepeatedEmbedder(
            embedder=singleCtcfEmbedder,
            quantityGenerator=truncatedPoissonGenerator,
        ),
        synthetic.XOREmbedder(
            embedder1=singleSpi1Embedder,
            embedder2=singleIRFembedder,
            probOfFirst=0.5,
        ),
    ],
)

In [14]:
# Wrap the single-sequence generator so that it is invoked 10 times.
sequenceSet = synthetic.GenerateSequenceNTimes(embedInBackground, 10)

# Dump the generator configuration as indented JSON.
# The parenthesised single-argument form prints the same text on Python 2
# and is also valid on Python 3, unlike the bare print statement.
print(json.dumps(sequenceSet.getJsonableObject(), indent=4))

{
    "numSeq": 10, 
    "singleSequenceGenerator": {
        "class": "EmbedInABackground", 
        "namePrefix": "synth", 
        "backgroundGenerator": {
            "class": "zeroOrderMarkovBackground", 
            "length": 500, 
            "distribution": {
                "A": 0.3, 
                "C": 0.2, 
                "G": 0.2, 
                "T": 0.3
            }
        }, 
        "embedders": [
            {
                "class": "RepeatedEmbedder", 
                "embedder": {
                    "substringGenerator": {
                        "class": "PwmSampler", 
                        "motifName": "CTCF_known1", 
                        "loadedMotifs": {
                            "fileName": "motifs.txt", 
                            "pseudocountProb": 0.001, 
                            "background": {
                                "A": 0.3, 
                                "C": 0.2, 
                                "G": 0.2, 
                 

In [17]:
# Print one line per generated sequence: the sequence name followed by the
# (substring, start position) pair of every embedding, separated by " | ".
# Add generatedSeq.seq to the formatted line to also show the full sequence.
for generatedSeq in sequenceSet.generateSequences():
    embeddingSummary = " | ".join(
        str((x.what, x.startPos)) for x in generatedSeq.embeddings
    )
    # Build a single string first: print(a, b) would print a tuple on
    # Python 2, while the bare print statement is a SyntaxError on Python 3.
    print("%s %s" % (generatedSeq.seqName, embeddingSummary))

synth12 ('CTTCCACTAGGTGGCGGCA', 153) | ('ATTCCACAAGGTGGCGTTA', 231) | ('CCACCACGAGGGGGCGGCC', 67) | ('AGAGGAAG', 481)
synth13 ('TCGCCACCAGGGGGCGGTC', 27) | ('GAAAAGCGAAACC', 120)
synth14 ('TAACCAGTAGGGGGCGGTG', 58) | ('GAAAAGCGAAACC', 279)
synth15 ('TGGCCAGGAGGTGGAACCT', 350) | ('GAGCCACCAGGTGGCGCTC', 115) | ('AGAGGAAG', 22)
synth16 ('TGACCACCAGGGGGCTACC', 463) | ('CGTCCACTAGGCGGCGGAC', 57) | ('AGAGGAAG', 5)
synth17 ('TAGCCAGAAGAGGGCGCTG', 22) | ('AGAGGAAG', 483)
synth18 ('GGACCTGTAGGGGACGCCC', 6) | ('ATTCCAGTAGGGGTCGCCC', 431) | ('GAAAAGCGAAACC', 189)
synth19 ('AGACCACTGGGTGTCAGAG', 234) | ('ACGCCACCAGGGGGCAGCG', 280) | ('TCACCAGTAGAGGGCGGTA', 325) | ('TGGCCAGCAGGGGACACCG', 24) | ('CCGCCAGCAGAGGGCGCTC', 421) | ('GAAAAGCGAAACC', 90)
synth20 ('CTTCCAGCAGAGGGCAGTG', 118) | ('TCTACACCAGGTGGCGCTA', 411) | ('TCGCCGCCAGGGGGCGCTA', 245) | ('GAAAAGCGAAACC', 451)
synth21 ('TAACCAGTAGGGGGCAGTA', 302) | ('TAGCCACGAGAGGGCGCCC', 466) | ('AGAGGAAG', 10)


In [8]:
# Inspection cell: display the class of the loaded-motifs object as a string.
str(loadedMotifs.__class__)

"<class 'synthetic.LoadedEncodeMotifs'>"