# Simulated interactions

This notebook generates simulated datasets of phylogenies of interacting
groups of species.

There are two kinds of simulated data: null data sets, consisting of two randomly generated
trees with random links between leaves, and "perfect" data sets, consisting of identical trees
with leaves linked in a bijection.

In [None]:
## Null datasets

import dendropy
import os

# Bounds passed to randint when drawing the number of leaves of each tree.
N_min, N_max = 10, 200

# Build 50 null datasets. Each one is a pair of independently simulated
# birth-death trees (host and guest) plus a link matrix filled at random, so
# there is no designed relationship between the two trees.
# NOTE(review): randint, zeros and pd are not imported in this cell; presumably
# they come from an earlier cell (e.g. %pylab and `import pandas as pd`) -- confirm.
for i in xrange(50) :

    N = randint( N_min, N_max )   # number of host leaves
    M = randint( N_min, N_max )   # number of guest leaves

    # Number of link draws, scaled with the total number of leaves. The bounds
    # are cast to int explicitly rather than relying on randint to truncate
    # float arguments.
    L = randint( int( 0.5*( N+M ) ), int( 4.0*( N+M ) ) )

    host_taxa =  [ 'host_'  + str(x) for x in xrange(N) ]
    guest_taxa = [ 'guest_' + str(x) for x in xrange(M) ]

    # Positional arguments are the birth rate (1.0) and death rate (0.5).
    ht = dendropy.simulate.treesim.birth_death_tree( 1.0, 0.5,
                                                     #birth_rate_sd=0.5, death_rate_sd=0.5,
                                                     ntax=len(host_taxa) )

    gt = dendropy.simulate.treesim.birth_death_tree( 1.0, 0.5,
                                                     #birth_rate_sd=0.5, death_rate_sd=0.5,
                                                     ntax=len(guest_taxa) )

    # Relabel the simulated leaves so the tree taxa match the link matrix headers.
    for leaf,name in zip(ht.leaf_node_iter(),host_taxa) :
        leaf.taxon.label = name

    for leaf,name in zip(gt.leaf_node_iter(),guest_taxa) :
        leaf.taxon.label = name

    # Link matrix: rows are guests, columns are hosts. Each draw marks one
    # random cell; draws may land on the same cell more than once, so the
    # number of distinct links can be smaller than L.
    lm = zeros( (M,N) )
    for j in xrange(L) :
        lm[ randint(M), randint(N) ] = 1

    lmdf = pd.DataFrame( lm, columns=host_taxa, index=guest_taxa, dtype=int )

    # makedirs (not mkdir) so that missing parent directories are created too.
    path = 'data/simulated/null/null' + str(i) +'/'
    if not os.path.exists( path ) :
        os.makedirs( path )

    # Context managers make sure the tree files are flushed and closed.
    with open( path + 'host.tree', 'w' ) as host_file :
        ht.write( file=host_file, schema='newick' )
    with open( path + 'guest.tree', 'w' ) as guest_file :
        gt.write( file=guest_file, schema='newick' )
    lmdf.to_csv( path + 'links.csv' )

In [None]:
## "Perfect" datasets

import dendropy
import os

# Bounds passed to randint when drawing the number of leaves of each tree.
N_min, N_max = 10, 200

# Build 50 "perfect" datasets. A single simulated birth-death tree is written
# twice (once with host labels, once with guest labels), and the link matrix
# is the identity, so host_k is linked to guest_k and to nothing else.
# NOTE(review): randint, zeros and pd are not imported in this cell; presumably
# they come from an earlier cell (e.g. %pylab and `import pandas as pd`) -- confirm.
for i in xrange(50) :

    N = randint( N_min, N_max )   # number of leaves shared by both trees

    # makedirs (not mkdir) so that missing parent directories are created too.
    path = 'data/simulated/perfect/perfect' + str(i) +'/'
    if not os.path.exists( path ) :
        os.makedirs( path )

    host_taxa =  [ 'host_'  + str(x) for x in xrange(N) ]
    guest_taxa = [ 'guest_' + str(x) for x in xrange(N) ]

    # Positional arguments are the birth rate (1.0) and death rate (0.5).
    t = dendropy.simulate.treesim.birth_death_tree( 1.0, 0.5,
                                                    #birth_rate_sd=0.5, death_rate_sd=0.5,
                                                    ntax=len(host_taxa) )

    # Label the leaves as hosts and write the tree out ...
    for leaf,name in zip(t.leaf_node_iter(),host_taxa) :
        leaf.taxon.label = name

    # Context managers make sure the tree files are flushed and closed.
    with open( path + 'host.tree', 'w' ) as host_file :
        t.write( file=host_file, schema='newick' )

    # ... then relabel the very same tree as guests and write it again. The
    # host tree must be written before this relabeling, because the labels are
    # overwritten in place.
    for leaf,name in zip(t.leaf_node_iter(),guest_taxa) :
        leaf.taxon.label = name

    with open( path + 'guest.tree', 'w' ) as guest_file :
        t.write( file=guest_file, schema='newick' )

    # Identity link matrix: rows are guests, columns are hosts. The strided
    # assignment over the flattened array sets exactly the diagonal cells.
    lm = zeros( (N,N) )
    lm.flat[ ::N+1 ] = 1

    lmdf = pd.DataFrame( lm, columns=host_taxa, index=guest_taxa, dtype=int )

    lmdf.to_csv( path + 'links.csv' )