# Sedge grasses and smut fungi

This notebook rebuilds the co-phylogeny of sedge grasses and their smut fungi 
parasites found in [*Phylogenetic congruence of parasitic smut fungi 
(Anthracoidea, Anthracoideaceae) and their host plants (Carex, Cyperaceae): 
Cospeciation or host-shift speciation?*](http://www.amjbot.org/content/102/7/1108.full),
by Marcial Escudero.



In [7]:
sedge_outgroup = { 
    'Dulichium arundinacea'    : [ 'DQ998949', 'DQ998895', 'DQ999002' ],
    'Eriophorum angustifolium' : [ 'DQ998950', 'DQ998896', 'DQ999003' ],
    'Eriophorum vaginatum'     : [ 'AY242008', 'AY242009', 'AY757692' ],
    'Trichophorum alpinum'     : [ 'AY757432', 'AY757400', 'AY757496' ],
    'Trichophorum cespitosum'  : [ 'DQ998951', 'DQ998897', 'DQ999004' ],
    }

sedge_ingroup = {
    'Unicinia rubra'       : [ 'AY012628', 'AY012629', 'AY115852' ],
    'Carex rupestris'      : [ 'AY244521', 'AY244522', 'AF164934' ],
    'Kobresia myosuroides' : [ 'AY242036', 'AY242037', 'AY757566', 'AF284985' ],
    'Carex curvula'        : [ 'AY242030', 'AY242031', 'AY757564' ],
    'Carex baldensis'      : [ 'GU176152', 'EF363121', 'GU176107' ],
    'Carex chordorrhiza'   : [ 'AY757409', 'AY757389', 'AY757485' ],
    'Carex heleonastes'    : [ 'AY757418', 'AY757388', 'AY757484' ],
    'Carex brunnescens'    : [ 'DQ115114', 'DQ115115', 'AY757481' ],
    'Carex lachenalii'     : [ 'EU288556', 'EU288440' ],
    'Carex arenaria'       : [ 'DQ115100', 'DQ115101' ],
    'Carex dioica'         : [ 'DQ115146', 'DQ115147', 'AF191816' ],
    'Carex davalliana'     : [ 'EU288548', 'EU288432' ],
    'Carex parallela'      : [ 'EU001046', 'EU001195', 'GQ244737' ],
    'Carex paniculata'     : [ 'EU288562', 'DQ115237', 'EU288446' ],
    'Carex echinata'       : [ 'AY757415', 'AY757381', 'AY757477' ],
    'Carex muricata'       : [ 'AF285036', 'AF284934' ],
    'Carex ornithopoda'    : [ 'AY278269', 'KR857280' ],
    'Carex panicea'        : [ 'DQ998937', 'DQ998883', 'DQ998990' ],
    'Carex meadii'         : [ 'KR857284', 'KR857281' ],
    'Carex atrofusca'      : [ 'AY278313', 'AM085581' ],
    'Carex flacca'         : [ 'DQ998915', 'DQ998862', 'DQ998968' ],
    'Carex buxbaumii'      : [ 'EU288545', 'EU288428' ],
    'Carex digitata'       : [ 'AY757624', 'AY757684', 'AY757552' ],
    'Carex bigelowii'      : [ 'GQ223580', 'GQ223498', 'GQ244699' ],
    'Carex ferruginea'     : [ 'EU288550', 'EU288434' ],
    'Carex glacilalis'     : [ 'AY757625', 'AY757685', 'AY757553' ],
    'Carex pilulifera'     : [ 'AF284975', 'AY325438', 'AF284873' ],
    'Carex alba'           : [ 'AY278259', 'KR857282' ],
    'Carex firma'          : [ 'AF284995', 'AF284893' ],
    'Carex sempervirens'   : [ 'EU288565', 'EU288449' ],
    'Carex capillaris'     : [ 'DQ998905', 'DQ998852', 'DQ998958' ],
    'Carex limosa'         : [ 'AY757595', 'AY757656', 'AY757522' ],
    'Carex magellanica'    : [ 'AY278292', 'AY757655', 'AY757521' ],
    'Carex hostiana'       : [ 'EU288555', 'JX409830', 'EU288439' ],
    'Carex hirta'          : [ 'EU288553', 'EU288437' ],
    'Carex elata'          : [ 'AY770470', 'AY770440', 'HM590257' ],
    'Carex globularis'     : [ 'AF285049', 'AF284947' ],
    'Carex vesicaria'      : [ 'AY278289', 'KR857283' ],
    'Carex rostrata'       : [ 'EU288564', 'EU288448' ],
    'Carex riparia'        : [ 'AY757571', 'AY757633', 'AY757498' ],
    'Carex lasiocarpa'     : [ 'DQ998925', 'DQ998872', 'DQ998978' ]
    }

In [3]:
smut_outgroup = {
    'Sporisorium monakai'           : [ 'AY740161' ], 
    'Sporisorium pseudechinolaenae' : [ 'AY740139' ], 
    'Sporisorium manilense'         : [ 'AY740112' ],
    'Shivasia solida'               : [ 'JF966729' ],
    'Shivasia solida'               : [ 'JF966730' ]
    }
    
smut_ingroup = {
    'Anthracoidea caricis-meadii' : [ 'JN863083' ], 
    'Anthracoidea carphae'        : [ 'AY563614', 'DQ875359' ], 
    'Anthracoidea sclerotiformis' : [ 'AY563613', 'DQ363331' ],
    'Anthracoidea curvulae'       : [ 'AY563612', 'AY563611' ],
    'Anthracoidea elynae'         : [ 'AY563610', 'AY563609' ],
    'Anthracoidea sp 1'           : [ 'AY563608' ],
    'Anthracoidea aspera'         : [ 'AY563607' ],
    'Anthracoidea arenaria'       : [ 'AY563606' ],
    'Anthracoidea inclusa'        : [ 'AY563605' ],
    'Anthracoidea subinclusa'     : [ 'AY563604', 'AY563603', 'AY563602' ],
    'Anthracoidea heterospora'    : [ 'AY563601', 'AY563600' ],
    'Anthracoidea baldensis'      : [ 'AY563599' ],
    'Anthracoidea rupestris'      : [ 'AY563598' ],
    'Anthracoidea vankyi'         : [ 'AY563597' ],
    'Anthracoidea capillaris'     : [ 'AY563596' ],
    'Anthracoidea caricis-albae'  : [ 'AY563595', 'AY563594' ],
    'Anthracoidea globularis'     : [ 'AY563593' ],
    'Anthracoidea irregularis'    : [ 'AY563592', 'AY563591', 'AY563590' ],
    'Anthracoidea caricis'        : [ 'AY563589' ],
    'Anthracoidea sp 2'           : [ 'AY563588' ],
    'Anthracoidea sempervirentis' : [ 'AY563587', 'AY563586', 'AY563585' ],
    'Anthracoidea misandrae'      : [ 'AY563584' ],
    'Anthracoidea lasiocarpae'    : [ 'AY563583' ],
    'Anthracoidea buxbaumii'      : [ 'AY563582' ],
    'Anthracoidea hostianae'      : [ 'AY563581' ],
    'Anthracoidea paniceae'       : [ 'AY563580' ],
    'Anthracoidea karii'          : [ 'AY563579', 'AY563578', 'AY563577', 'AY563576',
                                      'AY563575', 'AY563574', 'DQ875358' ],
    'Anthracoidea limosa'         : [ 'AY563573', 'AY563572' ],
    'Anthracoidea turfosa'        : [ 'AY563571', 'AY563570', 'AY563569' ],
    'Anthracoidea bigelowii'      : [ 'AY563568', 'AY563567', 'AY563566' ],
    'Anthracoidea pratensis'      : [ 'AY563565', 'AY563564', 'AY563563', 'DQ875360' ]
    }

In [12]:
from Bio import Entrez
from Bio.SeqIO import parse
from Bio.SeqRecord import SeqRecord
from StringIO import StringIO
import pandas

Entrez.email = "ryneches@ucdavis.edu"

sedge_genes = {
    '5.8S' : [ '5.8S ribosomal RNA' ],
    '1F'   : [ 'external transcribed spacer' ],
    'TRNL' : [ 'tRNA-Leu', 'trnL' ],
    }

def match_gene( description, nametable ) :
    for gene in nametable.keys() :
        for genestring in nametable[gene] :
            if description.__contains__( genestring ) :
                return gene
                
sedge_genetable = {}

for key in sedge_outgroup.keys() :
    sedge_genetable[key] = {}
    for accn in sedge_outgroup[key] :
        fasta = Entrez.efetch( db='nucleotide', id=accn, rettype='fasta', retmode='fasta' ).read()
        record = parse( StringIO( fasta ) , 'fasta' ).next()
        gene = match_gene( record.description, sedge_genes )
        sedge_genetable[key][gene] = record
    print key, ':', ', '.join( sedge_genetable[key].keys() )
    
for key in sedge_ingroup.keys() :
    sedge_genetable[key] = {}
    for accn in sedge_ingroup[key] :
        fasta = Entrez.efetch( db='nucleotide', id=accn, rettype='fasta', retmode='fasta' ).read()
        record = parse( StringIO( fasta ) , 'fasta' ).next()
        gene = match_gene( record.description, sedge_genes )
        sedge_genetable[key][gene] = record
        #print record.description
    print key, ':', ', '.join( sedge_genetable[key].keys() )

sedge_genetable = pandas.DataFrame( sedge_genetable ).T
sedge_genetable.applymap( len )

Trichophorum cespitosum : TRNL, 5.8S, 1F
Eriophorum vaginatum : TRNL, 5.8S, 1F
Trichophorum alpinum : TRNL, 5.8S, 1F
Dulichium arundinacea : TRNL, 5.8S, 1F
Eriophorum angustifolium : TRNL, 5.8S, 1F
Carex parallela : TRNL, 5.8S, 1F
Carex flacca : TRNL, 5.8S, 1F
Carex glacilalis : TRNL, 5.8S, 1F
Carex arenaria : 5.8S, 1F
Carex firma : TRNL, 5.8S
Carex curvula : TRNL, 5.8S, 1F
Carex heleonastes : TRNL, 5.8S, 1F
Carex limosa : TRNL, 5.8S, 1F
Carex ferruginea : TRNL, 5.8S
Carex magellanica : TRNL, 5.8S, 1F
Carex brunnescens : TRNL, 5.8S, 1F
Carex ornithopoda : 5.8S, 1F
Carex sempervirens : TRNL, 5.8S
Carex davalliana : TRNL, 5.8S
Carex paniculata : TRNL, 5.8S, 1F
Carex dioica : TRNL, 5.8S, 1F
Carex buxbaumii : TRNL, 5.8S
Carex meadii : 5.8S, 1F
Carex rostrata : TRNL, 5.8S
Carex chordorrhiza : TRNL, 5.8S, 1F
Carex capillaris : TRNL, 5.8S, 1F
Carex rupestris : TRNL, 5.8S, 1F
Carex echinata : TRNL, 5.8S, 1F
Carex lachenalii : TRNL, 5.8S
Carex alba : 5.8S, 1F
Carex atrofusca : TRNL, 5.8S
Carex 

TypeError: ("object of type 'float' has no len()", u'occurred at index 1F')

In [11]:
record

SeqRecord(seq=Seq('ATAAAAAGCCCTTTTGATTTTCAAAATATTTCTTCTTTATTTTGATTTTGATGA...ATA', SingleLetterAlphabet()), id='AY115852.1', name='AY115852.1', description='AY115852.1 Uncinia rubra trnL-trnF intergenic spacer region, chloroplast sequence', dbxrefs=[])