In [13]:
import networkx as nx

In [54]:
# Load the previously built graph from disk.
# NOTE(review): read_gpickle unpickles the file, which can execute arbitrary
# code -- only load pickles that come from a trusted source.
G = nx.read_gpickle('Minto_Flats Final Graph.pkl')
# Peek at the first (node, attribute-dict) pair to see the node schema.
G.nodes(data=True)[0]

('A/shorebird/Delaware Bay/331/2008',
 {'band_num': None,
  'cloacal_id': None,
  'collection_date': Timestamp('2008-05-17 00:00:00'),
  'host': 'IRD:Shorebird/Avian',
  'isolation_date': Timestamp('2008-05-17 00:00:00'),
  'oral_id': None,
  'reassortant': True,
  'state': 'Delaware',
  'subtype': '(N6)',
  'web_tag': None})

In [55]:
# Peek at the first (source, sink, attribute-dict) triple to see the edge schema.
G.edges(data=True)[0]

('A/American black duck/Wisconsin/11OS3191/2011',
 'A/mallard/Wisconsin/11OS3098/2011',
 {'edge_type': 'reassortant',
  'keep': True,
  'pwi': 7.9405219999999996,
  'segments': {2: 0.97409299999999999,
   3: 0.99682999999999999,
   4: 0.99592800000000004,
   6: 0.99720900000000001,
   8: 0.99768800000000002}})

In [56]:
# Annotate each edge with 'timedelta': the whole number of days from the
# source's isolation date to the sink's isolation date.
for source, sink, attrs in G.edges(data=True):
    elapsed = G.node[sink]['isolation_date'] - G.node[source]['isolation_date']
    attrs['timedelta'] = elapsed.days

In [57]:
# Recompute the "reassortant" flag from the edges: clear it on every node,
# then set it on each node that is the sink of a 'reassortant' edge.
for node in G.nodes():
    G.node[node]['reassortant'] = False

reassortants_by_edges = set()
for _source, sink, attrs in G.edges(data=True):
    if attrs['edge_type'] != 'reassortant':
        continue
    G.node[sink]['reassortant'] = True
    reassortants_by_edges.add(sink)
len(reassortants_by_edges)

622

In [58]:
# Re-display the first edge to confirm the 'timedelta' attribute is now present.
G.edges(data=True)[0]

('A/American black duck/Wisconsin/11OS3191/2011',
 'A/mallard/Wisconsin/11OS3098/2011',
 {'edge_type': 'reassortant',
  'keep': True,
  'pwi': 7.9405219999999996,
  'segments': {2: 0.97409299999999999,
   3: 0.99682999999999999,
   4: 0.99592800000000004,
   6: 0.99720900000000001,
   8: 0.99768800000000002},
  'timedelta': 1})

In [59]:
# Add a weighting for the in-edges of reassortant nodes, stored on each edge
# as 'reassortment_weighting'.
#
# For every reassortant sink, each unordered pair of in-edges whose combined
# 'segments' keys cover all eight segments (1-8) counts as one complementary
# pair, and both edges of the pair get +1 'reassortment_involvement'.
# An edge's weighting is 2 * involvement / (total involvement over the node's
# in-edges); since each pair adds 2 to the total, the weightings into one
# node sum to 2.
from itertools import combinations

all_segments = set(range(1, 9))  # loop-invariant: the full 8-segment complement

for n, d in G.nodes(data=True):
    if not G.node[n]['reassortant']:
        continue

    in_edges = G.in_edges(n, data=True)

    # Zero the counters on every in-edge up front. This covers nodes with a
    # single in-edge (no pairs to visit) and makes the cell safe to re-run
    # without accumulating counts from a previous execution.
    for sc, sk, attrs in in_edges:
        G.edge[sc][sk]['reassortment_involvement'] = 0
        G.edge[sc][sk]['reassortment_weighting'] = 0

    for (sc1, sk1, d1), (sc2, sk2, d2) in combinations(in_edges, 2):
        if set(d1['segments']).union(d2['segments']) == all_segments:
            G.edge[sc1][sk1]['reassortment_involvement'] += 1
            G.edge[sc2][sk2]['reassortment_involvement'] += 1

    total = sum(G.edge[sc][sk]['reassortment_involvement']
                for sc, sk, attrs in in_edges)
    if total == 0:
        # No pair of in-edges forms a full complement: keep the weightings at
        # 0 instead of dividing by zero.
        continue

    for sc, sk, attrs in in_edges:
        involvement = G.edge[sc][sk]['reassortment_involvement']
        G.edge[sc][sk]['reassortment_weighting'] = 2 * involvement / float(total)

In [60]:
# Add a weighting for the in-edges of non-reassortant nodes, stored on each
# edge as 'full_complement_weighting': 1 / (number of in-edges of the sink).
# Edges that already carry the attribute are left untouched.

for node in G.nodes():
    if G.node[node]['reassortant'] != False:
        continue

    incoming = G.in_edges(node, data=True)
    for source, sink, attrs in incoming:
        if 'full_complement_weighting' in attrs:
            continue
        G.edge[source][sink]['full_complement_weighting'] = 1.0 / len(incoming)
                
        

In [61]:
# Spot-check the edge at index 6 to see the weighting attributes on an edge.
G.edges(data=True)[6]

('A/mallard/Interior Alaska/10BM12374R0/2010',
 'A/mallard/Interior Alaska/10BM12350R0/2010',
 {'5season_transmission': 'Fledge-Fledge',
  'edge_type': 'full_complement',
  'family_transmission': 'Intrafamily',
  'foraging_transmission': 'Intraforaging',
  'full_complement_weighting': 0.2,
  'order_transmission': 'Intra-Order',
  'pwi': 8.0,
  'segments': {1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 6: 1.0, 7: 1.0, 8: 1.0},
  'species_transmission': 'Intraspecies',
  'time_delta': datetime.timedelta(0),
  'timedelta': 0,
  'tr_timeclass': 'Direct Transmission',
  'trtype': 'Intra-Species',
  'weight': 8.0,
  'weighting_timedelta': 0.5})

In [62]:
# Export the node list to CSV: one row per node, with the node's attributes
# plus an 'isolate_name' column holding the node identifier.
import pandas as pd

node_data = []
for n, d in G.nodes(data=True):
    # Copy the attribute dict: `d` is the graph's own node-attribute dict, so
    # writing 'isolate_name' into it directly would silently add that
    # attribute to every node of G.
    data = dict(d)
    data['isolate_name'] = n

    node_data.append(data)

pd.DataFrame(node_data).to_csv('20150511 Minto Flats Nodelist.csv')

In [63]:
# Export the edge list to CSV: one row per edge with 'source' and 'sink'
# columns plus the edge attributes. Dict-valued attributes (e.g. 'segments')
# are flattened one level, so their keys become columns of their own.
edge_data = []
for source, sink, attrs in G.edges(data=True):
    record = {'source': source, 'sink': sink}

    for key, value in attrs.items():
        if isinstance(value, dict):
            record.update(value)
        else:
            record[key] = value

    edge_data.append(record)

pd.DataFrame(edge_data).to_csv('20150511 Minto Flats Edgelist.csv')

In [64]:
# Collect the nodes whose 'reassortant' flag is set and count them; the count
# can be compared with len(reassortants_by_edges) as a consistency check.
reassortants_by_nodes = {
    name
    for name, attrs in G.nodes(data=True)
    if attrs['reassortant'] == True
}

len(reassortants_by_nodes)

622