In [None]:
# Input tables (tab-separated), given relative to the notebook's working directory.
# Pairwise gene distance table; loaded into `m` further down.
dists = '../out/associations/gene_distances.tsv'
# Per-OG k-mer hit summary; loaded into `k` further down.
kmer_hits = '../out/associations/summary_cont_lmm_kmer.tsv'
# OG annotation table (preferred name, representative protein length); loaded into `u`.
names = '../out/associations/associated_ogs.final.tsv'

In [None]:
# plotting imports
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text

# Plain white seaborn background, then uniform font sizes for every figure
# in the notebook (set in one go through rcParams).
sns.set_style('white')

plt.rcParams.update({
    'font.size': 11,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'axes.labelsize': 12,
    'axes.titlesize': 12,
    'legend.fontsize': 11,
})

In [None]:
import numpy as np
import pandas as pd
import networkx as nx

In [None]:
# OG annotation table, indexed once by OG identifier and reused for both lookups.
u = pd.read_table(names)
u_by_og = u.set_index('OG')
# OG identifier -> preferred display name
n = u_by_og['preferred_og_name'].to_dict()
# OG identifier -> representative protein length (kept as a Series so it
# aligns on the OG index when used as a divisor)
nsize = u_by_og['representative_protein_length']

In [None]:
# Gene distance table (tab-separated).
m = pd.read_csv(dists, sep='\t')

In [None]:
# Per-OG k-mer hits, with the hit count divided by the representative
# protein length of the OG (aligned on the OG index).
k = (pd.read_table(kmer_hits, index_col=0)
       .assign(normalized_hits=lambda hits: hits['hits'] / nsize))
# Swap OG identifiers for their preferred names where one is known.
k.index = [n.get(og, og) for og in k.index]
# Keep only the rows that have at least one specific hit.
has_specific_hits = k['specific_hits'] > 0
k = k[has_specific_hits]
# display name -> normalized hits, used later to size the network nodes
kh = k['normalized_hits'].to_dict()

Top gene hits
---

In [None]:
# Show the hit table, highest length-normalized hit count first.
k.sort_values(by='normalized_hits', ascending=False)

In [None]:
plt.figure(figsize=(8, 6.5))

# One large marker per row of `k`, shaded by its length-normalized hit count.
hit_density = k['normalized_hits']
plt.scatter(k['avg_maf'], k['maxp'],
            c=hit_density,
            s=500,
            edgecolors='grey',
            cmap=plt.get_cmap('Blues'),
            vmin=0,
            vmax=hit_density.max())

cbar = plt.colorbar()
cbar.ax.set_ylabel('hits\n(normalized to gene length)')

# Put each row's index label on top of its marker, then let adjustText
# move overlapping labels apart and draw arrows back to the points.
texts = []
for label, maf, pval in zip(k.index, k['avg_maf'], k['maxp']):
    texts.append(plt.text(maf, pval, label,
                          ha='center', va='center'))
adjust_text(texts,
            arrowprops=dict(arrowstyle='->', color='k'),
            force_points=15)

# NOTE(review): tick labels read 1E-12 ... 1E-16, so `maxp` is presumably
# stored as -log10(p) - confirm against the summary table.
tick_positions = list(range(12, 18, 2))
plt.yticks(tick_positions,
           ['1E-%d' % t for t in tick_positions])
plt.xlabel('average MAF')
plt.ylabel('maximum association pvalue');

Gene hits distances
---

In [None]:
# Reload the raw hit table: this time the index stays as OG identifiers and
# no specific-hit filter is applied, so `genes` can be matched against the
# og1/og2 columns of the distance table.
k = (pd.read_table(kmer_hits, index_col=0)
       .assign(normalized_hits=lambda hits: hits['hits'] / nsize))
# OGs with at least 0.1 hits per unit of protein length.
well_covered = k['normalized_hits'] >= 0.1
genes = set(k.loc[well_covered].index)

In [None]:
# Conditions shared by both selections.
# NOTE(review): `replicon` presumably flags pairs located on the same
# replicon - confirm against the script that writes the distance table.
on_replicon = m['replicon'] == True
different_ogs = m['og1'] != m['og2']

# Associated OGs: real pairs in strain IAI39 where both members are in `genes`.
real_pairs = m[on_replicon &
               different_ogs &
               (m['set'] == 'real') &
               (m['strain'] == 'IAI39') &
               m['og1'].isin(genes) &
               m['og2'].isin(genes)]
# Background: random pairs from strains IAI39 and IAI01.
# NOTE(review): the background uses two strains while the real set uses one;
# confirm this asymmetry is intended.
random_pairs = m[on_replicon &
                 different_ogs &
                 (m['set'] == 'random') &
                 m['strain'].isin(['IAI39', 'IAI01'])]

# Minimum distance per (og1, og2) pair. Selecting `distance` *before*
# aggregating avoids computing (and possibly failing on) the minimum of
# every other column in the table.
d = real_pairs.groupby(['og1', 'og2'])['distance'].min().reset_index()
r = random_pairs.groupby(['og1', 'og2'])['distance'].min().reset_index()

In [None]:
plt.figure(figsize=(7, 3))

# Density of log10(minimum distance) for the associated OG pairs (red)
# against the random-gene background (grey).
# NOTE(review): a distance of 0 would turn into -inf under log10 - confirm
# the distance table never contains zeros.
sns.kdeplot(np.log10(d['distance']),
            label='associated OGs',
            color='r')
sns.kdeplot(np.log10(r['distance']),
            label='random genes',
            color='grey')

# Recent seaborn releases no longer draw a legend just because `label=` was
# given, so request it explicitly; on older releases this simply redraws it.
plt.legend()

sns.despine(left=True)
plt.yticks([])
# The x axis is in log10 units; label the ticks as powers of ten.
plt.xticks(range(0, 10, 2),
           ['$10^{%d}$' % x for x in range(0, 10, 2)])
plt.xlabel('minimum observed gene distance');

In [None]:
# Largest distance (in the units of the `distance` column) for two OGs to be
# linked in the network drawn below.
max_distance = 10000

# Same selection as for the density plot (real pairs, strain IAI39, both OGs
# in `genes`), additionally restricted to close pairs. Overwrites `d`.
close_pairs = m[(m['replicon'] == True) &
                (m['set'] == 'real') &
                (m['og1'] != m['og2']) &
                (m['strain'] == 'IAI39') &
                m['og1'].isin(genes) &
                m['og2'].isin(genes) &
                (m['distance'] <= max_distance)]
# Select `distance` before aggregating so the minimum is only computed for
# the one column that is kept.
d = close_pairs.groupby(['og1', 'og2'])['distance'].min().reset_index()

In [None]:
# One edge per (og1, og2) row of `d`, carrying its distance as an edge attribute.
edge_columns = dict(source='og1', target='og2', edge_attr='distance')
g = nx.from_pandas_edgelist(d, **edge_columns)

In [None]:
# Force-directed layout. It starts from random positions, so fix the seed to
# get the same figure on every run. (`k` here is the layout's node-spacing
# parameter, not the hits DataFrame.)
graph_pos = nx.layout.spring_layout(g, k=0.45, seed=42)

In [None]:
plt.figure(figsize=(9, 9))

# Display name for every node (falls back to the OG identifier).
node_labels = {og: n.get(og, og) for og in g.nodes()}

# Node size depends on gene hits. `kh` is keyed by display name and only
# covers rows with specific hits; nodes missing from it get the smallest
# observed value, computed once here rather than once per node.
fallback_hits = min(kh.values(), default=0)
node_sizes = [kh.get(node_labels[og], fallback_hits) * 1000
              for og in g.nodes()]

# Draw nodes
nx.draw_networkx_nodes(g, graph_pos,
                       node_size=node_sizes,
                       node_color=sns.xkcd_rgb['light grey'],
                       edgecolors='k')
# Draw edges. The keyword is `edge_color`; `color` is not a parameter of
# draw_networkx_edges (silently ignored by old networkx, a TypeError now).
nx.draw_networkx_edges(g, graph_pos,
                       width=1,
                       edge_color='grey')
# Draw labels
nx.draw_networkx_labels(g, graph_pos, node_labels)

sns.despine(bottom=True, left=True)
plt.xticks([])
plt.yticks([]);

<h5><a href="javascript:toggle()" target="_self">toggle source code</a></h5>