Pinned imports and utility functions. Use in other notebooks, e.g.:

```
%run imports_20150407.ipynb
```

## Imports

In [1]:
# python standard library
import sys
import os
import operator
import itertools
import collections
import functools
import glob
import csv
import datetime
import bisect
import sqlite3
import subprocess
import random
import gc
import shutil
import shelve

In [2]:
# general purpose third party packages

import cython
%reload_ext Cython

import numpy as np
nnz = np.count_nonzero

import scipy
import scipy.stats
import scipy.spatial.distance

import numexpr

import h5py

import tables

import bcolz

import pandas

import IPython
# from IPython.html.widgets import interact, interactive
# from IPython.html import widgets
from IPython.display import clear_output, display, HTML



import statsmodels

import sklearn
import sklearn.decomposition
import sklearn.manifold


import sqlalchemy



import petl as etl
etl.config.display_index_header = True

import humanize
# VERSION (0, 4) doesn't match PyPI (0.5.1)
from humanize import naturalsize, intcomma, intword

In [1]:
# plotting setup
%matplotlib inline
import matplotlib as plt
from matplotlib.colors import ListedColormap
from matplotlib.gridspec import GridSpec

import matplotlib_venn as venn

import seaborn as sns
sns.set_context('paper')
sns.set_style('white')
sns.set_style('ticks')

rcParams = plt.rcParams
rcParams['font.size'] = 9
rcParams['axes.labelsize'] = 9
rcParams['xtick.labelsize'] = 9
rcParams['ytick.labelsize'] = 9
rcParams['legend.fontsize'] = 9
rcParams['axes.linewidth'] = .5
rcParams['lines.linewidth'] = .5
rcParams['patch.linewidth'] = .5
rcParams['font.family'] = 'arial'
rcParams['ytick.direction'] = 'out'
rcParams['xtick.direction'] = 'out'
rcParams['savefig.jpeg_quality'] = 100
rcParams['savefig.dpi'] = 120
rcParams['lines.markeredgewidth'] = .5

In [4]:
# bio third party packages

import Bio

import pyfasta
# no version identifier, cannot verify version

import pysam

import pysamstats

import petlx
import petlx.bio

import vcf

In [5]:
import allel
# be flexible about version
print('allel', allel.__version__)

allel 0.11


## Utility functions

In [6]:
import sys


def log(*msg):
    """Write the given values to stdout as one space-separated line.

    Each value is converted with ``str``; stdout is flushed immediately
    afterwards so the message shows up without delay.
    """
    stream = sys.stdout
    text = ' '.join([str(part) for part in msg])
    print(text, file=stream)
    stream.flush()
    
from contextlib import contextmanager

@contextmanager
def timer(*msg):
    """Context manager reporting how long the enclosed block took.

    On normal completion a line ending in ``done in <duration>`` is printed
    to stdout. If the block raises, a line ending in
    ``errored after <duration>`` is printed to stderr and the exception is
    re-raised unchanged. Any ``msg`` values are converted with ``str`` and
    prepended to the report, separated by ``', '``.
    """
    started = datetime.datetime.now()

    def report(outcome, stream):
        # elapsed wall-clock time, rendered by humanize
        seconds = (datetime.datetime.now() - started).total_seconds()
        summary = '%s %s' % (outcome, humanize.naturaldelta(seconds))
        pieces = [str(m) for m in msg]
        pieces.append(summary)
        print(', '.join(pieces), file=stream)
        stream.flush()

    try:
        yield
    except BaseException:
        # catch everything (same reach as a bare except), report, re-raise
        report('errored after', sys.stderr)
        raise
    else:
        report('done in', sys.stdout)

In [7]:
def cache_hdf5(path, *names, **h5dcreate_kwargs):
    """Decorator factory that caches a function's result(s) in an HDF5 file.

    The decorated function's positional arguments are joined with ``'/'``
    (after ``str`` conversion) to form the name of a group inside the HDF5
    file at `path`. The result is stored in that group as one dataset per
    entry of `names`, and the group is tagged with a ``__success__``
    attribute once everything has been written.

    Parameters
    ----------
    path : str
        Path of the HDF5 file, opened in append mode on every call.
    *names : str
        Dataset names. With a single name the function's return value is
        stored as one dataset; with several names the return value is
        iterated and paired with the names.
    **h5dcreate_kwargs
        Passed to ``create_dataset``. ``chunks`` defaults to True and
        ``compression`` to ``'gzip'``.

    The wrapper accepts three keyword-only controls, which are removed
    before anything else happens:

    verbose : bool, default True
        Log 'building' / 'saving' / 'loading' / 'skipping' messages.
    dry : bool, default True
        Reuse an existing cached result ("don't repeat yourself"); pass
        ``dry=False`` to force a rebuild.
    skip : bool, default False
        When a cached result exists, do not load it (the wrapper then
        returns None).

    Returns
    -------
    The built or loaded result: a single array when one name was given,
    a list of arrays when loading several names, or None when skipped.

    NOTE(review): keyword arguments other than the three controls above are
    not forwarded to the wrapped function, and only positional arguments
    take part in the cache key.
    """
    h5dcreate_kwargs.setdefault('chunks', True)
    h5dcreate_kwargs.setdefault('compression', 'gzip')

    def decorator(f):
        # keep the wrapped function's __name__ / __doc__ on the wrapper
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            result = None

            # be verbose
            verbose = kwargs.pop('verbose', True)
            # don't repeat yourself: reuse the cached result if available
            dry = kwargs.pop('dry', True)
            # skip loading if cached
            skip = kwargs.pop('skip', False)

            # group name
            grp = '/'.join(map(str, args))

            with h5py.File(path, mode='a') as h5f:
                h5g = h5f.require_group(grp)

                if dry and '__success__' in h5g.attrs:
                    # no need to build
                    if skip:
                        if verbose:
                            log('skipping', *args)
                    else:
                        if verbose:
                            log('loading', *args)
                        if len(names) == 1:
                            result = h5g[names[0]][:]
                        else:
                            result = [h5g[n][:] for n in names]

                else:
                    # need to build
                    if verbose:
                        log('building', *args)

                    # reset success mark if present, so that a failure
                    # part-way through does not leave a stale marker
                    if '__success__' in h5g.attrs:
                        del h5g.attrs['__success__']

                    # compute result
                    result = f(*args)

                    if verbose:
                        log('saving', *args)
                    if len(names) == 1:
                        n = names[0]
                        if n in h5g:
                            del h5g[n]
                        h5g.create_dataset(n, data=result, **h5dcreate_kwargs)
                    else:
                        for n, r in zip(names, result):
                            if n in h5g:
                                del h5g[n]
                            h5g.create_dataset(n, data=r, **h5dcreate_kwargs)

                    # mark success
                    h5g.attrs['__success__'] = True

            return result
        return wrapper
    return decorator
                

In [8]:
# autosome identifiers
autosomes = ('2R', '2L', '3R', '3L')
# all chromosome identifiers: the autosomes followed by X
chromosomes = autosomes + ('X',)


class GenomeFigure(object):
    """Figure with one subplot per chromosome, arranged in two columns.

    Subplots are filled row by row. Panels in the left column are
    right-aligned (their x-axis ends at the chromosome size) and carry the
    y-axis on the left; panels in the right column start at 0 and carry the
    y-axis on the right. All panels span the same x range, that of the
    largest chromosome, so they share a common scale.

    Parameters
    ----------
    genome : mapping
        Indexed by chromosome name; ``np.array(genome[chrom]).size`` is
        taken as the chromosome size.
    *args, **kwargs
        Passed to ``plt.figure``, except for the optional ``chromosomes``
        keyword (defaults to ``['2R', '2L', '3R', '3L', 'X']``) listing the
        chromosomes to draw.

    Attributes are ``fig`` (the figure), ``ax`` (dict mapping chromosome
    name to its axes) and ``chromosomes``.
    """

    def __init__(self, genome, *args, **kwargs):
        self.chromosomes = kwargs.pop('chromosomes', ['2R', '2L', '3R', '3L', 'X'])
        # measure every chromosome once, rather than converting it to an
        # array a second time inside the plotting loop
        sizes = {chrom: np.array(genome[chrom]).size
                 for chrom in self.chromosomes}
        maxchrsize = max(sizes.values())
        # two columns; at least 3 rows (the historical layout), more when
        # there are more than 6 chromosomes
        nrows = max(3, (len(self.chromosomes) + 1) // 2)
        fig = plt.figure(*args, **kwargs)
        self.fig = fig
        self.ax = dict()
        for i, chrom in enumerate(self.chromosomes):
            ax = fig.add_subplot(nrows, 2, i+1)
            self.ax[chrom] = ax
            size = sizes[chrom]
            if i % 2 == 1:
                # right-hand column: starts at 0, y-axis on the right
                sns.despine(ax=ax, offset=10, top=True, left=True, right=False)
                ax.set_xlim(0, maxchrsize)
                ax.yaxis.tick_right()
                ax.yaxis.set_label_position('right')
            else:
                # left-hand column: ends at the chromosome size, y-axis on
                # the left
                ax.set_xlim(size - maxchrsize, size)
                ax.yaxis.tick_left()
                sns.despine(ax=ax, offset=10, top=True, left=False, right=True)
            # labels are derived from the tick positions so that both
            # sequences always have the same length
            ticks, labels = self._megabase_ticks(size)
            ax.set_xticks(ticks)
            ax.set_xticklabels(labels)
            ax.set_title(chrom, fontweight='bold')
            ax.xaxis.tick_bottom()
        fig.tight_layout()

    @staticmethod
    def _megabase_ticks(size, step=5000000):
        """Return (positions, labels) for ticks every `step` up to `size`.

        Positions are ``0, step, 2*step, ...`` strictly below `size`; each
        label is the matching position divided by 1,000,000.
        """
        ticks = list(range(0, size, step))
        labels = [t // 1000000 for t in ticks]
        return ticks, labels

    def apply(self, f, **kwargs):
        """Call ``f(chrom, ax, **kwargs)`` for each chromosome.

        The optional ``chromosomes`` keyword restricts (or reorders) the
        chromosomes visited; it defaults to all chromosomes of the figure
        and is not passed on to `f`.
        """
        chromosomes = kwargs.pop('chromosomes', self.chromosomes)
        for chrom in chromosomes:
            ax = self.ax[chrom]
            f(chrom, ax, **kwargs)
        
        
def subplots(*args, **kwargs):
    """Create a figure and axes via ``plt.subplots`` and despine the axes.

    All arguments are passed straight to ``plt.subplots``; its
    ``(fig, ax)`` return value is handed back unchanged.

    Despining is applied through the figure rather than through ``ax``,
    because ``plt.subplots`` returns an array of axes for multi-panel grids
    (or with ``squeeze=False``) and ``sns.despine(ax=...)`` only accepts a
    single axes object.
    """
    fig, ax = plt.subplots(*args, **kwargs)
    sns.despine(fig=fig, offset=10)
    return fig, ax