Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
horta committed Jun 6, 2017
2 parents e3a4707 + c4dc54f commit e4b1cdc
Show file tree
Hide file tree
Showing 11 changed files with 324 additions and 226 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# TODO for 1.1.0

- [ ] limix.qtl.qtl_test_lmm: 10k by 500k datasets, low memory footprint
(i.e., <=60 GB).
- [ ] limix.qtl.qtl_test_glmm: 10k by 500k datasets, low memory footprint
(i.e., <=60 GB).
- [ ] Both limix.qtl.qtl_test_glmm and limix.qtl.qtl_test_lmm should accept
and take advantage of dask array.
- [ ] limix.qtl.qtl_test_lmm: show progress bar like limix.qtl.qtl_test_glmm.
- [ ] limix.qtl.qtl_test_lmm, limix.qtl.qtl_test_glmm: raise exception for
wrong parameters (NaN, Inf) and coerce sequences to numpy arrays.
- [ ] Standardize arguments, set labels, set title, set colors, for plotting
functions: limix.plot.plot_manhattan, limix.plot.qqplot,
limix.plot.plot_kinship.
- [ ] Define ConvergenceError exception to be raised whenever our methods
doesn't converge.
- [ ] Convert documentation style to numpy style, of every function.

# TODO for 1.2.0

- [ ] Nicely print a summary of a QTL and heritability result.
- [ ] I/O for BGEN file format.
- [ ] Add full-world QTL and heritability analysis into readthedocs.

# Limix

[![PyPI-License](https://img.shields.io/pypi/l/limix.svg?style=flat-square)](https://pypi.python.org/pypi/limix/)
Expand Down
3 changes: 2 additions & 1 deletion limix/heritability/test/test_heritability.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def test_heritability_estimate_poisson():
z = dot(G, random.randn(100)) / sqrt(100)
y = random.poisson(exp(z))

assert_allclose(estimate(y, 'poisson', K, verbose=False), 0.6998737749318877)
assert_allclose(
estimate(y, 'poisson', K, verbose=False), 0.6998737749318877)
2 changes: 2 additions & 0 deletions limix/io/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from os.path import exists


def file_type(filepath):
imexts = ['.png', '.bmp', '.jpg', 'jpeg']
if filepath.endswith('.hdf5') or filepath.endswith('.h5'):
Expand All @@ -16,5 +17,6 @@ def file_type(filepath):
return 'image'
return 'unknown'


def _is_bed(filepath):
return all([exists(filepath + ext) for ext in ['.bed', '.bim', '.fam']])
8 changes: 7 additions & 1 deletion limix/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
.. autofunction:: limix.plot.qqplot
Power plots
^^^^^^^^^^^
.. autofunction:: limix.plot.plot_power_curve
Kinship
^^^^^^^
Expand All @@ -30,6 +35,7 @@
from .manhattan import plot_manhattan
from .kinship import plot_kinship
from .image import see_image
from .power import plot_power_curve

__all__ = ['plot_manhattan', 'qqplot', 'plot_normal', 'plot_kinship',
'see_image']
'see_image', 'plot_power_curve']
291 changes: 129 additions & 162 deletions limix/plot/manhattan.py
Original file line number Diff line number Diff line change
@@ -1,191 +1,158 @@
# Copyright(c) 2014, The LIMIX developers (Christoph Lippert, Paolo Francesco Casale, Oliver Stegle)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import scipy as sp


def plot_manhattan(pv, position=None, posCum=None, chromBounds=None,
thr=None, qv=None, lim=None, xticklabels=True,
alphaNS=0.1, alphaS=0.5, colorNS='DarkBlue',
colorS='Orange', ax=None, thr_plotting=None,
labelS=None, labelNS=None):
r"""Produce a manhattan plot.
from __future__ import division

Args:
pv (array_like): pvalues.
from numpy import arange, asarray, cumsum, flipud, log10

position (list or :class:`pandas.DataFrame`, optional):
positions in chromosome/chromosomal basepair position format.
It can be specified as

- list ``[chrom, pos]`` where ``chrom`` and ``pos`` are
*ndarray* with chromosome values and basepair positions;
- pandas DataFrame of chromosome values (key='chrom') and
basepair positions (key='pos').
def plot_manhattan(df,
alpha=None,
null_style=dict(alpha=0.1, color='DarkBlue'),
alt_style=dict(alpha=0.5, color='Orange'),
ax=None):
r"""Produce a manhattan plot.
Alternatively, variant positions can be specified by
setting directly the cumulative position (``posCum``).
Parameters
----------
df : :class:`pandas.DataFrame`
A Pandas DataFrame containing columns pv for p-values, pos for
base-pair positions, and chrom for chromossome names..
alpha : float
Threshold for significance. Defaults to 0.01 significance level
(bonferroni-adjusted).
ax : :class:`matplotlib.axes.AxesSubplot`:
The target handle for this figure. If None, the current axes is set.
Returns
-------
:class:`matplotlib.axes.AxesSubplot`
Axes object.
posCum (array_like, optional):
cumulative position.
It has effect only if ``position`` is not specified.
By default, ``posCum`` is set to ``range(S)`` where
`S` is the number of variants.
Examples
--------
.. plot::
from numpy.random import RandomState
from numpy import arange, ones, kron
import pandas as pd
from limix.plot import plot_manhattan
from matplotlib import pyplot as plt
random = RandomState(1)
pv = random.rand(5000)
pv[1200:1250] = random.rand(50)**4
chrom = kron(arange(1,6), ones(1000))
pos = kron(ones(5), arange(1,1001))
data = dict(pv=pv, chrom=chrom, pos=pos)
plot_manhattan(pd.DataFrame(data=data))
plt.tight_layout()
plt.show()
"""

chromBounds (array_like, optional):
chrom boundaries on cumulative positions.
It has effect only if ``position`` is not specified.
If neither ``position`` nor ``chromBounds`` are
specified, chomosome boundaries are not plotted.
import matplotlib.pyplot as plt

qv (array_like, optional):
qvalues.
If provided, threshold for significance is set
on qvalues but pvalues are plotted.
ax = plt.gca() if ax is None else ax

thr (float, optional):
threshold for significance.
The default is 0.01 significance level (bonferroni-adjusted)
if qvs are not specified, or 0.01 FDR if qvs specified.
if 'pos' not in df:
df['pos'] = arange(df.shape[0])

lim (float, optional):
top limit on y-axis.
The default value is -1.2*log(pv.min()).
if 'label' not in df:
chrom = df['chrom'].astype(int).astype(str)
pos = df['pos'].astype(int).astype(str)
df['label'] = (
'chrom' + chrom + '_pos' + pos)

xticklabels (bool, optional):
if true, xtick labels are printed.
The default value is True.
df = _abs_pos(df)

alphaNS (float, optional):
transparency value of non-significant variants.
Must be in [0, 1].
if alpha is None:
alpha = 0.01 / df.shape[0]

alphaS (float, optional):
transparency of significant variants. Must be in [0, 1].
ytop = -1.2 * log10(min(df['pv'].min(), alpha))

ax (:class:`matplotlib.axes.AxesSubplot`):
the target handle for this figure.
If None, the current axes is set.
_plot_chrom_strips(ax, df, ytop)
_plot_points(ax, df, alpha, null_style, alt_style)
_set_frame(ax, df, ytop)

thr_plotting (float):
if specified, only P-values that are smaller
than thr_plotting are plotted.
ax.set_ylabel('-log$_{10}$pv')
ax.set_xlabel('chromosome')

labelS (str):
optional plotting label for significant variants.
_set_ticks(ax, _chrom_bounds(df))

labelNS (str):
optional plotting label for non significnat loci.
return ax

Returns:
:class:`matplotlib.axes.AxesSubplot`: matplotlib subplot

Examples
--------
def _set_frame(ax, df, ytop):
ax.set_ylim(0, ytop)
ax.set_xlim(0, df['abs_pos'].max())

.. plot::
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)

from numpy.random import RandomState
from numpy import arange, ones, kron
from limix.plot import plot_manhattan
from matplotlib import pyplot as plt
random = RandomState(1)

pv = random.rand(5000)
pv[1200:1250] = random.rand(50)**4
def _plot_points(ax, df, alpha, null_style, alt_style):
null_df = df.loc[df['pv'] >= alpha, :]
alt_df = df.loc[df['pv'] < alpha, :]

chrom = kron(arange(1,6), ones(1000))
pos = kron(ones(5), arange(1,1001))
ax.plot(null_df['abs_pos'], -log10(null_df['pv']), '.', ms=5, **null_style)
ax.plot(alt_df['abs_pos'], -log10(alt_df['pv']), '.', ms=5, **alt_style)

fig = plt.figure(1, figsize=(8,3))
plt.subplot(111)
plot_manhattan(pv, [chrom, pos])
plt.tight_layout()
for i in range(alt_df.shape[0]):
x = alt_df['abs_pos'].values[i]
y = -log10(alt_df['pv'].values[i])
_annotate(ax, x, y, alt_df['label'].values[i])

plt.show()
"""
import matplotlib.pylab as plt
from limix.util import estCumPos
if ax is None:
ax = plt.gca()

if position is not None:
posCum, chromBounds = estCumPos(position, return_chromstart=True)
elif posCum is None:
posCum = sp.arange(len(pv))

if thr is None:
thr = 0.01 / float(posCum.shape[0])

if lim is None:
lim = -1.2 * sp.log10(sp.minimum(pv.min(), thr))

if chromBounds is None:
chromBounds = sp.array([[0, posCum.max()]])
else:
chromBounds = sp.concatenate([chromBounds, sp.array([posCum.max()])])

n_chroms = chromBounds.shape[0]
for chrom_i in range(0, n_chroms - 1, 2):
plt.fill_between(posCum, 0, lim, where=(posCum > chromBounds[chrom_i]) & (
posCum < chromBounds[chrom_i + 1]), facecolor='LightGray', linewidth=0, alpha=0.5)

if thr_plotting is not None:
if pv is not None:
i_small = pv < thr_plotting
elif qv is not None:
i_small = qv < thr_plotting

if qv is not None:
qv = qv[i_small]
if pv is not None:
pv = pv[i_small]
if posCum is not None:
posCum = posCum[i_small]

if qv is None:
Isign = pv < thr
else:
Isign = qv < thr

plt.plot(posCum[~Isign], -sp.log10(pv[~Isign]), '.',
color=colorNS, ms=5, alpha=alphaNS, label=labelNS)
plt.plot(posCum[Isign], -sp.log10(pv[Isign]), '.',
color=colorS, ms=5, alpha=alphaS, label=labelS)

if qv is not None:
plt.plot([0, posCum.max()], [-sp.log10(thr), -
sp.log10(thr)], '--', color='Gray')

plt.ylim(0, lim)

plt.ylabel('-log$_{10}$pv')
plt.xlim(0, posCum.max())
xticks = sp.array([chromBounds[i:i + 2].mean()
for i in range(chromBounds.shape[0] - 1)])
ax.set_xticks(xticks)
plt.xticks(fontsize=6)

if xticklabels:
ax.set_xticklabels(sp.arange(1, n_chroms + 1))
plt.xlabel('Chromosome')
else:
ax.set_xticklabels([])
def _plot_chrom_strips(ax, df, ytop):
uchroms = df['chrom'].unique()
for i in range(0, len(uchroms), 2):
ax.fill_between(
df['abs_pos'],
0,
ytop,
where=df['chrom'] == uchroms[i],
facecolor='LightGray',
linewidth=0,
alpha=0.5)

ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)

def _set_ticks(ax, chrom_bounds):
n = len(chrom_bounds) - 1
xticks = asarray([chrom_bounds[i:i + 2].mean() for i in range(n)])
ax.set_xticks(xticks)
ax.tick_params(axis='x', which='both', labelsize=6)
ax.set_xticklabels(arange(1, n + 2))
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

return ax

def _abs_pos(df):
uchroms = df['chrom'].unique()
chrom_ends = [df['pos'][df['chrom'] == c].max() for c in uchroms]

offset = flipud(cumsum(chrom_ends)[:-1])

df['abs_pos'] = df['pos'].copy()

uchroms = list(reversed(uchroms))
for i in range(len(offset)):
ix = df['chrom'] == uchroms[i]
df.loc[ix, 'abs_pos'] = df.loc[ix, 'abs_pos'] + offset[i]

return df


def _chrom_bounds(df):
uchroms = df['chrom'].unique()
v = [df['abs_pos'][df['chrom'] == c].min() for c in uchroms]
return asarray(v + [df['abs_pos'].max()])


def _annotate(ax, x, y, text):
ax.annotate(
text,
xy=(x, y),
xytext=(-18, 18),
textcoords='offset points',
fontsize=6,
ha='center',
va='bottom',
bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),
arrowprops=dict(
arrowstyle='->', connectionstyle='arc3,rad=0.5', color='red'))

0 comments on commit e4b1cdc

Please sign in to comment.