In [1]:
import hail as hl
import hail.expr.aggregators as agg

In [2]:
hl.init()

Running on Apache Spark version 2.2.0
SparkUI available at http://10.1.7.107:4042
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version devel-3e72697c3f47
NOTE: This is a beta version. Interfaces may change
  during the beta period. We recommend pulling
  the latest changes weekly.


In [4]:
schema = {'variant': hl.tstr, 'rsid': hl.tstr, 'nCompleteSamples': hl.tint32,
         'AC': hl.tfloat64, 'ytx': hl.tfloat64, 'beta': hl.tfloat64,
         'se': hl.tfloat64, 'tstat': hl.tfloat64, 'pval': hl.tfloat64}

standing_height = hl.import_table('/Users/maccum/manhattan/50.assoc.tsv', key='variant', types=schema)

2018-06-27 15:03:28 Hail: INFO: Reading table with no type imputation
  Loading column 'variant' as type 'str' (user-specified)
  Loading column 'rsid' as type 'str' (user-specified)
  Loading column 'nCompleteSamples' as type 'int32' (user-specified)
  Loading column 'AC' as type 'float64' (user-specified)
  Loading column 'ytx' as type 'float64' (user-specified)
  Loading column 'beta' as type 'float64' (user-specified)
  Loading column 'se' as type 'float64' (user-specified)
  Loading column 'tstat' as type 'float64' (user-specified)
  Loading column 'pval' as type 'float64' (user-specified)



In [5]:
standing_height = standing_height.rename({"variant": "contig:pos:ref:alt1,alt2"})

In [6]:
sh_path = '/Users/maccum/manhattan/data/standing_height/'

In [7]:
standing_height.write(sh_path, overwrite=True)

2018-06-27 15:05:27 Hail: INFO: wrote 10894596 items in 35 partitions


In [8]:
standing_height = hl.read_table(sh_path)

In [9]:
standing_height.describe()

----------------------------------------
Global fields:
    None
----------------------------------------
Row fields:
    'contig:pos:ref:alt1,alt2': str 
    'rsid': str 
    'nCompleteSamples': int32 
    'AC': float64 
    'ytx': float64 
    'beta': float64 
    'se': float64 
    'tstat': float64 
    'pval': float64 
----------------------------------------
Key: ['contig:pos:ref:alt1,alt2']
----------------------------------------


In [10]:
standing_height.count()

10894596

In [3]:
sh2_path = '/Users/maccum/manhattan/data/standing_height2/'

In [11]:
# get table with global_position, neg_log_pval, color

colors = {'1' : "#F73A12", '2' : "#BFF712", '3' : "#F7B912", '4' : "#F78112", 
                '5' : "#1DA14F", '6' : "#651DA1", '7' : "#26DAE3", '8' : "#768CCC", 
                '9' : "#CF19EC", '10' : "#A11D7F", '11' : "#EC195C", '12' : "#19EC43", 
                '13' : "#30666F", '14' : "#F7CA48", '15' : "#48F770", '16' : "#7A48F7", 
                '17' : "#F74863", '18' : "#322C2D", '19' : "#B9C147", '20' : "#B7B0B1", 
                '21' : "#64C1B9", '22' : "#349C21", 'X' : "#2D396E"}

sh = standing_height.annotate(v = hl.parse_variant(standing_height['contig:pos:ref:alt1,alt2']))
sh = sh.annotate_globals(color_dict = colors)
sh = sh.annotate(global_position = sh.v.locus.global_position(),
                neg_log_pval = -hl.log(sh.pval),
                color = sh.color_dict[sh.v.locus.contig])
sh = sh.key_by('global_position').select('neg_log_pval', 'color')

sh.write(sh2_path, overwrite=True)

2018-06-27 15:09:28 Hail: INFO: Ordering unsorted dataset with network shuffle
2018-06-27 15:12:17 Hail: INFO: wrote 10894596 items in 35 partitions


In [12]:
sh.describe()

----------------------------------------
Global fields:
    'color_dict': dict<str, str> 
----------------------------------------
Row fields:
    'global_position': int64 
    'neg_log_pval': float64 
    'color': str 
----------------------------------------
Key: ['global_position']
----------------------------------------


In [13]:
max_global_position = sh.aggregate(agg.max(sh.global_position))
max_nlp = sh.aggregate(agg.max(sh.neg_log_pval))
(max_global_position, max_nlp)

2018-06-27 15:13:53 Hail: INFO: Ordering unsorted dataset with network shuffle
2018-06-27 15:16:04 Hail: INFO: Ordering unsorted dataset with network shuffle


(2880965782, 670.1746941369828)

In [1]:
import generator
from generator.generator import Generator

In [2]:
caffeineGen = Generator('../plots/caffeine_plots', '../data/caffeine.ht', regenerate=False)
caffeineGen.generate(2, new_log_file=True)
caffeineGen.generate(3)
#caffeineGen.generate(4)
#caffeineGen.generate(5)
#caffeineGen.generate(6)

Initializing Spark and Hail with default parameters...
Running on Apache Spark version 2.2.0
SparkUI available at http://10.1.7.107:4042
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version devel-3e72697c3f47
NOTE: This is a beta version. Interfaces may change
  during the beta period. We recommend pulling
  the latest changes weekly.


Zoom level: 2 |██████████████████████████████████████████████████| 100.0% Complete
Zoom level: 3 |██████████████████████████████████████████████████| 100.0% Complete


In [5]:
standingHeightGen = Generator('../plots/sh_plots', sh2_path, regenerate=False, max_position=2880965782, max_nlp=670.1746941369828)
standingHeightGen.generate(2, new_log_file=True)

Zoom level: 2 |████████████--------------------------------------| 25.0% CompleteZoom level: 2 |█████████████████████████-------------------------| 50.0% CompleteZoom level: 2 |█████████████████████████████████████-------------| 75.0% CompleteZoom level: 2 |██████████████████████████████████████████████████| 100.0% Complete


In [6]:
standingHeightGen.generate(3)

Zoom level: 3 |████████████████████████████████████████----------| 81.2% Complete

2018-06-27 17:39:39 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 3 |███████████████████████████████████████████-------| 87.5% Complete

2018-06-27 17:43:15 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 3 |██████████████████████████████████████████████----| 93.8% Complete

2018-06-27 17:44:49 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 3 |██████████████████████████████████████████████████| 100.0% Complete


In [7]:
standingHeightGen.generate(4)

Zoom level: 4 |█-------------------------------------------------| 3.1% Complete

2018-06-27 17:57:37 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 4 |██------------------------------------------------| 4.7% Complete

2018-06-27 17:59:12 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 4 |████----------------------------------------------| 9.4% Complete

2018-06-27 18:03:08 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 4 |█████---------------------------------------------| 10.9% Complete

2018-06-27 18:04:52 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 4 |████████------------------------------------------| 17.2% Complete

2018-06-27 18:10:02 Hail: INFO: Ordering unsorted dataset with network shuffle


Zoom level: 4 |█████████-----------------------------------------| 18.8% Complete

KeyboardInterrupt: 