In [1]:
import hail as hl
import hail.expr.aggregators as agg

In [2]:
# Start the Hail session used by every table operation in this notebook.
# NOTE(review): quiet=True presumably reduces startup logging -- confirm against the Hail docs.
hl.init(quiet=True)

In [3]:
# Column name -> Hail type for the association TSV; handed to
# hl.import_table(types=...) when the file is loaded.
schema = dict(
    variant=hl.tstr,
    rsid=hl.tstr,
    nCompleteSamples=hl.tint32,
    AC=hl.tfloat64,
    ytx=hl.tfloat64,
    beta=hl.tfloat64,
    se=hl.tfloat64,
    tstat=hl.tfloat64,
    pval=hl.tfloat64,
)

# On-disk location of the imported association table (written/read below).
standing_height_mt_path = '/Users/maccum/manhattan/data/standing_height/'

# On-disk location of the table keyed by global position used for plotting.
manhattan_table_path = '/Users/maccum/manhattan/data/standing_height2/'

# Contig label -> hex colour. Labels run '1'..'22' followed by 'X'; the
# palette is listed in that same order, one colour per contig.
colors = dict(zip(
    [str(autosome) for autosome in range(1, 23)] + ['X'],
    [
        "#F73A12", "#BFF712", "#F7B912", "#F78112",
        "#1DA14F", "#651DA1", "#26DAE3", "#768CCC",
        "#CF19EC", "#A11D7F", "#EC195C", "#19EC43",
        "#30666F", "#F7CA48", "#48F770", "#7A48F7",
        "#F74863", "#322C2D", "#B9C147", "#B7B0B1",
        "#64C1B9", "#349C21", "#2D396E",
    ],
))

In [None]:
# Load the association results TSV as a Hail table, typed by `schema` and
# keyed on the 'variant' column.
standing_height = hl.import_table(
    '/Users/maccum/manhattan/50.assoc.tsv',
    types=schema,
    key='variant',
)

In [5]:
# Rename the key column 'variant' to 'contig:pos:ref:alt1,alt2'; later cells
# look the field up under this new name.
# NOTE: rebinding `standing_height` makes this cell non-idempotent -- a second
# run would no longer find a 'variant' field.
standing_height = standing_height.rename({"variant": "contig:pos:ref:alt1,alt2"})

In [7]:
# Persist the renamed table at standing_height_mt_path; overwrite=True lets
# the write proceed when output already exists at that path.
standing_height.write(standing_height_mt_path, overwrite=True)

2018-06-27 15:05:27 Hail: INFO: wrote 10894596 items in 35 partitions


In [13]:
# Load the table stored at standing_height_mt_path and rebind `standing_height` to it.
standing_height = hl.read_table(standing_height_mt_path)

In [14]:
# Print the table schema: global fields, row fields and key.
standing_height.describe()

----------------------------------------
Global fields:
    None
----------------------------------------
Row fields:
    'contig:pos:ref:alt1,alt2': str 
    'rsid': str 
    'nCompleteSamples': int32 
    'AC': float64 
    'ytx': float64 
    'beta': float64 
    'se': float64 
    'tstat': float64 
    'pval': float64 
----------------------------------------
Key: ['contig:pos:ref:alt1,alt2']
----------------------------------------


In [15]:
# Row count of the table (displayed as the cell's output).
standing_height.count()

10894596

In [11]:
# Derive the plotting table: one row per variant carrying its global genomic
# position, -log(p-value) and the colour assigned to its contig.
sh = (
    standing_height
    # Parse the string key into a structured variant so its locus is accessible.
    .annotate(v=hl.parse_variant(standing_height['contig:pos:ref:alt1,alt2']))
    # Make the contig -> colour mapping available as a table global.
    .annotate_globals(color_dict=colors)
)
sh = sh.annotate(
    global_position=sh.v.locus.global_position(),
    # NOTE(review): hl.log is called without a base argument, so this is
    # presumably the natural log rather than the log10 usual for Manhattan
    # plots -- confirm that is intended.
    neg_log_pval=-hl.log(sh.pval),
    color=sh.color_dict[sh.v.locus.contig],
)
# Re-key on global position and keep only the fields used for plotting.
sh = sh.key_by('global_position').select('neg_log_pval', 'color')

sh.write(manhattan_table_path, overwrite=True)

2018-06-27 15:09:28 Hail: INFO: Ordering unsorted dataset with network shuffle
2018-06-27 15:12:17 Hail: INFO: wrote 10894596 items in 35 partitions


In [None]:
# Load the plotting table stored at manhattan_table_path and rebind `sh` to it.
sh = hl.read_table(manhattan_table_path)

In [12]:
# Print the plotting table's schema: global fields, row fields and key.
sh.describe()

----------------------------------------
Global fields:
    'color_dict': dict<str, str> 
----------------------------------------
Row fields:
    'global_position': int64 
    'neg_log_pval': float64 
    'color': str 
----------------------------------------
Key: ['global_position']
----------------------------------------


In [13]:
# Compute both maxima in a single aggregation. Issuing two separate
# sh.aggregate(...) calls scans the table twice, which is expensive on a
# table of this size.
_maxima = sh.aggregate(hl.struct(
    max_global_position=agg.max(sh.global_position),
    max_nlp=agg.max(sh.neg_log_pval),
))
max_global_position = _maxima.max_global_position
max_nlp = _maxima.max_nlp
# Display the pair; these values are used as the plot axis bounds.
(max_global_position, max_nlp)

2018-06-27 15:13:53 Hail: INFO: Ordering unsorted dataset with network shuffle
2018-06-27 15:16:04 Hail: INFO: Ordering unsorted dataset with network shuffle


(2880965782, 670.1746941369828)

In [4]:
#import generator
#from generator.generator import Generator
import plotgen as pg
#from plotgen.src.gen.gen import PlotGenerator
#help(pg)

In [5]:
# Earlier invocation via the `Generator` class, left commented out:
#caffeineGen = Generator('../plots/caffeine_plots', '../data/caffeine.ht', regenerate=False)

# Plot generator for the caffeine table.
caffeineGen = pg.PlotGenerator(
    root_folder='/Users/maccum/manhattan/plots/new_caffeine_plots',
    regenerate=False,
    table_path='/Users/maccum/manhattan/data/caffeine.ht',
)

# Zoom level 2 is generated first, with new_log_file=True; levels 3 through 7
# then follow in ascending order.
caffeineGen.generate(2, new_log_file=True)
for zoom_level in range(3, 8):
    caffeineGen.generate(zoom_level)

Zoom level: 7 |██████████████████████████████████████████████████| 100.0% Complete


In [5]:
# Earlier invocation via the `Generator` class, left commented out:
#standingHeightGen = Generator('../plots/sh_plots', sh2_path, regenerate=False, 
#                              max_position=2880965782, max_nlp=670.1746941369828)

# Plot generator for the standing-height table. The axis bounds are the
# maxima shown by the aggregate cell's output (2880965782, 670.1746941369828),
# hardcoded here and padded by 5 on each side of x and by 1 at the top of y.
standingHeightGen = pg.PlotGenerator(
    root_folder='../plots/new_standing_height_plots',
    regenerate=False,
    table_path=manhattan_table_path,
    x_axis_range=[-5, 2880965782 + 5],
    y_axis_range=[0, 670.1746941369828 + 1],
)

In [8]:
# Generate the zoom-level-2 plots.
# NOTE(review): new_log_file=True presumably starts a fresh log file -- confirm in plotgen.
standingHeightGen.generate(2, new_log_file=True)

Zoom level: 2 |██████████████████████████████████████████████████| 100.0% Complete


In [7]:
# Generate the zoom-level-3 plots.
standingHeightGen.generate(3)

Zoom level: 3 |██████████████████████████████████████████████████| 100.0% Complete


In [6]:
# Generate the zoom-level-4 plots.
standingHeightGen.generate(4)

Zoom level: 4 |██████████████████████████████████████████████████| 100.0% Complete


In [7]:
# Generate the zoom-level-5 plots.
standingHeightGen.generate(5)

Zoom level: 5 |██████████████████████████████████████████████████| 100.0% Complete


In [19]:
# Generate the zoom-level-6 plots (long-running; see the timing note on the call).
standingHeightGen.generate(6) # took 9 hours

Zoom level: 6 |██████████████████████████████████████████████████| 100.0% Complete


In [None]:
# Generate the zoom-level-7 plots. The saved output below shows this run at
# 0.6% complete, i.e. it had not finished when the notebook was saved.
standingHeightGen.generate(7)

Zoom level: 7 |--------------------------------------------------| 0.6% Complete

In [None]:

#for print("zone: "+str(zone.a_range)+" "+str(zone.b_range))
    #print(zone.a_range.extend(zone.b_range)) # MUTATES object; do not do
#print(standingHeightGen.empty_tiles)
#for zrc in standingHeightGen.empty_tiles:
#    print(zrc)

import csv

# Dump the generator's record of empty zones and empty tiles to CSV.
# The files are opened with newline='' as the csv module requires, so the
# writer's '\n' line terminator is emitted verbatim on every platform instead
# of being translated by text mode.

# One row per empty zone: the zone's a_range followed by its b_range.
# The ranges are concatenated with `+`, which builds a new sequence and
# leaves the zone objects unmodified.
with open('empty_zones.csv', 'w', newline='') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerows(
        zone.a_range + zone.b_range
        for zone in standingHeightGen.empty_zones.zones
    )

# One row per entry of standingHeightGen.empty_tiles.
with open('empty_tiles.csv', 'w', newline='') as output:
    writer = csv.writer(output, lineterminator='\n')
    writer.writerows(standingHeightGen.empty_tiles)