20/01/16 13:14:54 INFO PlinkFileFormat$: hlsUsage:[plinkRead,{"includeSampleIds":true,"mergeFidIid":true}]
20/01/16 13:15:02 INFO PlinkFileFormat: Reading variants [0, 849480]
20/01/16 13:15:02 INFO PlinkFileFormat: Reading variants [849480, 1698959]
20/01/16 13:15:02 INFO PlinkFileFormat: Reading variants [1698959, 2548438]
20/01/16 13:15:06 INFO PlinkFileFormat: Reading variants [2548438, 3397918]
20/01/16 13:15:06 INFO PlinkFileFormat: Reading variants [3397918, 4247397]
20/01/16 13:15:06 INFO PlinkFileFormat: Reading variants [4247397, 5096876]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [5096876, 5946356]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [5946356, 6795835]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [6795835, 7645314]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [8494793, 9344273]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [9344273, 10193752]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [7645314, 8494793]
20/01/16 13:15:07 INFO PlinkFileFormat: Reading variants [10193752, 11043231]
20/01/16 13:15:08 INFO PlinkFileFormat: Reading variants [11043231, 11892711]
20/01/16 13:15:08 INFO PlinkFileFormat: Reading variants [11892711, 12742190]
20/01/16 13:15:08 INFO PlinkFileFormat: Reading variants [12742190, 13591669]
20/01/16 13:15:17 INFO PlinkFileFormat: Reading variants [25484379, 52671826]
20/01/16 13:15:17 ERROR Executor: Exception in task 30.0 in stage 0.0 (TID 30)
java.io.EOFException: Cannot seek to a negative offset
  at org.apache.hadoop.fs.FSInputChecker.seek(FSInputChecker.java:399)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream.seek(ChecksumFileSystem.java:330)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:118)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:90)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  at org.apache.spark.scheduler.Task.run(Task.scala:123)
  at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
20/01/16 13:15:17 WARN TaskSetManager: Lost task 30.0 in stage 0.0 (TID 30, localhost, executor driver): java.io.EOFException: Cannot seek to a negative offset
  at org.apache.hadoop.fs.FSInputChecker.seek(FSInputChecker.java:399)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream.seek(ChecksumFileSystem.java:330)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:118)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:90)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  at org.apache.spark.scheduler.Task.run(Task.scala:123)
  at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
20/01/16 13:15:17 ERROR TaskSetManager: Task 30 in stage 0.0 failed 1 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 30 in stage 0.0 failed 1 times, most recent failure: Lost task 30.0 in stage 0.0 (TID 30, localhost, executor driver): java.io.EOFException: Cannot seek to a negative offset
  at org.apache.hadoop.fs.FSInputChecker.seek(FSInputChecker.java:399)
  at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  at org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream.seek(ChecksumFileSystem.java:330)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:118)
  at io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:90)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
  at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
  at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  at org.apache.spark.scheduler.Task.run(Task.scala:123)
  at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
  at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
  org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
  org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
  org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
  scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
  org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  scala.Option.foreach(Option.scala:257)
  org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
  org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
  org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
  org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
  org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
  org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
  org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
  org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
  org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
  org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945)
  org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
  org.apache.spark.rdd.RDD.collect(RDD.scala:944)
  org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
  org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2836)
  org.apache.spark.sql.Dataset$$anonfun$count$1.apply(Dataset.scala:2835)
  org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
  org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
  org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
  org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
  org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
  org.apache.spark.sql.Dataset.count(Dataset.scala:2835)
  ammonite.$sess.cmd3$Helper.<init>(cmd3.sc:1)
  ammonite.$sess.cmd3$.<init>(cmd3.sc:7)
  ammonite.$sess.cmd3$.<clinit>(cmd3.sc:-1)
java.io.EOFException: Cannot seek to a negative offset
  org.apache.hadoop.fs.FSInputChecker.seek(FSInputChecker.java:399)
  org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:62)
  org.apache.hadoop.fs.ChecksumFileSystem$FSDataBoundedInputStream.seek(ChecksumFileSystem.java:330)
  io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:118)
  io.projectglow.plink.PlinkFileFormat$$anonfun$buildReader$1.apply(PlinkFileFormat.scala:90)
  org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
  org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
  org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
  org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
  org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
  org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
  org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
  scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  org.apache.spark.scheduler.Task.run(Task.scala:123)
  org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
  org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
  java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  java.lang.Thread.run(Thread.java:748)
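For context, the log's hlsUsage line shows the Glow PLINK reader being invoked with includeSampleIds=true and mergeFidIid=true, and the driver trace ends in a plain Dataset.count() issued from an Ammonite session (cmd3.sc:1). The sketch below is a minimal, self-contained reconstruction of that kind of read and count; the object name, master setting, and .bed path are hypothetical stand-ins, not taken from the original report.

import org.apache.spark.sql.SparkSession
import io.projectglow.Glow

object PlinkCountRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("plink-count-repro")   // hypothetical app name
      .master("local[*]")             // the log shows a local driver ("localhost, executor driver")
      .getOrCreate()

    // Register Glow's SQL functions and transformers with this session.
    Glow.register(spark)

    // Hypothetical path; substitute the .bed file that triggers the failure.
    val bedPath = "/data/genotypes.bed"

    // Options mirror the logged hlsUsage: includeSampleIds=true, mergeFidIid=true.
    val df = spark.read
      .format("plink")
      .option("includeSampleIds", true)
      .option("mergeFidIid", true)
      .load(bedPath)

    // The failing action in the log is a simple count over the whole dataset,
    // which scans every variant block of the .bed file.
    println(df.count())

    spark.stop()
  }
}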