From 1f2d9f06d25ba90373d2a50e7852313c8292421e Mon Sep 17 00:00:00 2001 From: Danielle Sucher Date: Fri, 21 Nov 2014 13:18:34 -0500 Subject: [PATCH] Initial open source commit (Herringbone is a common pattern for parquet flooring, whee!) --- .gitignore | 10 + LICENSE | 21 + README.md | 40 + bin/herringbone | 72 ++ herringbone-impala/pom.xml | 118 ++ .../herringbone/impala/Connection.scala | 65 ++ .../stripe/herringbone/impala/Cursor.scala | 98 ++ .../herringbone/impala/Exceptions.scala | 7 + .../herringbone/impala/ImpalaClient.scala | 16 + .../herringbone/impala/ImpalaValue.scala | 43 + .../src/main/thrift/ImpalaService.thrift | 177 +++ .../src/main/thrift/Status.thrift | 32 + .../src/main/thrift/beeswax.thrift | 175 +++ .../src/main/thrift/cli_service.thrift | 1015 +++++++++++++++++ .../src/main/thrift/fb303.thrift | 112 ++ .../src/main/thrift/hive_metastore.thrift | 528 +++++++++ herringbone-main/pom.xml | 168 +++ .../herringbone/CompactInputFormat.scala | 168 +++ .../com/stripe/herringbone/CompactJob.scala | 98 ++ .../com/stripe/herringbone/FlattenJob.scala | 78 ++ .../com/stripe/herringbone/ParquetLoad.scala | 45 + .../scala/com/stripe/herringbone/TsvJob.scala | 98 ++ .../herringbone/flatten/FlatConsumer.scala | 108 ++ .../herringbone/flatten/FlatConverter.scala | 54 + .../herringbone/flatten/ParquetFlatConf.scala | 11 + .../flatten/ParquetFlatMapper.scala | 29 + .../herringbone/flatten/TypeFlattener.scala | 59 + .../stripe/herringbone/load/FieldUtils.scala | 53 + .../stripe/herringbone/load/HadoopFs.scala | 39 + .../stripe/herringbone/load/HiveLoader.scala | 76 ++ .../load/HiveServer2Connection.scala | 35 + .../herringbone/load/ImpalaLoader.scala | 122 ++ .../herringbone/load/ParquetLoadConf.scala | 18 + .../herringbone/load/ParquetLoader.scala | 9 + .../herringbone/util/ParquetUtils.scala | 36 + .../src/main/thrift/ImpalaService.thrift | 177 +++ .../src/main/thrift/Status.thrift | 32 + .../src/main/thrift/beeswax.thrift | 175 +++ .../src/main/thrift/cli_service.thrift | 1015 +++++++++++++++++ herringbone-main/src/main/thrift/fb303.thrift | 112 ++ .../src/main/thrift/hive_metastore.thrift | 528 +++++++++ .../src/test/resources/test.parquet | Bin 0 -> 916 bytes .../stripe/herringbone/FlattenJobTest.scala | 22 + .../flatten/FlatConverterTest.scala | 61 + .../flatten/TypeFlattenerTest.scala | 95 ++ .../herringbone/load/FieldUtilsTest.scala | 49 + pom.xml | 17 + 47 files changed, 6116 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100755 bin/herringbone create mode 100644 herringbone-impala/pom.xml create mode 100644 herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Connection.scala create mode 100644 herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Cursor.scala create mode 100644 herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Exceptions.scala create mode 100644 herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaClient.scala create mode 100644 herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaValue.scala create mode 100644 herringbone-impala/src/main/thrift/ImpalaService.thrift create mode 100644 herringbone-impala/src/main/thrift/Status.thrift create mode 100644 herringbone-impala/src/main/thrift/beeswax.thrift create mode 100644 herringbone-impala/src/main/thrift/cli_service.thrift create mode 100644 herringbone-impala/src/main/thrift/fb303.thrift create mode 100644 herringbone-impala/src/main/thrift/hive_metastore.thrift create mode 100644 
herringbone-main/pom.xml create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/CompactInputFormat.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/CompactJob.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/FlattenJob.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/ParquetLoad.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/TsvJob.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConsumer.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConverter.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatConf.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatMapper.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/flatten/TypeFlattener.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/FieldUtils.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/HadoopFs.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveLoader.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveServer2Connection.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/ImpalaLoader.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoadConf.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoader.scala create mode 100644 herringbone-main/src/main/scala/com/stripe/herringbone/util/ParquetUtils.scala create mode 100644 herringbone-main/src/main/thrift/ImpalaService.thrift create mode 100644 herringbone-main/src/main/thrift/Status.thrift create mode 100644 herringbone-main/src/main/thrift/beeswax.thrift create mode 100644 herringbone-main/src/main/thrift/cli_service.thrift create mode 100644 herringbone-main/src/main/thrift/fb303.thrift create mode 100644 herringbone-main/src/main/thrift/hive_metastore.thrift create mode 100644 herringbone-main/src/test/resources/test.parquet create mode 100644 herringbone-main/src/test/scala/com/stripe/herringbone/FlattenJobTest.scala create mode 100644 herringbone-main/src/test/scala/com/stripe/herringbone/flatten/FlatConverterTest.scala create mode 100644 herringbone-main/src/test/scala/com/stripe/herringbone/flatten/TypeFlattenerTest.scala create mode 100644 herringbone-main/src/test/scala/com/stripe/herringbone/load/FieldUtilsTest.scala create mode 100644 pom.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c6ba44 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +target/ +data/ +.idea/ +*.pyc +*.iml +# ignore ROC plots +*.pdf +.tddium* + +.DS_Store diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2754f88 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014- Stripe, Inc. 
(https://stripe.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fcbf773
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+Herringbone
+===========
+
+Herringbone is a suite of tools for working with Parquet files on HDFS, and with Impala and Hive.
+
+The available commands are:
+
+`flatten`: transform a directory of Parquet files with a nested structure into a directory of Parquet files with a flat schema that can be loaded into Impala or Hive (neither of which supports nested schemas)
+
+    $ herringbone flatten -i /path/to/input/directory -o /path/to/output/directory
+
+`load`: load a directory of Parquet files (which must have a flat schema) into Impala or Hive (defaulting to Impala)
+
+    $ herringbone load [--hive] [-u] -d db_name -t table -p /path/to/parquet/directory
+
+`tsv`: transform a directory of Parquet files into a directory of TSV files (which you can concatenate later with `hadoop fs -getmerge /path/to/tsvs`)
+
+    $ herringbone tsv -i /path/to/input/directory -o /path/to/output/directory
+
+`compact`: transform a directory of Parquet files into a directory of fewer, larger Parquet files
+
+    $ herringbone compact -i /path/to/input/directory -o /path/to/output/directory
+
+See `herringbone COMMAND --help` for more information on a specific command.
+
+Building
+--------
+
+You'll need Thrift 0.9.1 on your path.
+
+    $ git clone https://github.com/stripe/herringbone
+    $ cd herringbone
+    $ mvn package
+
+Authors
+-------
+
+ - [Avi Bryant](http://twitter.com/avibryant)
+ - [Danielle Sucher](http://twitter.com/daniellesucher)
+ - [Jeff Balogh](http://twitter.com/jbalogh)
diff --git a/bin/herringbone b/bin/herringbone
new file mode 100755
index 0000000..0a5e6c6
--- /dev/null
+++ b/bin/herringbone
@@ -0,0 +1,72 @@
+#!/usr/bin/env ruby
+
+usage = <<-USAGE
+Herringbone is a suite of tools for working with Parquet files on HDFS.
+ +The available commands are: + +flatten: Transform a directory of parquet files with a nested structure into a directory of parquet files with a flat schema that can be loaded into impala or hive + +load: Load a directory of parquet files (which must have a flat schema) into impala or hive (defaults to impala) + +tsv: Transform a directory of parquet files into a directory of tsv files (which you can concat properly later with `hadoop fs -getmerge /path/to/tsvs`) + +compact: Transform a directory of parquet files into a directory of fewer larger parquet files + + +Example usage: + +`herringbone flatten -i /path/to/input/directory -o /path/to/output/directory` + +`herringbone load [--hive] [-u] -d db_name -t table -p /path/to/parquet/directory` + +`herringbone tsv -i /path/to/input/directory -o /path/to/output/directory` + +`herringbone compact -i /path/to/input/directory -o /path/to/output/directory` + + +See 'herringbone COMMAND --help' for more information on a specific command. + + + USAGE + +command_jobs = { + 'compact' => 'CompactJob', + 'load' => 'ParquetLoad', + 'flatten' => 'FlattenJob', + 'tsv' => 'TsvJob', +} + +# Validate the given command and print usage if needed. +command = ARGV.shift +JOB = command_jobs[command] + +if ['-h', '--help'].include?(command) + puts usage + exit 0 +elsif !JOB + STDERR.puts "\nError: #{command} is not an available command\n\n" + puts "#{'=' * 30}\n\n" + puts usage + exit 1 +end + +jar_path = File.join( + File.dirname(__FILE__), + '../', + 'herringbone-main', + 'target', + 'herringbone-0.0.1-jar-with-dependencies.jar' +) +JAR = File.expand_path(jar_path) + +ENV["HADOOP_CLASSPATH"] = JAR +ENV["HADOOP_USER_CLASSPATH_FIRST"] = "true" + +exec( + "hadoop", + "jar", + JAR, + "com.stripe.herringbone.#{JOB}", + *ARGV +) diff --git a/herringbone-impala/pom.xml b/herringbone-impala/pom.xml new file mode 100644 index 0000000..f90c0f0 --- /dev/null +++ b/herringbone-impala/pom.xml @@ -0,0 +1,118 @@ + + 4.0.0 + + com.stripe + herringbone-impala + 0.0.2 + jar + + Herringbone Impala + + + + dtrott + http://maven.davidtrott.com/repository + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.6 + 1.6 + + + + + maven-jar-plugin + 2.3.1 + + + + maven-resources-plugin + 2.4.3 + + + + net.alchim31.maven + scala-maven-plugin + 3.1.6 + + + + compile + testCompile + + + + + + + org.apache.thrift.tools + maven-thrift-plugin + 0.1.11 + + true + thrift + + + + thrift-sources + generate-sources + + compile + + + + thrift-test-sources + generate-test-sources + + testCompile + + + + + + + + + + UTF-8 + 2.10.3 + 1.6 + 1.6 + + + + + cloudera-releases + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + + + + + + org.apache.thrift + libthrift + 0.9.1 + + + org.slf4j + slf4j-log4j12 + 1.5.2 + + + + + diff --git a/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Connection.scala b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Connection.scala new file mode 100644 index 0000000..11f7a9a --- /dev/null +++ b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Connection.scala @@ -0,0 +1,65 @@ +package com.stripe.herringbone.impala + +import org.apache.thrift.transport.TSocket +import org.apache.thrift.protocol.TBinaryProtocol + +import com.cloudera.impala.thrift.ImpalaService.{Client => ClouderaImpalaClient} +import com.cloudera.beeswax.api._ + +import scala.annotation.tailrec +import scala.collection.JavaConversions._ + +case class Connection(host: String, port: Int) { + var isOpen = 
false
+  lazy val socket = new TSocket(host, port)
+  lazy val client = new ClouderaImpalaClient(new TBinaryProtocol(socket))
+
+  open
+
+  def open = {
+    if (!isOpen) {
+      socket.open
+      client.ResetCatalog
+      isOpen = true
+    }
+  }
+
+  def close = {
+    if (isOpen) {
+      socket.close
+      isOpen = false
+    }
+  }
+
+  // Refresh the metadata store.
+  def refresh = {
+    if (!isOpen) throw ConnectionException("Connection closed")
+    client.ResetCatalog
+  }
+
+  // Perform a query, and pass in a function that will be called with each
+  // row of the results.
+  def query(raw: String)(fn: Seq[ImpalaValue] => Unit) {
+    val cursor = execute(raw)
+    cursor.foreach { row => fn(row) }
+    cursor.close
+  }
+
+  // Perform a query and return a cursor for iterating over the results.
+  // You probably want to call cursor.close when you're done with it.
+  def execute(raw: String): Cursor = {
+    if (!isOpen) throw ConnectionException("Connection closed")
+    validateQuery(raw)
+
+    val query = new Query
+    query.query = raw
+
+    val handle = client.query(query)
+    Cursor(handle, client)
+  }
+
+  private def validateQuery(raw: String) = {
+    val trimmed = raw.trim
+    if (trimmed.isEmpty) throw InvalidQueryException("Empty query")
+  }
+}
diff --git a/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Cursor.scala b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Cursor.scala
new file mode 100644
index 0000000..394af69
--- /dev/null
+++ b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Cursor.scala
@@ -0,0 +1,98 @@
+package com.stripe.herringbone.impala
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema
+
+import com.cloudera.impala.thrift.ImpalaService.{Client => ClouderaImpalaClient}
+import com.cloudera.beeswax.api._
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.JavaConversions._
+
+case class Cursor(handle: QueryHandle, client: ClouderaImpalaClient) {
+  var done = false
+  var isOpen = true
+  var rowBuffer = ArrayBuffer.empty[Seq[ImpalaValue]]
+  val bufferSize = 1024
+  private lazy val metadata: ResultsMetadata = client.get_results_metadata(handle)
+
+  def foreach(fn: Seq[ImpalaValue] => Unit) = {
+    var row = fetchRow
+    while (row.isDefined) {
+      fn(row.get)
+      row = fetchRow
+    }
+  }
+
+  def fetchRow: Option[Seq[ImpalaValue]] = {
+    if (rowBuffer.isEmpty) {
+      if (done) {
+        None
+      } else {
+        fetchMore
+        fetchRow
+      }
+    } else {
+      val row = rowBuffer.head
+      rowBuffer = rowBuffer.tail
+      Some(row)
+    }
+  }
+
+  // Close the cursor on the remote server. Once a cursor is closed, you
+  // can no longer fetch any rows from it.
+  def close = {
+    if (isOpen) {
+      isOpen = false
+      client.close(handle)
+    }
+  }
+
+  // Returns true if there are any more rows to fetch.
+  def hasMore = !done || !rowBuffer.isEmpty
+
+  def runtime_profile = client.GetRuntimeProfile(handle)
+
+  private def fetchMore = {
+    while (!done && rowBuffer.size < bufferSize) {
+      fetchBatch
+    }
+  }
+
+  private def fetchBatch = {
+    if (!isOpen) throw CursorException("Cursor has expired or been closed")
+
+    try {
+      val response = client.fetch(handle, false, bufferSize)
+      validateQueryState(client.get_state(handle))
+
+      val rows = response.data.map { row => parseRow(row) }
+      rowBuffer ++= rows
+
+      if (!response.has_more) {
+        done = true
+        close
+      }
+    } catch {
+      case e: BeeswaxException => {
+        isOpen = false
+        throw e
+      }
+      case e: Exception => throw e
+    }
+  }
+
+  private def parseRow(row: String) = {
+    val fields = row.split(metadata.delim)
+
+    metadata.schema.getFieldSchemas.zip(fields).map { case(schema, rawValue) =>
+      ImpalaValue(rawValue, schema.getName, schema.getType)
+    }
+  }
+
+  private def validateQueryState(state: QueryState) = {
+    if (state == QueryState.EXCEPTION) {
+      close
+      throw CursorException("The query was aborted")
+    }
+  }
+}
diff --git a/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Exceptions.scala b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Exceptions.scala
new file mode 100644
index 0000000..17725e3
--- /dev/null
+++ b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/Exceptions.scala
@@ -0,0 +1,7 @@
+package com.stripe.herringbone.impala
+
+case class ConnectionException(message: String) extends Exception(message)
+case class CursorException(message: String) extends Exception(message)
+case class InvalidQueryException(message: String) extends Exception(message)
+case class ParsingException(message: String) extends Exception(message)
+
diff --git a/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaClient.scala b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaClient.scala
new file mode 100644
index 0000000..a873abf
--- /dev/null
+++ b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaClient.scala
@@ -0,0 +1,16 @@
+package com.stripe.herringbone.impala
+
+case class ImpalaClient(host: String, port: Int) {
+  lazy val connection = Connection(host, port)
+
+  def execute(raw: String) {
+    query(raw){ row =>
+      println(row.map { _.raw }.mkString(" "))
+    }
+  }
+
+  def query(raw: String)(fn: Seq[ImpalaValue] => Unit) {
+    println(raw)
+    connection.query(raw){ row => fn(row) }
+  }
+}
diff --git a/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaValue.scala b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaValue.scala
new file mode 100644
index 0000000..bf0b375
--- /dev/null
+++ b/herringbone-impala/src/main/scala/com/stripe/herringbone/impala/ImpalaValue.scala
@@ -0,0 +1,43 @@
+package com.stripe.herringbone.impala
+
+import java.text.SimpleDateFormat
+
+case class ImpalaValue(raw: String, fieldName: String, fieldType: String) {
+  lazy val convertedValue = convertRawValue(raw)
+
+  private def convertRawValue(raw: String): Option[Any] = {
+    if (raw == "NULL") {
+      None
+    } else {
+      val converted = fieldType match {
+        case "string" => raw
+        case "boolean" => convertBoolean(raw)
+        case "tinyint" | "smallint" | "int" | "bigint" => raw.toLong
+        case "double" | "float" | "decimal" => raw.toDouble
+        case "timestamp" => convertTimestamp(raw)
+        case _ => throw ParsingException("Unknown type: " + fieldType)
+      }
+      Some(converted)
+    }
+  }
+
+  private def convertBoolean(raw: String) = {
+    try {
+      raw.toBoolean
+    } catch {
+      case e: java.lang.IllegalArgumentException =>
throw ParsingException("Invalid value for boolean: " + raw) + } + } + + private def convertTimestamp(raw: String) = { + val formatStr = if (raw.indexOf(".") == -1) { + "YYYY-MM-DD HH:MM:SS" + } else { + "YYYY-MM-DD HH:MM:SS.sssssssss" + } + + val dateFormat = new SimpleDateFormat(formatStr) + dateFormat.parse(raw) + } +} diff --git a/herringbone-impala/src/main/thrift/ImpalaService.thrift b/herringbone-impala/src/main/thrift/ImpalaService.thrift new file mode 100644 index 0000000..1246ca4 --- /dev/null +++ b/herringbone-impala/src/main/thrift/ImpalaService.thrift @@ -0,0 +1,177 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace cpp impala +namespace java com.cloudera.impala.thrift +namespace rb impala.protocol + +include "Status.thrift" +include "beeswax.thrift" +include "cli_service.thrift" + +// ImpalaService accepts query execution options through beeswax.Query.configuration in +// key:value form. For example, the list of strings could be: +// "num_nodes:1", "abort_on_error:false" +// The valid keys are listed in this enum. They map to TQueryOptions. +// Note: If you add an option or change the default, you also need to update: +// - ImpalaInternalService.thrift: TQueryOptions +// - ImpaladClientExecutor.getBeeswaxQueryConfigurations() +// - ImpalaServer::SetQueryOptions() +// - ImpalaServer::TQueryOptionsToMap() +enum TImpalaQueryOptions { + // if true, abort execution on the first error + ABORT_ON_ERROR, + + // maximum # of errors to be reported; Unspecified or 0 indicates backend default + MAX_ERRORS, + + // if true, disable llvm codegen + DISABLE_CODEGEN, + + // batch size to be used by backend; Unspecified or a size of 0 indicates backend + // default + BATCH_SIZE, + + // a per-machine approximate limit on the memory consumption of this query; + // unspecified or a limit of 0 means no limit; + // otherwise specified either as: + // a) an int (= number of bytes); + // b) a float followed by "M" (MB) or "G" (GB) + MEM_LIMIT, + + // specifies the degree of parallelism with which to execute the query; + // 1: single-node execution + // NUM_NODES_ALL: executes on all nodes that contain relevant data + // NUM_NODES_ALL_RACKS: executes on one node per rack that holds relevant data + // > 1: executes on at most that many nodes at any point in time (ie, there can be + // more nodes than numNodes with plan fragments for this query, but at most + // numNodes would be active at any point in time) + // Constants (NUM_NODES_ALL, NUM_NODES_ALL_RACKS) are defined in JavaConstants.thrift. + NUM_NODES, + + // maximum length of the scan range; only applicable to HDFS scan range; Unspecified or + // a length of 0 indicates backend default; + MAX_SCAN_RANGE_LENGTH, + + // Maximum number of io buffers (per disk) + MAX_IO_BUFFERS, + + // Number of scanner threads. 
+ NUM_SCANNER_THREADS, + + // If true, Impala will try to execute on file formats that are not fully supported yet + ALLOW_UNSUPPORTED_FORMATS, + + // if set and > -1, specifies the default limit applied to a top-level SELECT statement + // with an ORDER BY but without a LIMIT clause (ie, if the SELECT statement also has + // a LIMIT clause, this default is ignored) + DEFAULT_ORDER_BY_LIMIT, + + // DEBUG ONLY: + // If set to + // "[:]::", + // the exec node with the given id will perform the specified action in the given + // phase. If the optional backend number (starting from 0) is specified, only that + // backend instance will perform the debug action, otherwise all backends will behave + // in that way. + // If the string doesn't have the required format or if any of its components is + // invalid, the option is ignored. + DEBUG_ACTION, + + // If true, raise an error when the DEFAULT_ORDER_BY_LIMIT has been reached. + ABORT_ON_DEFAULT_LIMIT_EXCEEDED, + + // Compression codec for parquet when inserting into parquet tables. + // Valid values are "snappy", "gzip" and "none" + // Leave blank to use default. + PARQUET_COMPRESSION_CODEC, + + // HBase scan query option. If set and > 0, HBASE_CACHING is the value for + // "hbase.client.Scan.setCaching()" when querying HBase table. Otherwise, use backend + // default. + // If the value is too high, then the hbase region server will have a hard time (GC + // pressure and long response times). If the value is too small, then there will be + // extra trips to the hbase region server. + HBASE_CACHING, + + // HBase scan query option. If set, HBase scan will always set + // "hbase.client.setCacheBlocks" to CACHE_BLOCKS. Default is false. + // If the table is large and the query is doing big scan, set it to false to + // avoid polluting the cache in the hbase region server. + // If the table is small and the table is used several time, set it to true to improve + // performance. + HBASE_CACHE_BLOCKS, +} + +// The summary of an insert. +struct TInsertResult { + // Number of appended rows per modified partition. Only applies to HDFS tables. + // The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the + // root in an unpartitioned table being the empty string. + 1: required map rows_appended +} + +// Response from a call to PingImpalaService +struct TPingImpalaServiceResp { + // The Impala service's version string. + 1: string version +} + +// Parameters for a ResetTable request which will invalidate a table's metadata. +// DEPRECATED. +struct TResetTableReq { + // Name of the table's parent database. + 1: required string db_name + + // Name of the table. + 2: required string table_name +} + +// For all rpc that return a TStatus as part of their result type, +// if the status_code field is set to anything other than OK, the contents +// of the remainder of the result type is undefined (typically not set) +service ImpalaService extends beeswax.BeeswaxService { + // Cancel execution of query. Returns RUNTIME_ERROR if query_id + // unknown. + // This terminates all threads running on behalf of this query at + // all nodes that were involved in the execution. + // Throws BeeswaxException if the query handle is invalid (this doesn't + // necessarily indicate an error: the query might have finished). 
+ Status.TStatus Cancel(1:beeswax.QueryHandle query_id) + throws(1:beeswax.BeeswaxException error); + + // Invalidates all catalog metadata, forcing a reload + // DEPRECATED; execute query "invalidate metadata" to refresh metadata + Status.TStatus ResetCatalog(); + + // Invalidates a specific table's catalog metadata, forcing a reload on the next access + // DEPRECATED; execute query "refresh " to refresh metadata + Status.TStatus ResetTable(1:TResetTableReq request) + + // Returns the runtime profile string for the given query handle. + string GetRuntimeProfile(1:beeswax.QueryHandle query_id) + throws(1:beeswax.BeeswaxException error); + + // Closes the query handle and return the result summary of the insert. + TInsertResult CloseInsert(1:beeswax.QueryHandle handle) + throws(1:beeswax.QueryNotFoundException error, 2:beeswax.BeeswaxException error2); + + // Client calls this RPC to verify that the server is an ImpalaService. Returns the + // server version. + TPingImpalaServiceResp PingImpalaService(); +} + +// Impala HiveServer2 service +service ImpalaHiveServer2Service extends cli_service.TCLIService { +} diff --git a/herringbone-impala/src/main/thrift/Status.thrift b/herringbone-impala/src/main/thrift/Status.thrift new file mode 100644 index 0000000..8906d1e --- /dev/null +++ b/herringbone-impala/src/main/thrift/Status.thrift @@ -0,0 +1,32 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace cpp impala +namespace java com.cloudera.impala.thrift +namespace rb impala.protocol + +enum TStatusCode { + OK, + CANCELLED, + ANALYSIS_ERROR, + NOT_IMPLEMENTED_ERROR, + RUNTIME_ERROR, + MEM_LIMIT_EXCEEDED, + INTERNAL_ERROR +} + +struct TStatus { + 1: required TStatusCode status_code + 2: list error_msgs +} diff --git a/herringbone-impala/src/main/thrift/beeswax.thrift b/herringbone-impala/src/main/thrift/beeswax.thrift new file mode 100644 index 0000000..2707457 --- /dev/null +++ b/herringbone-impala/src/main/thrift/beeswax.thrift @@ -0,0 +1,175 @@ +/* + * Licensed to Cloudera, Inc. under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Cloudera, Inc. licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Interface for interacting with Beeswax Server + */ + +namespace java com.cloudera.beeswax.api +namespace py beeswaxd +namespace cpp beeswax +namespace rb impala.protocol.beeswax + +include "hive_metastore.thrift" + +// A Query +struct Query { + 1: string query; + // A list of HQL commands to execute before the query. + // This is typically defining UDFs, setting settings, and loading resources. + 3: list configuration; + + // User and groups to "act as" for purposes of Hadoop. + 4: string hadoop_user; +} + +typedef string LogContextId + +enum QueryState { + CREATED, + INITIALIZED, + COMPILED, + RUNNING, + FINISHED, + EXCEPTION +} + +struct QueryHandle { + 1: string id; + 2: LogContextId log_context; +} + +struct QueryExplanation { + 1: string textual +} + +struct Results { + // If set, data is valid. Otherwise, results aren't ready yet. + 1: bool ready, + // Columns for the results + 2: list columns, + // A set of results + 3: list data, + // The starting row of the results + 4: i64 start_row, + // Whether there are more results to fetch + 5: bool has_more +} + +/** + * Metadata information about the results. + * Applicable only for SELECT. + */ +struct ResultsMetadata { + /** The schema of the results */ + 1: hive_metastore.Schema schema, + /** The directory containing the results. Not applicable for partition table. */ + 2: string table_dir, + /** If the results are straight from an existing table, the table name. */ + 3: string in_tablename, + /** Field delimiter */ + 4: string delim, +} + +exception BeeswaxException { + 1: string message, + // Use get_log(log_context) to retrieve any log related to this exception + 2: LogContextId log_context, + // (Optional) The QueryHandle that caused this exception + 3: QueryHandle handle, + 4: optional i32 errorCode = 0, + 5: optional string SQLState = " " +} + +exception QueryNotFoundException { +} + +/** Represents a Hadoop-style configuration variable. */ +struct ConfigVariable { + 1: string key, + 2: string value, + 3: string description +} + +service BeeswaxService { + /** + * Submit a query and return a handle (QueryHandle). The query runs asynchronously. + */ + QueryHandle query(1:Query query) throws(1:BeeswaxException error), + + /** + * run a query synchronously and return a handle (QueryHandle). + */ + QueryHandle executeAndWait(1:Query query, 2:LogContextId clientCtx) + throws(1:BeeswaxException error), + + /** + * Get the query plan for a query. + */ + QueryExplanation explain(1:Query query) + throws(1:BeeswaxException error), + + /** + * Get the results of a query. This is non-blocking. Caller should check + * Results.ready to determine if the results are in yet. The call requests + * the batch size of fetch. + */ + Results fetch(1:QueryHandle query_id, 2:bool start_over, 3:i32 fetch_size=-1) + throws(1:QueryNotFoundException error, 2:BeeswaxException error2), + + /** + * Get the state of the query + */ + QueryState get_state(1:QueryHandle handle) throws(1:QueryNotFoundException error), + + /** + * Get the result metadata + */ + ResultsMetadata get_results_metadata(1:QueryHandle handle) + throws(1:QueryNotFoundException error), + + /** + * Used to test connection to server. A "noop" command. + */ + string echo(1:string s) + + /** + * Returns a string representation of the configuration object being used. + * Handy for debugging. + */ + string dump_config() + + /** + * Get the log messages related to the given context. 
+ */ + string get_log(1:LogContextId context) throws(1:QueryNotFoundException error) + + /* + * Returns "default" configuration. + */ + list get_default_configuration(1:bool include_hadoop) + + /* + * closes the query with given handle + */ + void close(1:QueryHandle handle) throws(1:QueryNotFoundException error, + 2:BeeswaxException error2) + + /* + * clean the log context for given id + */ + void clean(1:LogContextId log_context) +} diff --git a/herringbone-impala/src/main/thrift/cli_service.thrift b/herringbone-impala/src/main/thrift/cli_service.thrift new file mode 100644 index 0000000..24a3558 --- /dev/null +++ b/herringbone-impala/src/main/thrift/cli_service.thrift @@ -0,0 +1,1015 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Coding Conventions for this file: +// +// Structs/Enums/Unions +// * Struct, Enum, and Union names begin with a "T", +// and use a capital letter for each new word, with no underscores. +// * All fields should be declared as either optional or required. +// +// Functions +// * Function names start with a capital letter and have a capital letter for +// each new word, with no underscores. +// * Each function should take exactly one parameter, named TFunctionNameReq, +// and should return either void or TFunctionNameResp. This convention allows +// incremental updates. +// +// Services +// * Service names begin with the letter "T", use a capital letter for each +// new word (with no underscores), and end with the word "Service". + +namespace java org.apache.hive.service.cli.thrift +namespace cpp apache.hive.service.cli.thrift +namespace rb impala.protocol.hive + +// List of protocol versions. A new token should be +// added to the end of this list every time a change is made. 
+enum TProtocolVersion { + HIVE_CLI_SERVICE_PROTOCOL_V1 +} + +enum TTypeId { + BOOLEAN_TYPE, + TINYINT_TYPE, + SMALLINT_TYPE, + INT_TYPE, + BIGINT_TYPE, + FLOAT_TYPE, + DOUBLE_TYPE, + STRING_TYPE, + TIMESTAMP_TYPE, + BINARY_TYPE, + ARRAY_TYPE, + MAP_TYPE, + STRUCT_TYPE, + UNION_TYPE, + USER_DEFINED_TYPE, + DECIMAL_TYPE +} + +const set PRIMITIVE_TYPES = [ + TTypeId.BOOLEAN_TYPE + TTypeId.TINYINT_TYPE + TTypeId.SMALLINT_TYPE + TTypeId.INT_TYPE + TTypeId.BIGINT_TYPE + TTypeId.FLOAT_TYPE + TTypeId.DOUBLE_TYPE + TTypeId.STRING_TYPE + TTypeId.TIMESTAMP_TYPE + TTypeId.BINARY_TYPE, + TTypeId.DECIMAL_TYPE +] + +const set COMPLEX_TYPES = [ + TTypeId.ARRAY_TYPE + TTypeId.MAP_TYPE + TTypeId.STRUCT_TYPE + TTypeId.UNION_TYPE + TTypeId.USER_DEFINED_TYPE +] + +const set COLLECTION_TYPES = [ + TTypeId.ARRAY_TYPE + TTypeId.MAP_TYPE +] + +const map TYPE_NAMES = { + TTypeId.BOOLEAN_TYPE: "BOOLEAN", + TTypeId.TINYINT_TYPE: "TINYINT", + TTypeId.SMALLINT_TYPE: "SMALLINT", + TTypeId.INT_TYPE: "INT", + TTypeId.BIGINT_TYPE: "BIGINT", + TTypeId.FLOAT_TYPE: "FLOAT", + TTypeId.DOUBLE_TYPE: "DOUBLE", + TTypeId.STRING_TYPE: "STRING", + TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", + TTypeId.BINARY_TYPE: "BINARY", + TTypeId.ARRAY_TYPE: "ARRAY", + TTypeId.MAP_TYPE: "MAP", + TTypeId.STRUCT_TYPE: "STRUCT", + TTypeId.UNION_TYPE: "UNIONTYPE" + TTypeId.DECIMAL_TYPE: "DECIMAL" +} + +// Thrift does not support recursively defined types or forward declarations, +// which makes it difficult to represent Hive's nested types. +// To get around these limitations TTypeDesc employs a type list that maps +// integer "pointers" to TTypeEntry objects. The following examples show +// how different types are represented using this scheme: +// +// "INT": +// TTypeDesc { +// types = [ +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// } +// ] +// } +// +// "ARRAY": +// TTypeDesc { +// types = [ +// TTypeEntry.array_entry { +// object_type_ptr = 1 +// }, +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// } +// ] +// } +// +// "MAP": +// TTypeDesc { +// types = [ +// TTypeEntry.map_entry { +// key_type_ptr = 1 +// value_type_ptr = 2 +// }, +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// }, +// TTypeEntry.primitive_entry { +// type = STRING_TYPE +// } +// ] +// } + +typedef i32 TTypeEntryPtr + +// Type entry for a primitive type. +struct TPrimitiveTypeEntry { + // The primitive type token. This must satisfy the condition + // that type is in the PRIMITIVE_TYPES set. + 1: required TTypeId type +} + +// Type entry for an ARRAY type. +struct TArrayTypeEntry { + 1: required TTypeEntryPtr objectTypePtr +} + +// Type entry for a MAP type. +struct TMapTypeEntry { + 1: required TTypeEntryPtr keyTypePtr + 2: required TTypeEntryPtr valueTypePtr +} + +// Type entry for a STRUCT type. +struct TStructTypeEntry { + 1: required map nameToTypePtr +} + +// Type entry for a UNIONTYPE type. +struct TUnionTypeEntry { + 1: required map nameToTypePtr +} + +struct TUserDefinedTypeEntry { + // The fully qualified name of the class implementing this type. + 1: required string typeClassName +} + +// We use a union here since Thrift does not support inheritance. +union TTypeEntry { + 1: TPrimitiveTypeEntry primitiveEntry + 2: TArrayTypeEntry arrayEntry + 3: TMapTypeEntry mapEntry + 4: TStructTypeEntry structEntry + 5: TUnionTypeEntry unionEntry + 6: TUserDefinedTypeEntry userDefinedTypeEntry +} + +// Type descriptor for columns. +struct TTypeDesc { + // The "top" type is always the first element of the list. 
+ // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE + // type, then subsequent elements represent nested types. + 1: required list types +} + +// A result set column descriptor. +struct TColumnDesc { + // The name of the column + 1: required string columnName + + // The type descriptor for this column + 2: required TTypeDesc typeDesc + + // The ordinal position of this column in the schema + 3: required i32 position + + 4: optional string comment +} + +// Metadata used to describe the schema (column names, types, comments) +// of result sets. +struct TTableSchema { + 1: required list columns +} + +// A Boolean column value. +struct TBoolValue { + // NULL if value is unset. + 1: optional bool value +} + +// A Byte column value. +struct TByteValue { + // NULL if value is unset. + 1: optional byte value +} + +// A signed, 16 bit column value. +struct TI16Value { + // NULL if value is unset + 1: optional i16 value +} + +// A signed, 32 bit column value +struct TI32Value { + // NULL if value is unset + 1: optional i32 value +} + +// A signed 64 bit column value +struct TI64Value { + // NULL if value is unset + 1: optional i64 value +} + +// A floating point 64 bit column value +struct TDoubleValue { + // NULL if value is unset + 1: optional double value +} + +struct TStringValue { + // NULL if value is unset + 1: optional string value +} + +union TColumn { + 1: list boolColumn + 2: list byteColumn + 3: list i16Column + 4: list i32Column + 5: list i64Column + 6: list doubleColumn + 7: list stringColumn +} + +// A single column value in a result set. +// Note that Hive's type system is richer than Thrift's, +// so in some cases we have to map multiple Hive types +// to the same Thrift type. On the client-side this is +// disambiguated by looking at the Schema of the +// result set. +union TColumnValue { + 1: TBoolValue boolVal // BOOLEAN + 2: TByteValue byteVal // TINYINT + 3: TI16Value i16Val // SMALLINT + 4: TI32Value i32Val // INT + 5: TI64Value i64Val // BIGINT, TIMESTAMP + 6: TDoubleValue doubleVal // FLOAT, DOUBLE + 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL +} + +// Represents a row in a rowset. +struct TRow { + 1: required list colVals +} + +// Represents a rowset +struct TRowSet { + // The starting row offset of this rowset. + 1: required i64 startRowOffset + 2: required list rows + 3: optional list columns +} + +// The return status code contained in each response. +enum TStatusCode { + SUCCESS_STATUS, + SUCCESS_WITH_INFO_STATUS, + STILL_EXECUTING_STATUS, + ERROR_STATUS, + INVALID_HANDLE_STATUS +} + +// The return status of a remote request +struct TStatus { + 1: required TStatusCode statusCode + + // If status is SUCCESS_WITH_INFO, info_msgs may be populated with + // additional diagnostic information. + 2: optional list infoMessages + + // If status is ERROR, then the following fields may be set + 3: optional string sqlState // as defined in the ISO/IEF CLI specification + 4: optional i32 errorCode // internal error code + 5: optional string errorMessage +} + +// The state of an operation (i.e. a query or other +// asynchronous operation that generates a result set) +// on the server. +enum TOperationState { + // The operation has been initialized + INITIALIZED_STATE, + + // The operation is running. In this state the result + // set is not available. + RUNNING_STATE, + + // The operation has completed. When an operation is in + // this state its result set may be fetched. 
+ FINISHED_STATE, + + // The operation was canceled by a client + CANCELED_STATE, + + // The operation was closed by a client + CLOSED_STATE, + + // The operation failed due to an error + ERROR_STATE, + + // The operation is in an unrecognized state + UKNOWN_STATE, +} + + +// A string identifier. This is interpreted literally. +typedef string TIdentifier + +// A search pattern. +// +// Valid search pattern characters: +// '_': Any single character. +// '%': Any sequence of zero or more characters. +// '\': Escape character used to include special characters, +// e.g. '_', '%', '\'. If a '\' precedes a non-special +// character it has no special meaning and is interpreted +// literally. +typedef string TPattern + + +// A search pattern or identifier. Used as input +// parameter for many of the catalog functions. +typedef string TPatternOrIdentifier + +struct THandleIdentifier { + // 16 byte globally unique identifier + // This is the public ID of the handle and + // can be used for reporting. + 1: required binary guid, + + // 16 byte secret generated by the server + // and used to verify that the handle is not + // being hijacked by another user. + 2: required binary secret, +} + +// Client-side handle to persistent +// session information on the server-side. +struct TSessionHandle { + 1: required THandleIdentifier sessionId +} + +// The subtype of an OperationHandle. +enum TOperationType { + EXECUTE_STATEMENT, + GET_TYPE_INFO, + GET_CATALOGS, + GET_SCHEMAS, + GET_TABLES, + GET_TABLE_TYPES, + GET_COLUMNS, + GET_FUNCTIONS, + UNKNOWN, +} + +// Client-side reference to a task running +// asynchronously on the server. +struct TOperationHandle { + 1: required THandleIdentifier operationId + 2: required TOperationType operationType + + // If hasResultSet = TRUE, then this operation + // generates a result set that can be fetched. + // Note that the result set may be empty. + // + // If hasResultSet = FALSE, then this operation + // does not generate a result set, and calling + // GetResultSetMetadata or FetchResults against + // this OperationHandle will generate an error. + 3: required bool hasResultSet + + // For operations that don't generate result sets, + // modifiedRowCount is either: + // + // 1) The number of rows that were modified by + // the DML operation (e.g. number of rows inserted, + // number of rows deleted, etc). + // + // 2) 0 for operations that don't modify or add rows. + // + // 3) < 0 if the operation is capable of modifiying rows, + // but Hive is unable to determine how many rows were + // modified. For example, Hive's LOAD DATA command + // doesn't generate row count information because + // Hive doesn't inspect the data as it is loaded. + // + // modifiedRowCount is unset if the operation generates + // a result set. + 4: optional double modifiedRowCount +} + + +// OpenSession() +// +// Open a session (connection) on the server against +// which operations may be executed. +struct TOpenSessionReq { + // The version of the HiveServer2 protocol that the client is using. + 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1 + + // Username and password for authentication. + // Depending on the authentication scheme being used, + // this information may instead be provided by a lower + // protocol layer, in which case these fields may be + // left unset. + 2: optional string username + 3: optional string password + + // Configuration overlay which is applied when the session is + // first created. 
+ 4: optional map configuration +} + +struct TOpenSessionResp { + 1: required TStatus status + + // The protocol version that the server is using. + 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1 + + // Session Handle + 3: optional TSessionHandle sessionHandle + + // The configuration settings for this session. + 4: optional map configuration +} + + +// CloseSession() +// +// Closes the specified session and frees any resources +// currently allocated to that session. Any open +// operations in that session will be canceled. +struct TCloseSessionReq { + 1: required TSessionHandle sessionHandle +} + +struct TCloseSessionResp { + 1: required TStatus status +} + + + +enum TGetInfoType { + CLI_MAX_DRIVER_CONNECTIONS = 0, + CLI_MAX_CONCURRENT_ACTIVITIES = 1, + CLI_DATA_SOURCE_NAME = 2, + CLI_FETCH_DIRECTION = 8, + CLI_SERVER_NAME = 13, + CLI_SEARCH_PATTERN_ESCAPE = 14, + CLI_DBMS_NAME = 17, + CLI_DBMS_VER = 18, + CLI_ACCESSIBLE_TABLES = 19, + CLI_ACCESSIBLE_PROCEDURES = 20, + CLI_CURSOR_COMMIT_BEHAVIOR = 23, + CLI_DATA_SOURCE_READ_ONLY = 25, + CLI_DEFAULT_TXN_ISOLATION = 26, + CLI_IDENTIFIER_CASE = 28, + CLI_IDENTIFIER_QUOTE_CHAR = 29, + CLI_MAX_COLUMN_NAME_LEN = 30, + CLI_MAX_CURSOR_NAME_LEN = 31, + CLI_MAX_SCHEMA_NAME_LEN = 32, + CLI_MAX_CATALOG_NAME_LEN = 34, + CLI_MAX_TABLE_NAME_LEN = 35, + CLI_SCROLL_CONCURRENCY = 43, + CLI_TXN_CAPABLE = 46, + CLI_USER_NAME = 47, + CLI_TXN_ISOLATION_OPTION = 72, + CLI_INTEGRITY = 73, + CLI_GETDATA_EXTENSIONS = 81, + CLI_NULL_COLLATION = 85, + CLI_ALTER_TABLE = 86, + CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, + CLI_SPECIAL_CHARACTERS = 94, + CLI_MAX_COLUMNS_IN_GROUP_BY = 97, + CLI_MAX_COLUMNS_IN_INDEX = 98, + CLI_MAX_COLUMNS_IN_ORDER_BY = 99, + CLI_MAX_COLUMNS_IN_SELECT = 100, + CLI_MAX_COLUMNS_IN_TABLE = 101, + CLI_MAX_INDEX_SIZE = 102, + CLI_MAX_ROW_SIZE = 104, + CLI_MAX_STATEMENT_LEN = 105, + CLI_MAX_TABLES_IN_SELECT = 106, + CLI_MAX_USER_NAME_LEN = 107, + CLI_OJ_CAPABILITIES = 115, + + CLI_XOPEN_CLI_YEAR = 10000, + CLI_CURSOR_SENSITIVITY = 10001, + CLI_DESCRIBE_PARAMETER = 10002, + CLI_CATALOG_NAME = 10003, + CLI_COLLATION_SEQ = 10004, + CLI_MAX_IDENTIFIER_LEN = 10005, +} + +union TGetInfoValue { + 1: string stringValue + 2: i16 smallIntValue + 3: i32 integerBitmask + 4: i32 integerFlag + 5: i32 binaryValue + 6: i64 lenValue +} + +// GetInfo() +// +// This function is based on ODBC's CLIGetInfo() function. +// The function returns general information about the data source +// using the same keys as ODBC. +struct TGetInfoReq { + // The sesssion to run this request against + 1: required TSessionHandle sessionHandle + + 2: required TGetInfoType infoType +} + +struct TGetInfoResp { + 1: required TStatus status + + 2: required TGetInfoValue infoValue +} + + +// ExecuteStatement() +// +// Execute a statement. +// The returned OperationHandle can be used to check on the +// status of the statement, and to fetch results once the +// statement has finished executing. +struct TExecuteStatementReq { + // The session to exexcute the statement against + 1: required TSessionHandle sessionHandle + + // The statement to be executed (DML, DDL, SET, etc) + 2: required string statement + + // Configuration properties that are overlayed on top of the + // the existing session configuration before this statement + // is executed. These properties apply to this statement + // only and will not affect the subsequent state of the Session. 
+ 3: optional map confOverlay +} + +struct TExecuteStatementResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTypeInfo() +// +// Get information about types supported by the HiveServer instance. +// The information is returned as a result set which can be fetched +// using the OperationHandle provided in the response. +// +// Refer to the documentation for ODBC's CLIGetTypeInfo function for +// the format of the result set. +struct TGetTypeInfoReq { + // The session to run this request against. + 1: required TSessionHandle sessionHandle +} + +struct TGetTypeInfoResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetCatalogs() +// +// Returns the list of catalogs (databases) +// Results are ordered by TABLE_CATALOG +// +// Resultset columns : +// col1 +// name: TABLE_CAT +// type: STRING +// desc: Catalog name. NULL if not applicable. +// +struct TGetCatalogsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle +} + +struct TGetCatalogsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetSchemas() +// +// Retrieves the schema names available in this database. +// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. +// col1 +// name: TABLE_SCHEM +// type: STRING +// desc: schema name +// col2 +// name: TABLE_CATALOG +// type: STRING +// desc: catalog name +struct TGetSchemasReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog. Must not contain a search pattern. + 2: optional TIdentifier catalogName + + // schema name or pattern + 3: optional TPatternOrIdentifier schemaName +} + +struct TGetSchemasResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTables() +// +// Returns a list of tables with catalog, schema, and table +// type information. The information is returned as a result +// set which can be fetched using the OperationHandle +// provided in the response. +// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME +// +// Result Set Columns: +// +// col1 +// name: TABLE_CAT +// type: STRING +// desc: Catalog name. NULL if not applicable. +// +// col2 +// name: TABLE_SCHEM +// type: STRING +// desc: Schema name. +// +// col3 +// name: TABLE_NAME +// type: STRING +// desc: Table name. +// +// col4 +// name: TABLE_TYPE +// type: STRING +// desc: The table type, e.g. "TABLE", "VIEW", etc. +// +// col5 +// name: REMARKS +// type: STRING +// desc: Comments about the table +// +struct TGetTablesReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog or a search pattern. + 2: optional TPatternOrIdentifier catalogName + + // Name of the schema or a search pattern. + 3: optional TPatternOrIdentifier schemaName + + // Name of the table or a search pattern. + 4: optional TPatternOrIdentifier tableName + + // List of table types to match + // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", + // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. + 5: optional list tableTypes +} + +struct TGetTablesResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTableTypes() +// +// Returns the table types available in this database. +// The results are ordered by table type. +// +// col1 +// name: TABLE_TYPE +// type: STRING +// desc: Table type name. 
+struct TGetTableTypesReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle +} + +struct TGetTableTypesResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetColumns() +// +// Returns a list of columns in the specified tables. +// The information is returned as a result set which can be fetched +// using the OperationHandle provided in the response. +// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, +// and ORDINAL_POSITION. +// +// Result Set Columns are the same as those for the ODBC CLIColumns +// function. +// +struct TGetColumnsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog. Must not contain a search pattern. + 2: optional TIdentifier catalogName + + // Schema name or search pattern + 3: optional TPatternOrIdentifier schemaName + + // Table name or search pattern + 4: optional TPatternOrIdentifier tableName + + // Column name or search pattern + 5: optional TPatternOrIdentifier columnName +} + +struct TGetColumnsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetFunctions() +// +// Returns a list of functions supported by the data source. The +// behavior of this function matches +// java.sql.DatabaseMetaData.getFunctions() both in terms of +// inputs and outputs. +// +// Result Set Columns: +// +// col1 +// name: FUNCTION_CAT +// type: STRING +// desc: Function catalog (may be null) +// +// col2 +// name: FUNCTION_SCHEM +// type: STRING +// desc: Function schema (may be null) +// +// col3 +// name: FUNCTION_NAME +// type: STRING +// desc: Function name. This is the name used to invoke the function. +// +// col4 +// name: REMARKS +// type: STRING +// desc: Explanatory comment on the function. +// +// col5 +// name: FUNCTION_TYPE +// type: SMALLINT +// desc: Kind of function. One of: +// * functionResultUnknown - Cannot determine if a return value or a table +// will be returned. +// * functionNoTable - Does not a return a table. +// * functionReturnsTable - Returns a table. +// +// col6 +// name: SPECIFIC_NAME +// type: STRING +// desc: The name which uniquely identifies this function within its schema. +// In this case this is the fully qualified class name of the class +// that implements this function. +// +struct TGetFunctionsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // A catalog name; must match the catalog name as it is stored in the + // database; "" retrieves those without a catalog; null means + // that the catalog name should not be used to narrow the search. + 2: optional TIdentifier catalogName + + // A schema name pattern; must match the schema name as it is stored + // in the database; "" retrieves those without a schema; null means + // that the schema name should not be used to narrow the search. + 3: optional TPatternOrIdentifier schemaName + + // A function name pattern; must match the function name as it is stored + // in the database. + 4: required TPatternOrIdentifier functionName +} + +struct TGetFunctionsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetOperationStatus() +// +// Get the status of an operation running on the server. 
+struct TGetOperationStatusReq { + // Session to run this request against + 1: required TOperationHandle operationHandle +} + +struct TGetOperationStatusResp { + 1: required TStatus status + 2: optional TOperationState operationState +} + + +// CancelOperation() +// +// Cancels processing on the specified operation handle and +// frees any resources which were allocated. +struct TCancelOperationReq { + // Operation to cancel + 1: required TOperationHandle operationHandle +} + +struct TCancelOperationResp { + 1: required TStatus status +} + + +// CloseOperation() +// +// Given an operation in the FINISHED, CANCELED, +// or ERROR states, CloseOperation() will free +// all of the resources which were allocated on +// the server to service the operation. +struct TCloseOperationReq { + 1: required TOperationHandle operationHandle +} + +struct TCloseOperationResp { + 1: required TStatus status +} + + +// GetResultSetMetadata() +// +// Retrieves schema information for the specified operation +struct TGetResultSetMetadataReq { + // Operation for which to fetch result set schema information + 1: required TOperationHandle operationHandle +} + +struct TGetResultSetMetadataResp { + 1: required TStatus status + 2: optional TTableSchema schema +} + + +enum TFetchOrientation { + // Get the next rowset. The fetch offset is ignored. + FETCH_NEXT, + + // Get the previous rowset. The fetch offset is ignored. + // NOT SUPPORTED + FETCH_PRIOR, + + // Return the rowset at the given fetch offset relative + // to the curren rowset. + // NOT SUPPORTED + FETCH_RELATIVE, + + // Return the rowset at the specified fetch offset. + // NOT SUPPORTED + FETCH_ABSOLUTE, + + // Get the first rowset in the result set. + FETCH_FIRST, + + // Get the last rowset in the result set. + // NOT SUPPORTED + FETCH_LAST +} + +// FetchResults() +// +// Fetch rows from the server corresponding to +// a particular OperationHandle. +struct TFetchResultsReq { + // Operation from which to fetch results. + 1: required TOperationHandle operationHandle + + // The fetch orientation. For V1 this must be either + // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT. + 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT + + // Max number of rows that should be returned in + // the rowset. + 3: required i64 maxRows +} + +struct TFetchResultsResp { + 1: required TStatus status + + // TRUE if there are more rows left to fetch from the server. + 2: optional bool hasMoreRows + + // The rowset. This is optional so that we have the + // option in the future of adding alternate formats for + // representing result set data, e.g. delimited strings, + // binary encoded, etc. + 3: optional TRowSet results +} + +// GetLog() +// +// Fetch operation log from the server corresponding to +// a particular OperationHandle. 
+struct TGetLogReq { + // Operation whose log is requested + 1: required TOperationHandle operationHandle +} + +struct TGetLogResp { + 1: required TStatus status + + 2: required string log +} + +service TCLIService { + + TOpenSessionResp OpenSession(1:TOpenSessionReq req); + + TCloseSessionResp CloseSession(1:TCloseSessionReq req); + + TGetInfoResp GetInfo(1:TGetInfoReq req); + + TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); + + TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); + + TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); + + TGetSchemasResp GetSchemas(1:TGetSchemasReq req); + + TGetTablesResp GetTables(1:TGetTablesReq req); + + TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); + + TGetColumnsResp GetColumns(1:TGetColumnsReq req); + + TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); + + TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); + + TCancelOperationResp CancelOperation(1:TCancelOperationReq req); + + TCloseOperationResp CloseOperation(1:TCloseOperationReq req); + + TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); + + TFetchResultsResp FetchResults(1:TFetchResultsReq req); + + TGetLogResp GetLog(1:TGetLogReq req); +} diff --git a/herringbone-impala/src/main/thrift/fb303.thrift b/herringbone-impala/src/main/thrift/fb303.thrift new file mode 100644 index 0000000..6438092 --- /dev/null +++ b/herringbone-impala/src/main/thrift/fb303.thrift @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * fb303.thrift + */ + +namespace java com.facebook.fb303 +namespace cpp facebook.fb303 +namespace rb Impala.Protocol.fb303 + +/** + * Common status reporting mechanism across all services + */ +enum fb_status { + DEAD = 0, + STARTING = 1, + ALIVE = 2, + STOPPING = 3, + STOPPED = 4, + WARNING = 5, +} + +/** + * Standard base service + */ +service FacebookService { + + /** + * Returns a descriptive name of the service + */ + string getName(), + + /** + * Returns the version of the service + */ + string getVersion(), + + /** + * Gets the status of this service + */ + fb_status getStatus(), + + /** + * User friendly description of status, such as why the service is in + * the dead or warning state, or what is being started or stopped. 
+ */ + string getStatusDetails(), + + /** + * Gets the counters for this service + */ + map getCounters(), + + /** + * Gets the value of a single counter + */ + i64 getCounter(1: string key), + + /** + * Sets an option + */ + void setOption(1: string key, 2: string value), + + /** + * Gets an option + */ + string getOption(1: string key), + + /** + * Gets all options + */ + map getOptions(), + + /** + * Returns a CPU profile over the given time interval (client and server + * must agree on the profile format). + */ + string getCpuProfile(1: i32 profileDurationInSec), + + /** + * Returns the unix time that the server has been running since + */ + i64 aliveSince(), + + /** + * Tell the server to reload its configuration, reopen log files, etc + */ + oneway void reinitialize(), + + /** + * Suggest a shutdown to the server + */ + oneway void shutdown(), + +} diff --git a/herringbone-impala/src/main/thrift/hive_metastore.thrift b/herringbone-impala/src/main/thrift/hive_metastore.thrift new file mode 100644 index 0000000..5e05367 --- /dev/null +++ b/herringbone-impala/src/main/thrift/hive_metastore.thrift @@ -0,0 +1,528 @@ +#!/usr/local/bin/thrift -java + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +# +# Thrift Service that the MetaStore is built on +# + +include "fb303.thrift" + +namespace java org.apache.hadoop.hive.metastore.api +namespace php metastore +namespace cpp Apache.Hadoop.Hive +namespace rb Impala.Protocol.HiveMetastore + +const string DDL_TIME = "transient_lastDdlTime" + +struct Version { + 1: string version, + 2: string comments +} + +struct FieldSchema { + 1: string name, // name of the field + 2: string type, // type of the field. 
primitive types defined above, specify list, map for lists & maps + 3: string comment +} + +struct Type { + 1: string name, // one of the types in PrimitiveTypes or CollectionTypes or User defined types + 2: optional string type1, // object type if the name is 'list' (LIST_TYPE), key type if the name is 'map' (MAP_TYPE) + 3: optional string type2, // val type if the name is 'map' (MAP_TYPE) + //4: optional list fields // if the name is one of the user defined types +} + +enum HiveObjectType { + GLOBAL = 1, + DATABASE = 2, + TABLE = 3, + PARTITION = 4, + COLUMN = 5, +} + +enum PrincipalType { + USER = 1, + ROLE = 2, + GROUP = 3, +} + +const string HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__" +const string HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__" +const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__" + +enum PartitionEventType { + LOAD_DONE = 1, +} + +struct HiveObjectRef{ + 1: HiveObjectType objectType, + 2: string dbName, + 3: string objectName, + 4: list partValues, + 5: string columnName, +} + +struct PrivilegeGrantInfo { + 1: string privilege, + 2: i32 createTime, + 3: string grantor, + 4: PrincipalType grantorType, + 5: bool grantOption, +} + +struct HiveObjectPrivilege { + 1: HiveObjectRef hiveObject, + 2: string principalName, + 3: PrincipalType principalType, + 4: PrivilegeGrantInfo grantInfo, +} + +struct PrivilegeBag { + 1: list privileges, +} + +struct PrincipalPrivilegeSet { + 1: map> userPrivileges, // user name -> privilege grant info + 2: map> groupPrivileges, // group name -> privilege grant info + 3: map> rolePrivileges, //role name -> privilege grant info +} + +struct Role { + 1: string roleName, + 2: i32 createTime, + 3: string ownerName, +} + +// namespace for tables +struct Database { + 1: string name, + 2: string description, + 3: string locationUri, + 4: map parameters, // properties associated with the database + 5: optional PrincipalPrivilegeSet privileges +} + +// This object holds the information needed by SerDes +struct SerDeInfo { + 1: string name, // name of the serde, table name by default + 2: string serializationLib, // usually the class that implements the extractor & loader + 3: map parameters // initialization parameters +} + +// sort order of a column (column name along with asc(1)/desc(0)) +struct Order { + 1: string col, // sort column name + 2: i32 order // asc(1) or desc(0) +} + +// this object holds all the information about physical storage of the data belonging to a table +struct StorageDescriptor { + 1: list cols, // required (refer to types defined above) + 2: string location, // defaults to //tablename + 3: string inputFormat, // SequenceFileInputFormat (binary) or TextInputFormat` or custom format + 4: string outputFormat, // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format + 5: bool compressed, // compressed or not + 6: i32 numBuckets, // this must be specified if there are any dimension columns + 7: SerDeInfo serdeInfo, // serialization and deserialization information + 8: list bucketCols, // reducer grouping columns and clustering columns and bucketing columns` + 9: list sortCols, // sort order of the data in each bucket + 10: map parameters // any user supplied key value hash +} + +// table information +struct Table { + 1: string tableName, // name of the table + 2: string dbName, // database name ('default') + 3: string owner, // owner of this table + 4: i32 createTime, // creation time of the table + 5: i32 lastAccessTime, // last access time (usually this 
will be filled from HDFS and shouldn't be relied on) + 6: i32 retention, // retention time + 7: StorageDescriptor sd, // storage descriptor of the table + 8: list partitionKeys, // partition keys of the table. only primitive types are supported + 9: map parameters, // to store comments or any other user level parameters + 10: string viewOriginalText, // original view text, null for non-view + 11: string viewExpandedText, // expanded view text, null for non-view + 12: string tableType, // table type enum, e.g. EXTERNAL_TABLE + 13: optional PrincipalPrivilegeSet privileges, +} + +struct Partition { + 1: list values // string value is converted to appropriate partition key type + 2: string dbName, + 3: string tableName, + 4: i32 createTime, + 5: i32 lastAccessTime, + 6: StorageDescriptor sd, + 7: map parameters, + 8: optional PrincipalPrivilegeSet privileges +} + +struct Index { + 1: string indexName, // unique with in the whole database namespace + 2: string indexHandlerClass, // reserved + 3: string dbName, + 4: string origTableName, + 5: i32 createTime, + 6: i32 lastAccessTime, + 7: string indexTableName, + 8: StorageDescriptor sd, + 9: map parameters, + 10: bool deferredRebuild +} + +// schema of the table/query results etc. +struct Schema { + // column names, types, comments + 1: list fieldSchemas, // delimiters etc + 2: map properties +} + +// Key-value store to be used with selected +// Metastore APIs (create, alter methods). +// The client can pass environment properties / configs that can be +// accessed in hooks. +struct EnvironmentContext { + 1: map properties +} + +exception MetaException { + 1: string message +} + +exception UnknownTableException { + 1: string message +} + +exception UnknownDBException { + 1: string message +} + +exception AlreadyExistsException { + 1: string message +} + +exception InvalidPartitionException { + 1: string message +} + +exception UnknownPartitionException { + 1: string message +} + +exception InvalidObjectException { + 1: string message +} + +exception NoSuchObjectException { + 1: string message +} + +exception IndexAlreadyExistsException { + 1: string message +} + +exception InvalidOperationException { + 1: string message +} + +exception ConfigValSecurityException { + 1: string message +} + +/** +* This interface is live. 
+*/ +service ThriftHiveMetastore extends fb303.FacebookService +{ + void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3) + Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2) + void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3) + list get_databases(1:string pattern) throws(1:MetaException o1) + list get_all_databases() throws(1:MetaException o1) + void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // returns the type with given name (make seperate calls for the dependent types if needed) + Type get_type(1:string name) throws(1:MetaException o1, 2:NoSuchObjectException o2) + bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3) + bool drop_type(1:string type) throws(1:MetaException o1, 2:NoSuchObjectException o2) + map get_type_all(1:string name) + throws(1:MetaException o2) + + // Gets a list of FieldSchemas describing the columns of a particular table + list get_fields(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3), + + // Gets a list of FieldSchemas describing both the columns and the partition keys of a particular table + list get_schema(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3) + + // create a Hive table. Following fields must be set + // tableName + // database (only 'default' for now until Hive QL supports databases) + // owner (not needed, but good to have for tracking purposes) + // sd.cols (list of field schemas) + // sd.inputFormat (SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat) + // sd.outputFormat (SequenceFileInputFormat (binary) or TextInputFormat) + // sd.serdeInfo.serializationLib (SerDe class name eg org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe + // * See notes on DDL_TIME + void create_table(1:Table tbl) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:NoSuchObjectException o4) + void create_table_with_environment_context(1:Table tbl, + 2:EnvironmentContext environment_context) + throws (1:AlreadyExistsException o1, + 2:InvalidObjectException o2, 3:MetaException o3, + 4:NoSuchObjectException o4) + // drops the table and all the partitions associated with it if the table has partitions + // delete data (including partitions) if deleteData is set to true + void drop_table(1:string dbname, 2:string name, 3:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o3) + list get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1) + list get_all_tables(1: string db_name) throws (1: MetaException o1) + + Table get_table(1:string dbname, 2:string tbl_name) + throws (1:MetaException o1, 2:NoSuchObjectException o2) + list
get_table_objects_by_name(1:string dbname, 2:list tbl_names) + throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3) + + // Get a list of table names that match a filter. + // The filter operators are LIKE, <, <=, >, >=, =, <> + // + // In the filter statement, values interpreted as strings must be enclosed in quotes, + // while values interpreted as integers should not be. Strings and integers are the only + // supported value types. + // + // The currently supported key names in the filter are: + // Constants.HIVE_FILTER_FIELD_OWNER, which filters on the tables' owner's name + // and supports all filter operators + // Constants.HIVE_FILTER_FIELD_LAST_ACCESS, which filters on the last access times + // and supports all filter operators except LIKE + // Constants.HIVE_FILTER_FIELD_PARAMS, which filters on the tables' parameter keys and values + // and only supports the filter operators = and <>. + // Append the parameter key name to HIVE_FILTER_FIELD_PARAMS in the filter statement. + // For example, to filter on parameter keys called "retention", the key name in the filter + // statement should be Constants.HIVE_FILTER_FIELD_PARAMS + "retention" + // Also, = and <> only work for keys that exist + // in the tables. E.g., if you are looking for tables where key1 <> value, it will only + // look at tables that have a value for the parameter key1. + // Some example filter statements include: + // filter = Constants.HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " + + // Constants.HIVE_FILTER_FIELD_LAST_ACCESS + " = 0"; + // filter = Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " + + // Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"90\"" + // @param dbName + // The name of the database from which you will retrieve the table names + // @param filterType + // The type of filter + // @param filter + // The filter string + // @param max_tables + // The maximum number of tables returned + // @return A list of table names that match the desired filter + list get_table_names_by_filter(1:string dbname, 2:string filter, 3:i16 max_tables=-1) + throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3) + + // alter table applies to only future partitions not for existing partitions + // * See notes on DDL_TIME + void alter_table(1:string dbname, 2:string tbl_name, 3:Table new_tbl) + throws (1:InvalidOperationException o1, 2:MetaException o2) + void alter_table_with_environment_context(1:string dbname, 2:string tbl_name, + 3:Table new_tbl, 4:EnvironmentContext environment_context) + throws (1:InvalidOperationException o1, 2:MetaException o2) + // the following applies to only tables that have partitions + // * See notes on DDL_TIME + Partition add_partition(1:Partition new_part) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition add_partition_with_environment_context(1:Partition new_part, + 2:EnvironmentContext environment_context) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, + 3:MetaException o3) + i32 add_partitions(1:list new_parts) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition append_partition(1:string db_name, 2:string tbl_name, 3:list part_vals) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition append_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 
3:MetaException o3) + bool drop_partition(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + bool drop_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + Partition get_partition(1:string db_name, 2:string tbl_name, 3:list part_vals) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 3:list part_vals, + 4: string user_name, 5: list group_names) throws(1:MetaException o1, 2:NoSuchObjectException o2) + + Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string part_name) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // returns all the partitions for this table in reverse chronological order. + // If max parts is given then it will return only that many. + list get_partitions(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + list get_partitions_with_auth(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1, + 4: string user_name, 5: list group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2) + + list get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1) + throws(1:MetaException o2) + + // get_partition*_ps methods allow filtering by a partial partition specification, + // as needed for dynamic partitions. The values that are not restricted should + // be empty strings. Nulls were considered (instead of "") but caused errors in + // generated Python code. The size of part_vals may be smaller than the + // number of partition columns - the unspecified values are considered the same + // as "". + list get_partitions_ps(1:string db_name 2:string tbl_name + 3:list part_vals, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + list get_partitions_ps_with_auth(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:i16 max_parts=-1, + 5: string user_name, 6: list group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2) + + list get_partition_names_ps(1:string db_name, + 2:string tbl_name, 3:list part_vals, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // get the partitions matching the given partition filter + list get_partitions_by_filter(1:string db_name 2:string tbl_name + 3:string filter, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // get partitions give a list of partition names + list get_partitions_by_names(1:string db_name 2:string tbl_name 3:list names) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // changes the partition to the new partition object. partition is identified from the part values + // in the new_part + // * See notes on DDL_TIME + void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition new_part) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + void alter_partition_with_environment_context(1:string db_name, + 2:string tbl_name, 3:Partition new_part, + 4:EnvironmentContext environment_context) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + // rename the old partition to the new partition object by changing old part values to the part values + // in the new_part. old partition is identified from part_vals. + // partition keys in new_part should be the same as those in old partition. 
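The partial-specification convention above is easiest to see with an example: for a hypothetical table partitioned by (ds, hr), restricting only ds means passing the day followed by "". A sketch against an assumed Thrift-generated client for this service (the java namespace is declared at the top of this file; the database, table, and partition columns are made up); the mutation calls such as rename_partition() continue below.

    // Sketch only: assumes the Thrift-generated ThriftHiveMetastore.Client; table layout is hypothetical.
    import scala.collection.JavaConverters._

    def partitionsForDay(client: ThriftHiveMetastore.Client, day: String) = {
      val allPartitions: Short = -1                 // i16 max_parts: -1 = no limit
      val partialSpec = List(day, "").asJava        // "" = unrestricted trailing partition column
      client.get_partitions_ps("default", "events", partialSpec, allPartitions)
    }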
+ void rename_partition(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:Partition new_part) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + // gets the value of the configuration key in the metastore server. returns + // defaultValue if the key does not exist. if the configuration key does not + // begin with "hive", "mapred", or "hdfs", a ConfigValSecurityException is + // thrown. + string get_config_value(1:string name, 2:string defaultValue) + throws(1:ConfigValSecurityException o1) + + // converts a partition name into a partition values array + list partition_name_to_vals(1: string part_name) + throws(1: MetaException o1) + // converts a partition name into a partition specification (a mapping from + // the partition cols to the values) + map partition_name_to_spec(1: string part_name) + throws(1: MetaException o1) + + void markPartitionForEvent(1:string db_name, 2:string tbl_name, 3:map part_vals, + 4:PartitionEventType eventType) throws (1: MetaException o1, 2: NoSuchObjectException o2, + 3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5, + 6: InvalidPartitionException o6) + bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 3:map part_vals, + 4: PartitionEventType eventType) throws (1: MetaException o1, 2:NoSuchObjectException o2, + 3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5, + 6: InvalidPartitionException o6) + + //index + Index add_index(1:Index new_index, 2: Table index_table) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 4:Index new_idx) + throws (1:InvalidOperationException o1, 2:MetaException o2) + bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string index_name, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string index_name) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + list get_indexes(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + list get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1) + throws(1:MetaException o2) + + //authorization privileges + + bool create_role(1:Role role) throws(1:MetaException o1) + bool drop_role(1:string role_name) throws(1:MetaException o1) + list get_role_names() throws(1:MetaException o1) + bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type, + 4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) throws(1:MetaException o1) + bool revoke_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type) + throws(1:MetaException o1) + list list_roles(1:string principal_name, 2:PrincipalType principal_type) throws(1:MetaException o1) + + PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string user_name, + 3: list group_names) throws(1:MetaException o1) + list list_privileges(1:string principal_name, 2:PrincipalType principal_type, + 3: HiveObjectRef hiveObject) throws(1:MetaException o1) + + bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1) + bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1) + + // this is used by metastore client to send UGI information to metastore server immediately + // after setting up a connection. 
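partition_name_to_vals() and partition_name_to_spec() above both operate on Hive's key=value/key=value partition names, the same convention the loader code later in this patch parses out of HDFS paths (HadoopFs.findPartitions). A standalone illustration of the expected mapping, ignoring the escaping Hive applies to special characters; the connection-level calls (set_ugi() and the delegation-token methods) continue below.

    // Client-side illustration of the documented mapping only; the real conversion
    // is done server-side by partition_name_to_vals / partition_name_to_spec.
    def specOf(partName: String): Map[String, String] =
      partName.split("/").map { segment =>
        val Array(k, v) = segment.split("=", 2)
        k -> v
      }.toMap

    specOf("ds=2014-11-21/hr=00")
    // Map("ds" -> "2014-11-21", "hr" -> "00"); the _vals variant returns just
    // the values, List("2014-11-21", "00"), in partition-column order.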
+ list set_ugi(1:string user_name, 2:list group_names) throws (1:MetaException o1) + + //Authentication (delegation token) interfaces + + // get metastore server delegation token for use from the map/reduce tasks to authenticate + // to metastore server + string get_delegation_token(1:string token_owner, 2:string renewer_kerberos_principal_name) + throws (1:MetaException o1) + + // method to renew delegation token obtained from metastore server + i64 renew_delegation_token(1:string token_str_form) throws (1:MetaException o1) + + // method to cancel delegation token obtained from metastore server + void cancel_delegation_token(1:string token_str_form) throws (1:MetaException o1) +} + +// * Note about the DDL_TIME: When creating or altering a table or a partition, +// if the DDL_TIME is not set, the current time will be used. + +// For storing info about archived partitions in parameters + +// Whether the partition is archived +const string IS_ARCHIVED = "is_archived", +// The original location of the partition, before archiving. After archiving, +// this directory will contain the archive. When the partition +// is dropped, this directory will be deleted +const string ORIGINAL_LOCATION = "original_location", + +// these should be needed only for backward compatibility with filestore +const string META_TABLE_COLUMNS = "columns", +const string META_TABLE_COLUMN_TYPES = "columns.types", +const string BUCKET_FIELD_NAME = "bucket_field_name", +const string BUCKET_COUNT = "bucket_count", +const string FIELD_TO_DIMENSION = "field_to_dimension", +const string META_TABLE_NAME = "name", +const string META_TABLE_DB = "db", +const string META_TABLE_LOCATION = "location", +const string META_TABLE_SERDE = "serde", +const string META_TABLE_PARTITION_COLUMNS = "partition_columns", +const string FILE_INPUT_FORMAT = "file.inputformat", +const string FILE_OUTPUT_FORMAT = "file.outputformat", +const string META_TABLE_STORAGE = "storage_handler", + + + diff --git a/herringbone-main/pom.xml b/herringbone-main/pom.xml new file mode 100644 index 0000000..08a54ab --- /dev/null +++ b/herringbone-main/pom.xml @@ -0,0 +1,168 @@ + + 4.0.0 + + com.stripe + herringbone-main + 0.0.1 + jar + + Herringbone Main + + + + dtrott + http://maven.davidtrott.com/repository + + + + + + + org.scalatest + scalatest-maven-plugin + 1.0-M2 + + ${project.build.directory}/surefire-reports + . 
+ WDF TestSuite.txt + ${project.build.directory}/html/scalatest + false + + + + test + + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.6 + 1.6 + + + + maven-jar-plugin + 2.3.1 + + + + maven-resources-plugin + 2.4.3 + + + + net.alchim31.maven + scala-maven-plugin + 3.1.6 + + incremental + true + + + + + compile + testCompile + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.3 + + false + target/herringbone-${project.version}-jar-with-dependencies.jar + + + + package + + shade + + + + + + + + + 1.6.0rc4 + UTF-8 + 2.10.4 + 1.7 + 1.7 + + + + + com.twitter + parquet-common + ${parquet.version} + + + com.twitter + parquet-encoding + ${parquet.version} + + + com.twitter + parquet-column + ${parquet.version} + + + com.twitter + parquet-hadoop + ${parquet.version} + + + org.apache.hadoop + hadoop-client + 2.5.2 + provided + + + org.apache.hive + hive-jdbc + 0.14.0 + + + com.twitter + parquet-hadoop-bundle + + + + + org.rogach + scallop_2.10 + 0.9.5 + + + org.scala-lang + jline + 2.9.0-1 + + + org.scalatest + scalatest_2.10 + 2.0 + test + + + org.scalamock + scalamock-scalatest-support_2.10 + 3.1.RC1 + test + + + diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/CompactInputFormat.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/CompactInputFormat.scala new file mode 100644 index 0000000..c9f1628 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/CompactInputFormat.scala @@ -0,0 +1,168 @@ +package com.stripe.herringbone + +import java.util.{List => JavaList} +import java.io.DataOutput +import java.io.DataInput + +import scala.collection.mutable.MutableList +import scala.collection.JavaConverters._ +import scala.collection.JavaConversions._ + +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapreduce.{InputSplit,Job,JobContext,Mapper,TaskAttemptContext} +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat +import parquet.hadoop.api.ReadSupport +import parquet.hadoop.{ParquetInputFormat,ParquetInputSplit,ParquetOutputFormat,ParquetRecordReader} +import parquet.hadoop.example.{ExampleOutputFormat,GroupReadSupport} +import parquet.hadoop.util.ContextUtil +import parquet.example.data.{Group,GroupWriter} +import parquet.example.data.simple.SimpleGroup + + +class CompactInputFormat[T](readSupportClass: Class[_ <: ReadSupport[T]]) extends ParquetInputFormat[T](readSupportClass) { + + // We can't accurately predict the size of the resulting merged file, so aim + // for 900MB. Our HDFS block size is 1024MB so we'll get pretty close. + val TARGET = 1024 * 1024 * 900 // 900MB. + + override def getSplits(context: JobContext): JavaList[InputSplit] = { + // Limit the splits to 20MB so it's easy to assemble them into 900MB chunks. + // This is not actually reliable. Chunks can come back bigger than 20MB, but + // it does limit the size of most chunks. + val conf = ContextUtil.getConfiguration(context) + conf.set("mapred.max.split.size", (20 * 1024 * 1024).toString) + + val splits = super.getSplits(conf, getFooters(context)).asScala.toList + val m = if (splits.isEmpty) splits else mergeSplits(splits) + m.asInstanceOf[List[InputSplit]].asJava + } + + def mergeSplits(splits: List[ParquetInputSplit]): List[MergedInputSplit] = { + val sizes = splits.map { _.getLength } + println(s"""${splits.length} initial splits were generated. 
+ | Max: ${mb(sizes.max)} + | Min: ${mb(sizes.min)} + | Avg: ${mb(sizes.sum.toDouble / sizes.length)}""".stripMargin) + + // TODO: get a CS undergrad to give us better bin packing. + var buckets = MutableList[MutableList[ParquetInputSplit]](MutableList(splits.head)) + splits.tail.foreach { split => + val bucket = buckets.minBy { b => b.map { _.getLength }.sum } + if ((split.getLength + bucket.map { _.getLength }.sum) < TARGET) { + bucket += split + } else { + buckets += MutableList(split) + } + } + + val newSizes = buckets.map { _.map { _.getLength }.sum }.toList + println(s"""${buckets.length} merged splits were generated. + | Max: ${mb(newSizes.max)} + | Min: ${mb(newSizes.min)} + | Avg: ${mb(newSizes.sum.toDouble / newSizes.length)}""".stripMargin) + + buckets.map { b => new MergedInputSplit(b.toList) }.toList + } + + override def createRecordReader(split: InputSplit, context: TaskAttemptContext): MergedRecordReader[T] = { + val readSupport = getReadSupport(ContextUtil.getConfiguration(context)) + split match { + case s: MergedInputSplit => new MergedRecordReader[T](s, context, readSupport) + case _ => throw new Exception(s"Expected a MergedInputSplit. Found a $split.") + } + } + + // Helper for pretty-printing byte values. + def mb(n: Double): String = { + val K = 1024 + val M = K * K + val G = K * M + if (n < K) f"$n%.2fB" + else if (n < M) f"${n / K}%.2fK" + else if (n < G) f"${n / M}%.2fM" + else f"${n / G}%.2fG" + } +} + +class MergedInputSplit(var splits: List[ParquetInputSplit]) extends InputSplit with Writable { + def this() = this(List()) + + var splitNumber = 0 + + def currentSplit: ParquetInputSplit = splits(splitNumber) + def nextSplit: Option[ParquetInputSplit] = { + if (splitNumber < splits.length - 1) { + splitNumber += 1 + Some(currentSplit) + } else { + None + } + } + + // write and readFields are paired for serialization/deserialization. + override def write(out: DataOutput) = { + out.writeInt(splits.length) + splits.foreach { s => s.write(out) } + } + + override def readFields(in: DataInput) = { + val count = in.readInt + splits = for (i <- List.range(0, count)) yield { + val s = new ParquetInputSplit + s.readFields(in) + s + } + } + + override def getLength: Long = splits.map { _.getLength }.sum + override def getLocations: Array[String] = splits.flatMap { _.getLocations }.toArray + override def toString = "" +} + +class MergedRecordReader[T](split: MergedInputSplit, + taskContext: TaskAttemptContext, + readSupport: ReadSupport[T]) extends ParquetRecordReader[T](readSupport) { + val totalLength = split.getLength + var progress = 0L + + override def initialize(split: InputSplit, context: TaskAttemptContext) { + super.initialize(split.asInstanceOf[MergedInputSplit].currentSplit, context) + } + + def startNextSplit(split: MergedInputSplit, context: TaskAttemptContext): Boolean = { + split.nextSplit match { + case Some(s) => { + super.initialize(s, context) + true + } + case None => false + } + } + + // nextKeyValue is used to ask for the next tuple and returns false when the + // recordReader has no more tuples. Since we're wrapping multiple splits, and + // therefore multiple record readers, we detect when the current inernal + // reader is done and move to the next reader. 
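The same idea in miniature, independent of the Hadoop and Parquet types: keep a cursor over several sources, drain the current one, and when it is exhausted move on to the next. A standalone sketch (not code from this commit) of the pattern nextKeyValue implements below:

    // Standalone sketch of the reader-chaining pattern used by MergedRecordReader.
    class ChainedCursor[T](sources: List[Iterator[T]]) {
      private var remaining = sources
      def next(): Option[T] = remaining match {
        case Nil => None
        case current :: rest =>
          if (current.hasNext) Some(current.next())
          else { remaining = rest; next() }   // current source exhausted: advance
      }
    }

    val cursor = new ChainedCursor(List(Iterator(1, 2), Iterator[Int](), Iterator(3)))
    // cursor.next() yields Some(1), Some(2), Some(3), then None.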
+ override def nextKeyValue: Boolean = { + val next = super.nextKeyValue + if (next) { + next + } else { + super.close + progress += split.currentSplit.getLength + + if (startNextSplit(split, taskContext)) { + nextKeyValue + } else { + false + } + } + } + + override def toString = "" + override def getProgress: Float = progress / totalLength +} + + +class CompactGroupInputFormat extends CompactInputFormat[Group](classOf[GroupReadSupport]) { } diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/CompactJob.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/CompactJob.scala new file mode 100644 index 0000000..ba690f7 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/CompactJob.scala @@ -0,0 +1,98 @@ +package com.stripe.herringbone + +import com.stripe.herringbone.util.ParquetUtils + +import java.util.{List => JavaList} +import java.io.DataOutput +import java.io.DataInput + +import scala.collection.mutable.MutableList +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.{Configuration,Configured} +import org.apache.hadoop.fs.{FileSystem,Path} +import org.apache.hadoop.mapreduce.{Job,Mapper} +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat +import org.apache.hadoop.util.{Tool,ToolRunner} + +import org.codehaus.jackson.map.ObjectMapper +import org.codehaus.jackson.`type`.TypeReference + +import org.rogach.scallop.ScallopConf + +import parquet.example.data.{Group,GroupWriter} +import parquet.hadoop.{BadConfigurationException,ParquetOutputFormat} +import parquet.hadoop.api.{DelegatingWriteSupport,WriteSupport} +import parquet.hadoop.api.WriteSupport.FinalizedWriteContext +import parquet.hadoop.example.GroupWriteSupport + +class ParquetCompactConf(arguments: Seq[String]) extends ScallopConf(arguments) { + val inputPath = opt[String](required = true) + val outputPath = opt[String](required = true) +} + +class ParquetCompactWriteSupport extends DelegatingWriteSupport[Group](new GroupWriteSupport) { + var extraMetadata: java.util.Map[String, String] = _ + + override def init(configuration: Configuration): WriteSupport.WriteContext = { + extractMetadata(configuration) + super.init(configuration) + } + + override def finalizeWrite(): FinalizedWriteContext = { + new FinalizedWriteContext(extraMetadata) + } + + def extractMetadata(configuration: Configuration) = { + val metadataJson = configuration.get(ParquetCompactWriteSupport.ExtraMetadataKey) + try { + extraMetadata = new ObjectMapper().readValue(metadataJson, new TypeReference[java.util.Map[String,String]](){}) + } catch { case e: java.io.IOException => + throw new BadConfigurationException("Unable to deserialize extra extra metadata: " + metadataJson, e) + } + } +} + +object ParquetCompactWriteSupport { + val ExtraMetadataKey = "herringbone.compact.extrametadata" +} + +class CompactJob extends Configured with Tool { + override def run(arguments: Array[String]) = { + val args = new ParquetCompactConf(arguments) + val fs = FileSystem.get(getConf) + val inputPath = new Path(args.inputPath()) + val outputPath = new Path(args.outputPath()) + + // Pass along metadata (which includes the thrift schema) to the results. 
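ParquetCompactWriteSupport above and the job setup below hand the existing Parquet key/value metadata through the Configuration as a JSON string. A minimal round trip with the same (org.codehaus) Jackson calls, using a made-up map entry:

    // Sketch of the JSON hand-off only; the key/value pair is hypothetical.
    import org.codehaus.jackson.map.ObjectMapper
    import org.codehaus.jackson.`type`.TypeReference

    val mapper   = new ObjectMapper()
    val metadata = new java.util.HashMap[String, String]()
    metadata.put("example.key", "example value")

    val json = mapper.writeValueAsString(metadata)   // what the job stores under ExtraMetadataKey
    val back = mapper.readValue(json, new TypeReference[java.util.Map[String, String]]() {})
    // `back` is the java.util.Map the write support attaches via FinalizedWriteContext.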
+ val metadata = ParquetUtils.readKeyValueMetaData(inputPath, fs) + val metadataJson = new ObjectMapper().writeValueAsString(metadata) + getConf.set(ParquetCompactWriteSupport.ExtraMetadataKey, metadataJson) + + val job = new Job(getConf) + + FileInputFormat.setInputPaths(job, inputPath) + FileOutputFormat.setOutputPath(job, outputPath) + ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetCompactWriteSupport]) + GroupWriteSupport.setSchema(ParquetUtils.readSchema(inputPath, fs), job.getConfiguration) + + job.setJobName("compact " + args.inputPath() + " → " + args.outputPath()) + job.setInputFormatClass(classOf[CompactGroupInputFormat]); + job.setOutputFormatClass(classOf[ParquetOutputFormat[Group]]) + job.setMapperClass(classOf[Mapper[Void,Group,Void,Group]]) + job.setJarByClass(classOf[CompactJob]) + job.getConfiguration.set("mapreduce.job.user.classpath.first", "true") + job.setNumReduceTasks(0) + + if(job.waitForCompletion(true)) 0 else 1 + } +} + +object CompactJob { + + def main(args: Array[String]) = { + val result = ToolRunner.run(new Configuration, new CompactJob, args) + System.exit(result) + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/FlattenJob.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/FlattenJob.scala new file mode 100644 index 0000000..5d78de1 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/FlattenJob.scala @@ -0,0 +1,78 @@ +package com.stripe.herringbone + +import com.stripe.herringbone.flatten.{ParquetFlatConf,ParquetFlatMapper,TypeFlattener} +import com.stripe.herringbone.flatten.FlatConverter +import com.stripe.herringbone.util.ParquetUtils + +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input._ +import org.apache.hadoop.mapreduce.lib.output._ +import org.apache.hadoop.util._ +import org.apache.hadoop.fs._ +import org.apache.hadoop.conf._ + +import parquet.example.data._ +import parquet.example.data.simple._ +import parquet.hadoop._ +import parquet.hadoop.example._ +import parquet.io.api._ +import parquet.schema._ + +import org.rogach.scallop._ + +class FlattenMapper extends ParquetFlatMapper[Group] { + def valueOut(value: Group) = { + FlatConverter.flattenGroup(value, flattenedSchema, separator, renameId) + } +} + +class FlattenJob extends Configured with Tool { + override def run(args: Array[String]) = { + val conf = new ParquetFlatConf(args) + val fs = FileSystem.get(getConf) + val inputPath = new Path(conf.inputPath()) + val outputPath = new Path(conf.outputPath()) + val previousPath = conf.previousPath.get.map{new Path(_)} + + val separator = conf.separator() + getConf.set(ParquetFlatMapper.SeparatorKey, separator) + + val renameId = conf.renameId() + getConf.set(ParquetFlatMapper.RenameIdKey, renameId.toString) + + if (fs.exists(outputPath)) { + println(s"Deleting existing $outputPath") + fs.delete(outputPath, true) + } + + val flattenedSchema = TypeFlattener.flatten( + ParquetUtils.readSchema(inputPath, fs), + previousPath.map { ParquetUtils.readSchema(_, fs) }, + separator, + renameId + ) + + val jobName = "flatten " + conf.inputPath() + " -> " + conf.outputPath() + val job = new Job(getConf, jobName) + + FileInputFormat.setInputPaths(job, inputPath) + FileOutputFormat.setOutputPath(job, outputPath) + ExampleOutputFormat.setSchema(job, flattenedSchema) + + job.setInputFormatClass(classOf[CompactGroupInputFormat]); + job.setOutputFormatClass(classOf[ExampleOutputFormat]) + job.setMapperClass(classOf[FlattenMapper]) + 
job.setJarByClass(classOf[FlattenJob]) + job.getConfiguration.set("mapreduce.job.user.classpath.first", "true") + job.setNumReduceTasks(0) + + if (job.waitForCompletion(true)) 0 else 1 + } +} + +object FlattenJob { + def main(args: Array[String]) = { + val result = ToolRunner.run(new Configuration, new FlattenJob, args) + System.exit(result) + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/ParquetLoad.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/ParquetLoad.scala new file mode 100644 index 0000000..013aa4a --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/ParquetLoad.scala @@ -0,0 +1,45 @@ +package com.stripe.herringbone + +import com.stripe.herringbone.load._ + +import org.apache.hadoop.conf._ +import org.apache.hadoop.util._ + +class ParquetLoad extends Configured with Tool { + override def run(args: Array[String]): Int = { + val conf = new ParquetLoadConf(args) + val hadoopFs = new HadoopFs() + val fieldUtils = FieldUtils(hadoopFs, ImpalaHiveSchemaTypeMapper) + + val loader: ParquetLoader = if (conf.hive()) { + HiveLoader(conf, hadoopFs, fieldUtils) + } else { + ImpalaLoader(conf, hadoopFs, fieldUtils) + } + + if (conf.updatePartitions()) { + val tableExists = loader.checkTableExists(conf.table(), conf.database()) + + (conf.path.get, tableExists) match { + case (_, true) => loader.updateTable(conf.table(), conf.database()) + case (Some(path), false) => loader.createTable(path, conf.table(), conf.database()) + case (None, false) => { + println("ERROR - path not specified and table not yet created. Specify path from which to create the table") + return 1 + } + } + } else { + loader.createTable(conf.path(), conf.table(), conf.database()) + } + loader.closeConnection + + 0 + } +} + +object ParquetLoad { + def main(args: Array[String]) = { + val result = ToolRunner.run(new Configuration, new ParquetLoad, args) + System.exit(result) + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/TsvJob.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/TsvJob.scala new file mode 100644 index 0000000..ab61dca --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/TsvJob.scala @@ -0,0 +1,98 @@ +package com.stripe.herringbone + +import com.stripe.herringbone.flatten.{ParquetFlatConf,ParquetFlatMapper,TypeFlattener} +import com.stripe.herringbone.flatten.FlatConverter +import com.stripe.herringbone.util.ParquetUtils + +import java.io.{BufferedWriter, OutputStreamWriter} + +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input._ +import org.apache.hadoop.mapreduce.lib.output._ +import org.apache.hadoop.util._ +import org.apache.hadoop.fs._ +import org.apache.hadoop.conf._ +import org.apache.hadoop.io.Text + +import org.rogach.scallop._ + +import parquet.example.data._ +import parquet.example.data.simple._ +import parquet.hadoop._ +import parquet.hadoop.example._ +import parquet.io.api._ +import parquet.schema._ + +import scala.collection.JavaConversions._ + +class TsvMapper extends ParquetFlatMapper[Text] { + def valueOut(value: Group) = { + val tsvLine = FlatConverter.groupToTSV(value, flattenedSchema, separator, renameId) + "\n" + new Text(tsvLine) + } +} + +class TsvJob extends Configured with Tool { + override def run(args: Array[String]) = { + val conf = new ParquetFlatConf(args) + val fs = FileSystem.get(getConf) + val inputPath = new Path(conf.inputPath()) + val outputPath = new Path(conf.outputPath()) + val previousPath = conf.previousPath.get.map{new 
Path(_)} + + val separator = conf.separator() + getConf.set(ParquetFlatMapper.SeparatorKey, separator) + + val renameId = conf.renameId() + getConf.set(ParquetFlatMapper.RenameIdKey, renameId.toString) + + if (fs.exists(outputPath)) { + println(s"Deleting existing $outputPath") + fs.delete(outputPath, true) + } + + val flattenedSchema = TypeFlattener.flatten( + ParquetUtils.readSchema(inputPath, fs), + previousPath.map { ParquetUtils.readSchema(_, fs) }, + separator, + renameId + ) + + val jobName = "tsv " + conf.inputPath() + " -> " + conf.outputPath() + val job = new Job(getConf, jobName) + + FileInputFormat.setInputPaths(job, inputPath) + FileOutputFormat.setOutputPath(job, outputPath) + ExampleOutputFormat.setSchema(job, flattenedSchema) + + job.setInputFormatClass(classOf[CompactGroupInputFormat]) + job.setOutputFormatClass(classOf[TextOutputFormat[Text, Text]].asInstanceOf[Class[Nothing]]) + job.setMapperClass(classOf[TsvMapper]) + job.setJarByClass(classOf[TsvJob]) + job.getConfiguration.set("mapreduce.job.user.classpath.first", "true") + job.setNumReduceTasks(0) + + if (job.waitForCompletion(true)) { + val headerPath = new Path(conf.outputPath() + "/_header.tsv") + writeHeader(fs, headerPath, flattenedSchema) + 0 + } else { + 1 + } + } + + def writeHeader(fs: FileSystem, outputPath: Path, schema: MessageType) { + val header = FlatConverter.constructHeader(schema) + val writer = new BufferedWriter(new OutputStreamWriter(fs.create(outputPath, true))) + writer.write(header) + writer.write("\n") + writer.close() + } +} + +object TsvJob { + def main(args: Array[String]) = { + val result = ToolRunner.run(new Configuration, new TsvJob, args) + System.exit(result) + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConsumer.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConsumer.scala new file mode 100644 index 0000000..e0f837a --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConsumer.scala @@ -0,0 +1,108 @@ +package com.stripe.herringbone.flatten + +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input._ +import org.apache.hadoop.mapreduce.lib.output._ +import org.apache.hadoop.util._ +import org.apache.hadoop.fs._ +import org.apache.hadoop.conf._ + +import parquet.example.data._ +import parquet.example.data.simple._ +import parquet.hadoop._ +import parquet.hadoop.example._ +import parquet.io.api._ +import parquet.schema._ + +class FlatConsumer(output: Group, separator: String, renameId: Boolean) extends RecordConsumer { + + case class StackFrame(field: String, var values: List[String]) + var stack = List[StackFrame]() + // Impala stops working after a field becomes too long. The docs + // indicate that we should have 32k. However, a binary search on a + // too-long field yielded 6776 as the maximum working value. 
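The cap is a byte budget, not a character count, so a long value is simply cut at the MaxStringBytes limit defined just below (which can split a multi-byte UTF-8 character, as the Binary-based truncate method further down also can). A standalone sketch of that truncation with plain byte arrays:

    // Standalone illustration; mirrors FlatConsumer.truncate but without Parquet types.
    def truncateUtf8(s: String, maxBytes: Int): Array[Byte] = {
      val bytes = s.getBytes("UTF-8")
      if (bytes.length <= maxBytes) bytes
      else java.util.Arrays.copyOfRange(bytes, 0, maxBytes)   // may split a multi-byte char
    }

    truncateUtf8("a" * 10000, 6776).length   // 6776, the limit found by that binary search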
+ val MaxStringBytes = 6776 + + def startMessage {} + def endMessage {} + def startGroup {} + def endGroup {} + + def startField(field: String, index: Int) { + stack ::= StackFrame(field, Nil) + } + + def endField(field: String, index: Int) { + if(stack.head.values.size > 0) { + withField {name => + val joined = Binary.fromString( + stack + .head + .values + .reverse + .map{_.replace("\t", " ")} + .mkString(",")) + val truncated = truncate(joined, MaxStringBytes) + output.add(name, truncated) + } + } + stack = stack.tail + } + + def addInteger(value: Int) { + writeField(value.toString){name => output.add(name, value)} + } + + def addLong(value: Long) { + writeField(value.toString){name => output.add(name, value)} + } + + def addBoolean(value: Boolean) { + writeField(value.toString){name => output.add(name, value)} + } + + def truncate(value: Binary, length: Integer): Binary = { + if (value.length <= length) { + value + } else { + val bytesTruncated = new Array[Byte](length) + value.toByteBuffer.get(bytesTruncated, 0, length) + Binary.fromByteArray(bytesTruncated) + } + } + + def addBinary(value: Binary) { + // Truncate strings so Impala doesn't break + val truncated = truncate(value, MaxStringBytes) + writeField(truncated.toStringUsingUTF8){name => output.add(name, truncated)} + } + + def addFloat(value: Float) { + writeField(value.toString){name => output.add(name, value)} + } + + def addDouble(value: Double) { + writeField(value.toString){name => output.add(name, value)} + } + + def withField(fn: String=>Unit) { + val path = if (TypeFlattener.omitIdField(stack.head.field, stack.size, renameId)) + stack.tail + else + stack + + val name = path.reverse.map{_.field}.mkString(separator) + if(output.getType.containsField(name)) + fn(name) + } + + def writeField(stringRep: =>String)(fn: String => Unit) { + withField{name => + val fieldType = output.getType.getType(name) + if(fieldType.asInstanceOf[PrimitiveType].getPrimitiveTypeName == PrimitiveType.PrimitiveTypeName.BINARY) + stack.head.values ::= stringRep + else + fn(name) + } + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConverter.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConverter.scala new file mode 100644 index 0000000..51741e7 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/FlatConverter.scala @@ -0,0 +1,54 @@ +package com.stripe.herringbone.flatten + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.conf.Configuration + +import parquet.example.data.Group +import parquet.example.data.GroupWriter +import parquet.example.data.simple.SimpleGroup +import parquet.schema.MessageType + +import scala.collection.JavaConversions._ + +object FlatConverter { + def groupToTSV(group: Group, flatSchema: MessageType, separator: String, renameId: Boolean): String = { + val flatGroup = flattenGroup(group, flatSchema, separator, renameId) + val fieldValues = (0 until flatSchema.getFieldCount).map{ field => + val valueCount = flatGroup.getFieldRepetitionCount(field) + if (valueCount == 0) { + "" + } else if (valueCount == 1) { + escapeString(flatGroup.getValueToString(field, 0)) + } else { + escapeString(flatGroup.getValueToString(field, 0)) + System.err.println("Warning: Field contains multiple values, extracting only the first") + System.err.println(flatGroup.toString) + } + } + fieldValues.mkString("\t") + } + + def constructHeader(schema: MessageType) = { + schema + .getPaths() + .toList + .map{_(0)} + .mkString("\t") + } + + def 
flattenGroup(group: Group, flatSchema: MessageType, separator: String, renameId: Boolean) = { + var flatGroup = new SimpleGroup(flatSchema) + val writer = new GroupWriter(new FlatConsumer(flatGroup, separator, renameId), group.getType) + writer.write(group) + flatGroup + } + + private def escapeString(s: String) = { + val quote = "\"" + if (s.contains("\t")) + // This is how pandas escapes tabs and quotes + quote + s.replace(quote, "\"\"") + quote + else + s + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatConf.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatConf.scala new file mode 100644 index 0000000..89ea49f --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatConf.scala @@ -0,0 +1,11 @@ +package com.stripe.herringbone.flatten + +import org.rogach.scallop._ + +class ParquetFlatConf(arguments: Seq[String]) extends ScallopConf(arguments) { + val inputPath = opt[String](required = true) + val outputPath = opt[String](required = true) + val previousPath = opt[String](descr = "Path of previously generated flat output, so field ordering can be maintained (optional)") + val separator = opt[String](default = Some("__")) + val renameId = opt[Boolean](descr = "Flatten a.b.id as a__b instead of a__b__id") +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatMapper.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatMapper.scala new file mode 100644 index 0000000..f6c1a03 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/ParquetFlatMapper.scala @@ -0,0 +1,29 @@ +package com.stripe.herringbone.flatten + +import org.apache.hadoop.mapreduce.Mapper +import parquet.example.data.Group +import parquet.schema.{MessageType,MessageTypeParser} + +abstract class ParquetFlatMapper[ValueOut] extends Mapper[Void,Group,Void,ValueOut] { + var flattenedSchema: MessageType = _ + var separator: String = _ + var renameId: Boolean = _ + + override def setup(context: Mapper[Void,Group,Void,ValueOut]#Context) { + // the schema is stored in the job context when we call ExampleOutputFormat.setSchema + flattenedSchema = MessageTypeParser.parseMessageType(context.getConfiguration.get("parquet.example.schema")) + separator = context.getConfiguration.get(ParquetFlatMapper.SeparatorKey) + renameId = context.getConfiguration.get(ParquetFlatMapper.RenameIdKey) == "true" + } + + override def map(key: Void, value: Group, context: Mapper[Void,Group,Void,ValueOut]#Context) { + context.write(key, valueOut(value)) + } + + def valueOut(value: Group): ValueOut +} + +object ParquetFlatMapper { + val SeparatorKey = "herringbone.flatten.separator" + val RenameIdKey = "herringbone.flatten.rename.id" +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/TypeFlattener.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/TypeFlattener.scala new file mode 100644 index 0000000..246972e --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/flatten/TypeFlattener.scala @@ -0,0 +1,59 @@ +package com.stripe.herringbone.flatten + +import parquet.schema._ +import java.util.{List=>JList} +import scala.collection.JavaConverters._ + +class TypeFlattener(separator: String, renameId: Boolean) extends TypeConverter[List[Type]] { + def convertPrimitiveType(path: JList[GroupType], primitiveType: PrimitiveType) = { + val typeName = + if(TypeFlattener.isRepeated(primitiveType)) + 
PrimitiveType.PrimitiveTypeName.BINARY + else + primitiveType.getPrimitiveTypeName + + val types = if (TypeFlattener.omitIdField(primitiveType.getName, path.size, renameId)) + path.asScala.tail + else + (path.asScala.tail :+ primitiveType) + + val name = types.map{_.getName}.mkString(separator) + List(new PrimitiveType(Type.Repetition.OPTIONAL, typeName, primitiveType.getTypeLength, name)) + } + + def convertGroupType(path: JList[GroupType], groupType: GroupType, children: JList[List[Type]]) = { + if(TypeFlattener.isRepeated(groupType)) + Nil + else + flatten(children) + } + + def convertMessageType(messageType: MessageType, children: JList[List[Type]]) = flatten(children) + + def flatten(children: JList[List[Type]]) = children.asScala.flatten.toList +} + +object TypeFlattener { + def flatten(messageType: MessageType, + previousMessageType: Option[MessageType], + separator: String, + renameId: Boolean) = { + val flattened = messageType.convertWith(new TypeFlattener(separator, renameId)) + val fieldsToUse = previousMessageType match { + case Some(prevMessageType) => { + // if passed a previous flattened schema, preserve that field ordering, + // and append any new fields + val prevFields = prevMessageType.getFields.asScala.toList + prevFields ::: flattened.filterNot{prevFields.contains(_)} + } + case None => flattened + } + new MessageType(messageType.getName, fieldsToUse.asJava) + } + + def isRepeated(t: Type) = t.isRepetition(Type.Repetition.REPEATED) + + def omitIdField(fieldName: String, numberOfFields: Integer, renameId: Boolean) = { + renameId && Seq("id", "_id").contains(fieldName) && numberOfFields > 1 + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/FieldUtils.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/FieldUtils.scala new file mode 100644 index 0000000..7599e21 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/FieldUtils.scala @@ -0,0 +1,53 @@ +package com.stripe.herringbone.load + +import com.stripe.herringbone.util.ParquetUtils + +import org.apache.hadoop.fs._ + +import parquet.schema.{ PrimitiveType, Type } +import parquet.schema.PrimitiveType.PrimitiveTypeName +import parquet.schema.PrimitiveType.PrimitiveTypeName._ + +import scala.collection.JavaConversions._ + +case class FieldUtils(hadoopFs: HadoopFs, schemaTypeMapper: SchemaTypeMapper) { + def findPartitionFields(path: Path) = { + hadoopFs.findPartitions(path).map { + case (name, example) if (example.forall{_.isDigit}) => + "`%s` int".format(name) + case (name, _) => + "`%s` string".format(name) + } + } + + def findTableFields(path: Path) = { + val schema = ParquetUtils.readSchema(path, hadoopFs.fileSystem) + tableFieldsFromSchemaFields(schema.getFields) + } + + def tableFieldsFromSchemaFields(fields: Seq[Type]) = { + fields + .filter { f => f.isPrimitive } + .map { f => + "`%s` %s".format(f.getName, schemaTypeMapper.getSchemaType(f.asInstanceOf[PrimitiveType].getPrimitiveTypeName)) + }.toList + } +} + +trait SchemaTypeMapper { + def getSchemaType(pt: PrimitiveTypeName): String +} + +object ImpalaHiveSchemaTypeMapper extends SchemaTypeMapper { + def getSchemaType(pt: PrimitiveTypeName) = { + pt match { + case BINARY => "STRING" + case INT32 => "INT" + case INT64 | INT96 => "BIGINT" + case DOUBLE => "DOUBLE" + case BOOLEAN => "BOOLEAN" + case FLOAT => "FLOAT" + case FIXED_LEN_BYTE_ARRAY => "BINARY" + } + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/HadoopFs.scala 
b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HadoopFs.scala new file mode 100644 index 0000000..abda424 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HadoopFs.scala @@ -0,0 +1,39 @@ +package com.stripe.herringbone.load + +import com.stripe.herringbone.util.ParquetUtils + +import org.apache.hadoop.conf._ +import org.apache.hadoop.fs._ +import org.apache.hadoop.util._ + +class HadoopFs { + lazy val fileSystem = FileSystem.get(new Configuration) + + def findAbsolutePath(path: Path) = { + fileSystem.getFileStatus(path).getPath.toUri.getPath + } + + def findSortedLeafPaths(path: Path): List[Path] = + findLeafPaths(path).sortBy{case (path,time) => time}.map{_._1} + + def findLeafPaths(path: Path): List[(Path,Long)] = { + val parquetFileStatuses = fileSystem.listStatus(path, ParquetUtils.parquetFilter) + if (parquetFileStatuses.size > 0) + List((path, parquetFileStatuses.head.getModificationTime)) + else { + fileSystem.listStatus(path, ParquetUtils.partitionFilter) + .toList + .map{_.getPath} + .flatMap{findLeafPaths(_)} + } + } + + def findPartitions(path: Path) = { + path.toUri.getPath.split("/") + .filter{_.contains("=")} + .map{segment => + val parts = segment.split("=") + (parts(0), parts(1)) + }.toList + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveLoader.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveLoader.scala new file mode 100644 index 0000000..1557677 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveLoader.scala @@ -0,0 +1,76 @@ +package com.stripe.herringbone + +import com.stripe.herringbone.load._ + +import java.sql.ResultSet + +import org.apache.hadoop.conf._ +import org.apache.hadoop.fs._ +import org.apache.hadoop.util._ + +case class HiveLoader(conf: ParquetLoadConf, + hadoopFs: HadoopFs, + fieldUtils: FieldUtils) extends ParquetLoader { + + val connection = HiveServer2Connection(conf.connectionUrl() + ":" + conf.connectionPort()) + + def checkTableExists(table: String, database: String): Boolean = { + connection.execute("USE %s".format(database)) + var exists: Boolean = false + connection.executeQuery("SHOW TABLES") { resultSet => + val existingTable = resultSet.getString(1).trim + if (existingTable == table) + exists = true + } + exists + } + + def createTable(pathString: String, table: String, database: String = "default") { + val path = new Path(pathString) + val location = hadoopFs.findAbsolutePath(path) + val leafPaths = hadoopFs.findSortedLeafPaths(path) + + if (leafPaths.isEmpty) + error("Could not find parquet files under " + path) + + val tableFields = fieldUtils.findTableFields(leafPaths.last) + val partitionFields = fieldUtils.findPartitionFields(leafPaths.last) + val tableWhileImporting = table + "__import" + + connection.execute("CREATE DATABASE IF NOT EXISTS %s".format(database)) + connection.execute("USE %s".format(database)) + + createTableWithPartitionFields(location, tableWhileImporting, tableFields, partitionFields) + + connection.execute("DROP TABLE IF EXISTS %s".format(table)) + connection.execute("ALTER TABLE %s RENAME TO %s".format(tableWhileImporting, table)) + + if (!partitionFields.isEmpty) + updateTable(table, database) + } + + def createTableWithPartitionFields(location: String, table: String, tableFields: List[String], + partitionFields: List[String]) { + + connection.execute("DROP TABLE IF EXISTS `%s`".format (table)) + + val tableClause = "CREATE EXTERNAL TABLE IF NOT EXISTS `%s` (%s)".format( + table, 
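+      // Illustrative result of the three clauses assembled below, with hypothetical
+      // column and partition names, for a source table named "events":
+      //   CREATE EXTERNAL TABLE IF NOT EXISTS `events__import` (`user__email` STRING, `amount` BIGINT)
+      //     PARTITIONED BY (`day` int) STORED AS PARQUET LOCATION "/path/to/events"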
tableFields.mkString(", ")) + + val partitionClause = + if (partitionFields.isEmpty) + "" + else + " PARTITIONED BY (%s)".format(partitionFields.mkString(" ,")) + + val storedClause = " STORED AS PARQUET LOCATION \"%s\"".format(location) + + connection.execute(tableClause + partitionClause + storedClause) + } + + def updateTable(table: String, database: String) = { + connection.execute("MSCK REPAIR TABLE %s".format(table)) + } + + def closeConnection() = connection.close +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveServer2Connection.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveServer2Connection.scala new file mode 100644 index 0000000..cb34423 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/HiveServer2Connection.scala @@ -0,0 +1,35 @@ +package com.stripe.herringbone.load + +import java.sql.{ Connection, DriverManager, ResultSet } + +case class HiveServer2Connection(connectionUrl: String) { + lazy val connection: Connection = { + Class.forName("org.apache.hive.jdbc.HiveDriver") + DriverManager.getConnection(connectionUrl) + } + + def execute(query: String) { + try { + println(query) + val statement = connection.createStatement + statement.execute(query) + } catch { + case e: Throwable => e.printStackTrace + } + } + + def executeQuery(query: String)(fn: ResultSet => Unit) { + try { + println(query) + val statement = connection.createStatement + val resultSet = statement.executeQuery(query) + while (resultSet.next) { + fn(resultSet) + } + } catch { + case e: Throwable => e.printStackTrace + } + } + + def close = connection.close +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/ImpalaLoader.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ImpalaLoader.scala new file mode 100644 index 0000000..ca7d57b --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ImpalaLoader.scala @@ -0,0 +1,122 @@ +package com.stripe.herringbone.load + +import com.stripe.herringbone.impala.{ImpalaClient,ImpalaValue} + +import org.apache.hadoop.conf._ +import org.apache.hadoop.util._ +import org.apache.hadoop.fs._ + +case class ImpalaLoader(conf: ParquetLoadConf, + hadoopFs: HadoopFs, + fieldUtils: FieldUtils) extends ParquetLoader { + + lazy val impalaClient = ImpalaClient(conf.connectionUrl(), + conf.connectionPort().toInt) + + def checkTableExists(table: String, database: String): Boolean = { + execute("USE %s".format(database)) + var exists: Boolean = false + query("SHOW TABLES"){row => + row.foreach { value => + if (value.raw == table) exists = true + } + } + exists + } + + def createTable(pathString: String, table: String, database: String = "default") { + val path = new Path(pathString) + val location = hadoopFs.findAbsolutePath(path) + val leafPaths = hadoopFs.findSortedLeafPaths(path) + + if(leafPaths.isEmpty) + error("Could not find parquet files under " + path) + + val tableFields = fieldUtils.findTableFields(leafPaths.last) + val partitionFields = fieldUtils.findPartitionFields(leafPaths.last) + + execute("CREATE DATABASE IF NOT EXISTS importing") + execute("USE importing") + + createTableWithPartitionFields(location, table, tableFields, partitionFields) + + if(partitionFields.size > 0) + addPartitions(table, leafPaths.map{hadoopFs.findPartitions(_)}) + + execute("CREATE DATABASE IF NOT EXISTS %s".format(database)) + execute("DROP TABLE IF EXISTS %s.%s".format(database, table)) + execute("ALTER TABLE importing.%s RENAME TO %s.%s".format(table, 
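+    // Illustrative statement sequence for createTable("/data/events", "events", "analytics"):
+    // the table is first built under the scratch "importing" database and then renamed
+    // into the target database.
+    //   CREATE DATABASE IF NOT EXISTS importing
+    //   USE importing
+    //   CREATE EXTERNAL TABLE IF NOT EXISTS `events` (...) STORED AS PARQUETFILE LOCATION "/data/events"
+    //   ALTER TABLE events ADD IF NOT EXISTS PARTITION (...)   (once per leaf path, when partitioned)
+    //   CREATE DATABASE IF NOT EXISTS analytics
+    //   DROP TABLE IF EXISTS analytics.events
+    //   ALTER TABLE importing.events RENAME TO analytics.events
+    //   COMPUTE STATS analytics.events   (only when the table has no partition columns)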
database, table)) + if (partitionFields.isEmpty) execute("COMPUTE STATS %s.%s".format(database, table)) + } + + def updateTable(table: String, database: String) { + execute("USE %s".format(database)) + + val basePath = findBasePath(table) + val tablePartitions = findTablePartitions(table) + val leafPaths = hadoopFs.findSortedLeafPaths(new Path(basePath)) + leafPaths.reverse.foreach{path => + val partitions = hadoopFs.findPartitions(path) + if(!tablePartitions.contains(partitions.map{_._2})) + addPartition(table, partitions) + } + } + + def findBasePath(table: String) = { + var location: String = null + query("DESCRIBE FORMATTED %s".format(table)){row => + if(row(0).raw.startsWith("Location:")) + location = row(1).raw + } + location + } + + def findTablePartitions(table: String) = { + var partitions: List[List[String]] = Nil + query("SHOW TABLE STATS %s".format(table)){row => + if(row.size > 4) + partitions ::= List(row(0).raw) + } + partitions + } + + def createTableWithPartitionFields(location: String, table: String, tableFields: List[String], partitionFields: List[String]) { + execute("DROP TABLE IF EXISTS `%s`".format (table)) + + val tableClause = "CREATE EXTERNAL TABLE IF NOT EXISTS `%s` (%s)".format(table, tableFields.mkString(", ")) + val partitionClause = + if(partitionFields.isEmpty) + "" + else + " PARTITIONED BY (%s)".format(partitionFields.mkString(" ,")) + val storedClause = " STORED AS PARQUETFILE LOCATION \"%s\"".format(location) + + execute(tableClause + partitionClause + storedClause) + } + + def addPartitions(table: String, partitions: List[List[(String, String)]]) { + partitions.foreach{addPartition(table, _)} + } + + def addPartition(table: String, partitions: List[(String,String)]) { + val partitionClause = + partitions.map { + case (name, value) if(value.forall{_.isDigit}) => + "`%s`=%s".format(name, value) + case (name, value) => + "`%s`='%s'".format(name, value) + }.mkString(", ") + + execute("ALTER TABLE %s ADD IF NOT EXISTS PARTITION (%s)".format(table, partitionClause)) + } + + private def execute(stmt: String) { + impalaClient.execute(stmt) + } + + private def query(stmt: String)(fn: Seq[ImpalaValue] => Unit) { + impalaClient.query(stmt){ r => fn(r) } + } + + def closeConnection() = {} +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoadConf.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoadConf.scala new file mode 100644 index 0000000..3615695 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoadConf.scala @@ -0,0 +1,18 @@ +package com.stripe.herringbone.load + +import org.rogach.scallop._ + +class ParquetLoadConf(arguments: Seq[String]) extends ScallopConf(arguments) { + val database = opt[String](default = Some("default")) + val table = opt[String](required = true) + val path = opt[String]() + val hive = opt[Boolean]("hive") + val connectionUrl = opt[String](required = true) + val connectionPort = opt[String](required = true) + + val updatePartitions = toggle(descrYes = "Create table if not present, otherwise update with new partitions", default = Some(false)) + validateOpt (path, updatePartitions) { + case (None, None) => Left("You must specify at least one of path or update-partitions") + case _ => Right(Unit) + } +} diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoader.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoader.scala new file mode 100644 index 0000000..54a5d68 --- /dev/null +++ 
b/herringbone-main/src/main/scala/com/stripe/herringbone/load/ParquetLoader.scala @@ -0,0 +1,9 @@ +package com.stripe.herringbone.load + +trait ParquetLoader { + def checkTableExists(table: String, db: String): Boolean + def updateTable(table: String, db: String): Unit + def createTable(path: String, table: String, db: String): Unit + def closeConnection(): Unit +} + diff --git a/herringbone-main/src/main/scala/com/stripe/herringbone/util/ParquetUtils.scala b/herringbone-main/src/main/scala/com/stripe/herringbone/util/ParquetUtils.scala new file mode 100644 index 0000000..ca675d4 --- /dev/null +++ b/herringbone-main/src/main/scala/com/stripe/herringbone/util/ParquetUtils.scala @@ -0,0 +1,36 @@ +package com.stripe.herringbone.util + +import org.apache.hadoop.conf._ +import org.apache.hadoop.util._ +import org.apache.hadoop.fs._ + +import parquet.hadoop.ParquetFileReader + +object ParquetUtils { + def getParquetMetadata(path: Path, fs: FileSystem) = { + // Just use the first parquet file to figure out the impala fields + // This also dodges the problem of any non-parquet files stashed + // in the path. + val parquetFileStatuses = fs.listStatus(path, parquetFilter) + val representativeParquetPath = parquetFileStatuses.head.getPath + + val footers = ParquetFileReader.readFooters(new Configuration, representativeParquetPath) + footers.get(0).getParquetMetadata + } + + def readSchema(path: Path, fs: FileSystem) = { + getParquetMetadata(path, fs).getFileMetaData.getSchema + } + + def readKeyValueMetaData(path: Path, fs: FileSystem) = { + getParquetMetadata(path, fs).getFileMetaData.getKeyValueMetaData + } + + val parquetFilter = new PathFilter { + def accept(path: Path) = path.getName.endsWith(".parquet") + } + + val partitionFilter = new PathFilter { + def accept(path: Path) = path.getName.contains("=") + } +} diff --git a/herringbone-main/src/main/thrift/ImpalaService.thrift b/herringbone-main/src/main/thrift/ImpalaService.thrift new file mode 100644 index 0000000..1246ca4 --- /dev/null +++ b/herringbone-main/src/main/thrift/ImpalaService.thrift @@ -0,0 +1,177 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace cpp impala +namespace java com.cloudera.impala.thrift +namespace rb impala.protocol + +include "Status.thrift" +include "beeswax.thrift" +include "cli_service.thrift" + +// ImpalaService accepts query execution options through beeswax.Query.configuration in +// key:value form. For example, the list of strings could be: +// "num_nodes:1", "abort_on_error:false" +// The valid keys are listed in this enum. They map to TQueryOptions. 
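The loaders earlier in this patch drive this Impala service through com.stripe.herringbone.impala.ImpalaClient rather than raw Thrift calls. A minimal sketch of that client, using only the calls already visible in ImpalaLoader above; the host and port are hypothetical placeholders (ImpalaLoader takes them from its connectionUrl and connectionPort options):

    import com.stripe.herringbone.impala.ImpalaClient

    object ImpalaSmokeTest {
      def main(args: Array[String]): Unit = {
        // Hypothetical endpoint; ImpalaLoader constructs its client the same way.
        val client = ImpalaClient("impala.example.com", 21000)
        client.execute("USE default")
        // Each result row is handed to the block as a Seq[ImpalaValue]; raw is its string form.
        client.query("SHOW TABLES") { row =>
          row.foreach { value => println(value.raw) }
        }
      }
    }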
+// Note: If you add an option or change the default, you also need to update: +// - ImpalaInternalService.thrift: TQueryOptions +// - ImpaladClientExecutor.getBeeswaxQueryConfigurations() +// - ImpalaServer::SetQueryOptions() +// - ImpalaServer::TQueryOptionsToMap() +enum TImpalaQueryOptions { + // if true, abort execution on the first error + ABORT_ON_ERROR, + + // maximum # of errors to be reported; Unspecified or 0 indicates backend default + MAX_ERRORS, + + // if true, disable llvm codegen + DISABLE_CODEGEN, + + // batch size to be used by backend; Unspecified or a size of 0 indicates backend + // default + BATCH_SIZE, + + // a per-machine approximate limit on the memory consumption of this query; + // unspecified or a limit of 0 means no limit; + // otherwise specified either as: + // a) an int (= number of bytes); + // b) a float followed by "M" (MB) or "G" (GB) + MEM_LIMIT, + + // specifies the degree of parallelism with which to execute the query; + // 1: single-node execution + // NUM_NODES_ALL: executes on all nodes that contain relevant data + // NUM_NODES_ALL_RACKS: executes on one node per rack that holds relevant data + // > 1: executes on at most that many nodes at any point in time (ie, there can be + // more nodes than numNodes with plan fragments for this query, but at most + // numNodes would be active at any point in time) + // Constants (NUM_NODES_ALL, NUM_NODES_ALL_RACKS) are defined in JavaConstants.thrift. + NUM_NODES, + + // maximum length of the scan range; only applicable to HDFS scan range; Unspecified or + // a length of 0 indicates backend default; + MAX_SCAN_RANGE_LENGTH, + + // Maximum number of io buffers (per disk) + MAX_IO_BUFFERS, + + // Number of scanner threads. + NUM_SCANNER_THREADS, + + // If true, Impala will try to execute on file formats that are not fully supported yet + ALLOW_UNSUPPORTED_FORMATS, + + // if set and > -1, specifies the default limit applied to a top-level SELECT statement + // with an ORDER BY but without a LIMIT clause (ie, if the SELECT statement also has + // a LIMIT clause, this default is ignored) + DEFAULT_ORDER_BY_LIMIT, + + // DEBUG ONLY: + // If set to + // "[:]::", + // the exec node with the given id will perform the specified action in the given + // phase. If the optional backend number (starting from 0) is specified, only that + // backend instance will perform the debug action, otherwise all backends will behave + // in that way. + // If the string doesn't have the required format or if any of its components is + // invalid, the option is ignored. + DEBUG_ACTION, + + // If true, raise an error when the DEFAULT_ORDER_BY_LIMIT has been reached. + ABORT_ON_DEFAULT_LIMIT_EXCEEDED, + + // Compression codec for parquet when inserting into parquet tables. + // Valid values are "snappy", "gzip" and "none" + // Leave blank to use default. + PARQUET_COMPRESSION_CODEC, + + // HBase scan query option. If set and > 0, HBASE_CACHING is the value for + // "hbase.client.Scan.setCaching()" when querying HBase table. Otherwise, use backend + // default. + // If the value is too high, then the hbase region server will have a hard time (GC + // pressure and long response times). If the value is too small, then there will be + // extra trips to the hbase region server. + HBASE_CACHING, + + // HBase scan query option. If set, HBase scan will always set + // "hbase.client.setCacheBlocks" to CACHE_BLOCKS. Default is false. 
+ // If the table is large and the query is doing big scan, set it to false to + // avoid polluting the cache in the hbase region server. + // If the table is small and the table is used several time, set it to true to improve + // performance. + HBASE_CACHE_BLOCKS, +} + +// The summary of an insert. +struct TInsertResult { + // Number of appended rows per modified partition. Only applies to HDFS tables. + // The keys represent partitions to create, coded as k1=v1/k2=v2/k3=v3..., with the + // root in an unpartitioned table being the empty string. + 1: required map rows_appended +} + +// Response from a call to PingImpalaService +struct TPingImpalaServiceResp { + // The Impala service's version string. + 1: string version +} + +// Parameters for a ResetTable request which will invalidate a table's metadata. +// DEPRECATED. +struct TResetTableReq { + // Name of the table's parent database. + 1: required string db_name + + // Name of the table. + 2: required string table_name +} + +// For all rpc that return a TStatus as part of their result type, +// if the status_code field is set to anything other than OK, the contents +// of the remainder of the result type is undefined (typically not set) +service ImpalaService extends beeswax.BeeswaxService { + // Cancel execution of query. Returns RUNTIME_ERROR if query_id + // unknown. + // This terminates all threads running on behalf of this query at + // all nodes that were involved in the execution. + // Throws BeeswaxException if the query handle is invalid (this doesn't + // necessarily indicate an error: the query might have finished). + Status.TStatus Cancel(1:beeswax.QueryHandle query_id) + throws(1:beeswax.BeeswaxException error); + + // Invalidates all catalog metadata, forcing a reload + // DEPRECATED; execute query "invalidate metadata" to refresh metadata + Status.TStatus ResetCatalog(); + + // Invalidates a specific table's catalog metadata, forcing a reload on the next access + // DEPRECATED; execute query "refresh
" to refresh metadata + Status.TStatus ResetTable(1:TResetTableReq request) + + // Returns the runtime profile string for the given query handle. + string GetRuntimeProfile(1:beeswax.QueryHandle query_id) + throws(1:beeswax.BeeswaxException error); + + // Closes the query handle and return the result summary of the insert. + TInsertResult CloseInsert(1:beeswax.QueryHandle handle) + throws(1:beeswax.QueryNotFoundException error, 2:beeswax.BeeswaxException error2); + + // Client calls this RPC to verify that the server is an ImpalaService. Returns the + // server version. + TPingImpalaServiceResp PingImpalaService(); +} + +// Impala HiveServer2 service +service ImpalaHiveServer2Service extends cli_service.TCLIService { +} diff --git a/herringbone-main/src/main/thrift/Status.thrift b/herringbone-main/src/main/thrift/Status.thrift new file mode 100644 index 0000000..8906d1e --- /dev/null +++ b/herringbone-main/src/main/thrift/Status.thrift @@ -0,0 +1,32 @@ +// Copyright 2012 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace cpp impala +namespace java com.cloudera.impala.thrift +namespace rb impala.protocol + +enum TStatusCode { + OK, + CANCELLED, + ANALYSIS_ERROR, + NOT_IMPLEMENTED_ERROR, + RUNTIME_ERROR, + MEM_LIMIT_EXCEEDED, + INTERNAL_ERROR +} + +struct TStatus { + 1: required TStatusCode status_code + 2: list error_msgs +} diff --git a/herringbone-main/src/main/thrift/beeswax.thrift b/herringbone-main/src/main/thrift/beeswax.thrift new file mode 100644 index 0000000..2707457 --- /dev/null +++ b/herringbone-main/src/main/thrift/beeswax.thrift @@ -0,0 +1,175 @@ +/* + * Licensed to Cloudera, Inc. under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Cloudera, Inc. licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Interface for interacting with Beeswax Server + */ + +namespace java com.cloudera.beeswax.api +namespace py beeswaxd +namespace cpp beeswax +namespace rb impala.protocol.beeswax + +include "hive_metastore.thrift" + +// A Query +struct Query { + 1: string query; + // A list of HQL commands to execute before the query. + // This is typically defining UDFs, setting settings, and loading resources. + 3: list configuration; + + // User and groups to "act as" for purposes of Hadoop. 
+ 4: string hadoop_user; +} + +typedef string LogContextId + +enum QueryState { + CREATED, + INITIALIZED, + COMPILED, + RUNNING, + FINISHED, + EXCEPTION +} + +struct QueryHandle { + 1: string id; + 2: LogContextId log_context; +} + +struct QueryExplanation { + 1: string textual +} + +struct Results { + // If set, data is valid. Otherwise, results aren't ready yet. + 1: bool ready, + // Columns for the results + 2: list columns, + // A set of results + 3: list data, + // The starting row of the results + 4: i64 start_row, + // Whether there are more results to fetch + 5: bool has_more +} + +/** + * Metadata information about the results. + * Applicable only for SELECT. + */ +struct ResultsMetadata { + /** The schema of the results */ + 1: hive_metastore.Schema schema, + /** The directory containing the results. Not applicable for partition table. */ + 2: string table_dir, + /** If the results are straight from an existing table, the table name. */ + 3: string in_tablename, + /** Field delimiter */ + 4: string delim, +} + +exception BeeswaxException { + 1: string message, + // Use get_log(log_context) to retrieve any log related to this exception + 2: LogContextId log_context, + // (Optional) The QueryHandle that caused this exception + 3: QueryHandle handle, + 4: optional i32 errorCode = 0, + 5: optional string SQLState = " " +} + +exception QueryNotFoundException { +} + +/** Represents a Hadoop-style configuration variable. */ +struct ConfigVariable { + 1: string key, + 2: string value, + 3: string description +} + +service BeeswaxService { + /** + * Submit a query and return a handle (QueryHandle). The query runs asynchronously. + */ + QueryHandle query(1:Query query) throws(1:BeeswaxException error), + + /** + * run a query synchronously and return a handle (QueryHandle). + */ + QueryHandle executeAndWait(1:Query query, 2:LogContextId clientCtx) + throws(1:BeeswaxException error), + + /** + * Get the query plan for a query. + */ + QueryExplanation explain(1:Query query) + throws(1:BeeswaxException error), + + /** + * Get the results of a query. This is non-blocking. Caller should check + * Results.ready to determine if the results are in yet. The call requests + * the batch size of fetch. + */ + Results fetch(1:QueryHandle query_id, 2:bool start_over, 3:i32 fetch_size=-1) + throws(1:QueryNotFoundException error, 2:BeeswaxException error2), + + /** + * Get the state of the query + */ + QueryState get_state(1:QueryHandle handle) throws(1:QueryNotFoundException error), + + /** + * Get the result metadata + */ + ResultsMetadata get_results_metadata(1:QueryHandle handle) + throws(1:QueryNotFoundException error), + + /** + * Used to test connection to server. A "noop" command. + */ + string echo(1:string s) + + /** + * Returns a string representation of the configuration object being used. + * Handy for debugging. + */ + string dump_config() + + /** + * Get the log messages related to the given context. + */ + string get_log(1:LogContextId context) throws(1:QueryNotFoundException error) + + /* + * Returns "default" configuration. 
+ */ + list get_default_configuration(1:bool include_hadoop) + + /* + * closes the query with given handle + */ + void close(1:QueryHandle handle) throws(1:QueryNotFoundException error, + 2:BeeswaxException error2) + + /* + * clean the log context for given id + */ + void clean(1:LogContextId log_context) +} diff --git a/herringbone-main/src/main/thrift/cli_service.thrift b/herringbone-main/src/main/thrift/cli_service.thrift new file mode 100644 index 0000000..24a3558 --- /dev/null +++ b/herringbone-main/src/main/thrift/cli_service.thrift @@ -0,0 +1,1015 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Coding Conventions for this file: +// +// Structs/Enums/Unions +// * Struct, Enum, and Union names begin with a "T", +// and use a capital letter for each new word, with no underscores. +// * All fields should be declared as either optional or required. +// +// Functions +// * Function names start with a capital letter and have a capital letter for +// each new word, with no underscores. +// * Each function should take exactly one parameter, named TFunctionNameReq, +// and should return either void or TFunctionNameResp. This convention allows +// incremental updates. +// +// Services +// * Service names begin with the letter "T", use a capital letter for each +// new word (with no underscores), and end with the word "Service". + +namespace java org.apache.hive.service.cli.thrift +namespace cpp apache.hive.service.cli.thrift +namespace rb impala.protocol.hive + +// List of protocol versions. A new token should be +// added to the end of this list every time a change is made. 
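The service defined in the rest of this file is the HiveServer2 interface that ultimately sits behind the JDBC-based HiveServer2Connection wrapper added earlier in this patch. A minimal usage sketch of that wrapper, with a hypothetical jdbc:hive2 URL (HiveLoader assembles its URL from the connectionUrl and connectionPort options, and the Hive JDBC driver must be on the classpath):

    import com.stripe.herringbone.load.HiveServer2Connection

    object HiveSmokeTest {
      def main(args: Array[String]): Unit = {
        // Hypothetical HiveServer2 endpoint.
        val hive = HiveServer2Connection("jdbc:hive2://hive.example.com:10000/default")
        hive.execute("USE default")
        // executeQuery invokes the block once per result row.
        hive.executeQuery("SHOW TABLES") { resultSet =>
          println(resultSet.getString(1).trim)
        }
        hive.close
      }
    }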
+enum TProtocolVersion { + HIVE_CLI_SERVICE_PROTOCOL_V1 +} + +enum TTypeId { + BOOLEAN_TYPE, + TINYINT_TYPE, + SMALLINT_TYPE, + INT_TYPE, + BIGINT_TYPE, + FLOAT_TYPE, + DOUBLE_TYPE, + STRING_TYPE, + TIMESTAMP_TYPE, + BINARY_TYPE, + ARRAY_TYPE, + MAP_TYPE, + STRUCT_TYPE, + UNION_TYPE, + USER_DEFINED_TYPE, + DECIMAL_TYPE +} + +const set PRIMITIVE_TYPES = [ + TTypeId.BOOLEAN_TYPE + TTypeId.TINYINT_TYPE + TTypeId.SMALLINT_TYPE + TTypeId.INT_TYPE + TTypeId.BIGINT_TYPE + TTypeId.FLOAT_TYPE + TTypeId.DOUBLE_TYPE + TTypeId.STRING_TYPE + TTypeId.TIMESTAMP_TYPE + TTypeId.BINARY_TYPE, + TTypeId.DECIMAL_TYPE +] + +const set COMPLEX_TYPES = [ + TTypeId.ARRAY_TYPE + TTypeId.MAP_TYPE + TTypeId.STRUCT_TYPE + TTypeId.UNION_TYPE + TTypeId.USER_DEFINED_TYPE +] + +const set COLLECTION_TYPES = [ + TTypeId.ARRAY_TYPE + TTypeId.MAP_TYPE +] + +const map TYPE_NAMES = { + TTypeId.BOOLEAN_TYPE: "BOOLEAN", + TTypeId.TINYINT_TYPE: "TINYINT", + TTypeId.SMALLINT_TYPE: "SMALLINT", + TTypeId.INT_TYPE: "INT", + TTypeId.BIGINT_TYPE: "BIGINT", + TTypeId.FLOAT_TYPE: "FLOAT", + TTypeId.DOUBLE_TYPE: "DOUBLE", + TTypeId.STRING_TYPE: "STRING", + TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", + TTypeId.BINARY_TYPE: "BINARY", + TTypeId.ARRAY_TYPE: "ARRAY", + TTypeId.MAP_TYPE: "MAP", + TTypeId.STRUCT_TYPE: "STRUCT", + TTypeId.UNION_TYPE: "UNIONTYPE" + TTypeId.DECIMAL_TYPE: "DECIMAL" +} + +// Thrift does not support recursively defined types or forward declarations, +// which makes it difficult to represent Hive's nested types. +// To get around these limitations TTypeDesc employs a type list that maps +// integer "pointers" to TTypeEntry objects. The following examples show +// how different types are represented using this scheme: +// +// "INT": +// TTypeDesc { +// types = [ +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// } +// ] +// } +// +// "ARRAY": +// TTypeDesc { +// types = [ +// TTypeEntry.array_entry { +// object_type_ptr = 1 +// }, +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// } +// ] +// } +// +// "MAP": +// TTypeDesc { +// types = [ +// TTypeEntry.map_entry { +// key_type_ptr = 1 +// value_type_ptr = 2 +// }, +// TTypeEntry.primitive_entry { +// type = INT_TYPE +// }, +// TTypeEntry.primitive_entry { +// type = STRING_TYPE +// } +// ] +// } + +typedef i32 TTypeEntryPtr + +// Type entry for a primitive type. +struct TPrimitiveTypeEntry { + // The primitive type token. This must satisfy the condition + // that type is in the PRIMITIVE_TYPES set. + 1: required TTypeId type +} + +// Type entry for an ARRAY type. +struct TArrayTypeEntry { + 1: required TTypeEntryPtr objectTypePtr +} + +// Type entry for a MAP type. +struct TMapTypeEntry { + 1: required TTypeEntryPtr keyTypePtr + 2: required TTypeEntryPtr valueTypePtr +} + +// Type entry for a STRUCT type. +struct TStructTypeEntry { + 1: required map nameToTypePtr +} + +// Type entry for a UNIONTYPE type. +struct TUnionTypeEntry { + 1: required map nameToTypePtr +} + +struct TUserDefinedTypeEntry { + // The fully qualified name of the class implementing this type. + 1: required string typeClassName +} + +// We use a union here since Thrift does not support inheritance. +union TTypeEntry { + 1: TPrimitiveTypeEntry primitiveEntry + 2: TArrayTypeEntry arrayEntry + 3: TMapTypeEntry mapEntry + 4: TStructTypeEntry structEntry + 5: TUnionTypeEntry unionEntry + 6: TUserDefinedTypeEntry userDefinedTypeEntry +} + +// Type descriptor for columns. +struct TTypeDesc { + // The "top" type is always the first element of the list. 
+ // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE + // type, then subsequent elements represent nested types. + 1: required list types +} + +// A result set column descriptor. +struct TColumnDesc { + // The name of the column + 1: required string columnName + + // The type descriptor for this column + 2: required TTypeDesc typeDesc + + // The ordinal position of this column in the schema + 3: required i32 position + + 4: optional string comment +} + +// Metadata used to describe the schema (column names, types, comments) +// of result sets. +struct TTableSchema { + 1: required list columns +} + +// A Boolean column value. +struct TBoolValue { + // NULL if value is unset. + 1: optional bool value +} + +// A Byte column value. +struct TByteValue { + // NULL if value is unset. + 1: optional byte value +} + +// A signed, 16 bit column value. +struct TI16Value { + // NULL if value is unset + 1: optional i16 value +} + +// A signed, 32 bit column value +struct TI32Value { + // NULL if value is unset + 1: optional i32 value +} + +// A signed 64 bit column value +struct TI64Value { + // NULL if value is unset + 1: optional i64 value +} + +// A floating point 64 bit column value +struct TDoubleValue { + // NULL if value is unset + 1: optional double value +} + +struct TStringValue { + // NULL if value is unset + 1: optional string value +} + +union TColumn { + 1: list boolColumn + 2: list byteColumn + 3: list i16Column + 4: list i32Column + 5: list i64Column + 6: list doubleColumn + 7: list stringColumn +} + +// A single column value in a result set. +// Note that Hive's type system is richer than Thrift's, +// so in some cases we have to map multiple Hive types +// to the same Thrift type. On the client-side this is +// disambiguated by looking at the Schema of the +// result set. +union TColumnValue { + 1: TBoolValue boolVal // BOOLEAN + 2: TByteValue byteVal // TINYINT + 3: TI16Value i16Val // SMALLINT + 4: TI32Value i32Val // INT + 5: TI64Value i64Val // BIGINT, TIMESTAMP + 6: TDoubleValue doubleVal // FLOAT, DOUBLE + 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL +} + +// Represents a row in a rowset. +struct TRow { + 1: required list colVals +} + +// Represents a rowset +struct TRowSet { + // The starting row offset of this rowset. + 1: required i64 startRowOffset + 2: required list rows + 3: optional list columns +} + +// The return status code contained in each response. +enum TStatusCode { + SUCCESS_STATUS, + SUCCESS_WITH_INFO_STATUS, + STILL_EXECUTING_STATUS, + ERROR_STATUS, + INVALID_HANDLE_STATUS +} + +// The return status of a remote request +struct TStatus { + 1: required TStatusCode statusCode + + // If status is SUCCESS_WITH_INFO, info_msgs may be populated with + // additional diagnostic information. + 2: optional list infoMessages + + // If status is ERROR, then the following fields may be set + 3: optional string sqlState // as defined in the ISO/IEF CLI specification + 4: optional i32 errorCode // internal error code + 5: optional string errorMessage +} + +// The state of an operation (i.e. a query or other +// asynchronous operation that generates a result set) +// on the server. +enum TOperationState { + // The operation has been initialized + INITIALIZED_STATE, + + // The operation is running. In this state the result + // set is not available. + RUNNING_STATE, + + // The operation has completed. When an operation is in + // this state its result set may be fetched. 
+ FINISHED_STATE, + + // The operation was canceled by a client + CANCELED_STATE, + + // The operation was closed by a client + CLOSED_STATE, + + // The operation failed due to an error + ERROR_STATE, + + // The operation is in an unrecognized state + UKNOWN_STATE, +} + + +// A string identifier. This is interpreted literally. +typedef string TIdentifier + +// A search pattern. +// +// Valid search pattern characters: +// '_': Any single character. +// '%': Any sequence of zero or more characters. +// '\': Escape character used to include special characters, +// e.g. '_', '%', '\'. If a '\' precedes a non-special +// character it has no special meaning and is interpreted +// literally. +typedef string TPattern + + +// A search pattern or identifier. Used as input +// parameter for many of the catalog functions. +typedef string TPatternOrIdentifier + +struct THandleIdentifier { + // 16 byte globally unique identifier + // This is the public ID of the handle and + // can be used for reporting. + 1: required binary guid, + + // 16 byte secret generated by the server + // and used to verify that the handle is not + // being hijacked by another user. + 2: required binary secret, +} + +// Client-side handle to persistent +// session information on the server-side. +struct TSessionHandle { + 1: required THandleIdentifier sessionId +} + +// The subtype of an OperationHandle. +enum TOperationType { + EXECUTE_STATEMENT, + GET_TYPE_INFO, + GET_CATALOGS, + GET_SCHEMAS, + GET_TABLES, + GET_TABLE_TYPES, + GET_COLUMNS, + GET_FUNCTIONS, + UNKNOWN, +} + +// Client-side reference to a task running +// asynchronously on the server. +struct TOperationHandle { + 1: required THandleIdentifier operationId + 2: required TOperationType operationType + + // If hasResultSet = TRUE, then this operation + // generates a result set that can be fetched. + // Note that the result set may be empty. + // + // If hasResultSet = FALSE, then this operation + // does not generate a result set, and calling + // GetResultSetMetadata or FetchResults against + // this OperationHandle will generate an error. + 3: required bool hasResultSet + + // For operations that don't generate result sets, + // modifiedRowCount is either: + // + // 1) The number of rows that were modified by + // the DML operation (e.g. number of rows inserted, + // number of rows deleted, etc). + // + // 2) 0 for operations that don't modify or add rows. + // + // 3) < 0 if the operation is capable of modifiying rows, + // but Hive is unable to determine how many rows were + // modified. For example, Hive's LOAD DATA command + // doesn't generate row count information because + // Hive doesn't inspect the data as it is loaded. + // + // modifiedRowCount is unset if the operation generates + // a result set. + 4: optional double modifiedRowCount +} + + +// OpenSession() +// +// Open a session (connection) on the server against +// which operations may be executed. +struct TOpenSessionReq { + // The version of the HiveServer2 protocol that the client is using. + 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1 + + // Username and password for authentication. + // Depending on the authentication scheme being used, + // this information may instead be provided by a lower + // protocol layer, in which case these fields may be + // left unset. + 2: optional string username + 3: optional string password + + // Configuration overlay which is applied when the session is + // first created. 
+ 4: optional map configuration +} + +struct TOpenSessionResp { + 1: required TStatus status + + // The protocol version that the server is using. + 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1 + + // Session Handle + 3: optional TSessionHandle sessionHandle + + // The configuration settings for this session. + 4: optional map configuration +} + + +// CloseSession() +// +// Closes the specified session and frees any resources +// currently allocated to that session. Any open +// operations in that session will be canceled. +struct TCloseSessionReq { + 1: required TSessionHandle sessionHandle +} + +struct TCloseSessionResp { + 1: required TStatus status +} + + + +enum TGetInfoType { + CLI_MAX_DRIVER_CONNECTIONS = 0, + CLI_MAX_CONCURRENT_ACTIVITIES = 1, + CLI_DATA_SOURCE_NAME = 2, + CLI_FETCH_DIRECTION = 8, + CLI_SERVER_NAME = 13, + CLI_SEARCH_PATTERN_ESCAPE = 14, + CLI_DBMS_NAME = 17, + CLI_DBMS_VER = 18, + CLI_ACCESSIBLE_TABLES = 19, + CLI_ACCESSIBLE_PROCEDURES = 20, + CLI_CURSOR_COMMIT_BEHAVIOR = 23, + CLI_DATA_SOURCE_READ_ONLY = 25, + CLI_DEFAULT_TXN_ISOLATION = 26, + CLI_IDENTIFIER_CASE = 28, + CLI_IDENTIFIER_QUOTE_CHAR = 29, + CLI_MAX_COLUMN_NAME_LEN = 30, + CLI_MAX_CURSOR_NAME_LEN = 31, + CLI_MAX_SCHEMA_NAME_LEN = 32, + CLI_MAX_CATALOG_NAME_LEN = 34, + CLI_MAX_TABLE_NAME_LEN = 35, + CLI_SCROLL_CONCURRENCY = 43, + CLI_TXN_CAPABLE = 46, + CLI_USER_NAME = 47, + CLI_TXN_ISOLATION_OPTION = 72, + CLI_INTEGRITY = 73, + CLI_GETDATA_EXTENSIONS = 81, + CLI_NULL_COLLATION = 85, + CLI_ALTER_TABLE = 86, + CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, + CLI_SPECIAL_CHARACTERS = 94, + CLI_MAX_COLUMNS_IN_GROUP_BY = 97, + CLI_MAX_COLUMNS_IN_INDEX = 98, + CLI_MAX_COLUMNS_IN_ORDER_BY = 99, + CLI_MAX_COLUMNS_IN_SELECT = 100, + CLI_MAX_COLUMNS_IN_TABLE = 101, + CLI_MAX_INDEX_SIZE = 102, + CLI_MAX_ROW_SIZE = 104, + CLI_MAX_STATEMENT_LEN = 105, + CLI_MAX_TABLES_IN_SELECT = 106, + CLI_MAX_USER_NAME_LEN = 107, + CLI_OJ_CAPABILITIES = 115, + + CLI_XOPEN_CLI_YEAR = 10000, + CLI_CURSOR_SENSITIVITY = 10001, + CLI_DESCRIBE_PARAMETER = 10002, + CLI_CATALOG_NAME = 10003, + CLI_COLLATION_SEQ = 10004, + CLI_MAX_IDENTIFIER_LEN = 10005, +} + +union TGetInfoValue { + 1: string stringValue + 2: i16 smallIntValue + 3: i32 integerBitmask + 4: i32 integerFlag + 5: i32 binaryValue + 6: i64 lenValue +} + +// GetInfo() +// +// This function is based on ODBC's CLIGetInfo() function. +// The function returns general information about the data source +// using the same keys as ODBC. +struct TGetInfoReq { + // The sesssion to run this request against + 1: required TSessionHandle sessionHandle + + 2: required TGetInfoType infoType +} + +struct TGetInfoResp { + 1: required TStatus status + + 2: required TGetInfoValue infoValue +} + + +// ExecuteStatement() +// +// Execute a statement. +// The returned OperationHandle can be used to check on the +// status of the statement, and to fetch results once the +// statement has finished executing. +struct TExecuteStatementReq { + // The session to exexcute the statement against + 1: required TSessionHandle sessionHandle + + // The statement to be executed (DML, DDL, SET, etc) + 2: required string statement + + // Configuration properties that are overlayed on top of the + // the existing session configuration before this statement + // is executed. These properties apply to this statement + // only and will not affect the subsequent state of the Session. 
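+  // For example (illustrative, with an assumed property name):
+  //   confOverlay = {"hive.exec.compress.output": "true"}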
+ 3: optional map confOverlay +} + +struct TExecuteStatementResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTypeInfo() +// +// Get information about types supported by the HiveServer instance. +// The information is returned as a result set which can be fetched +// using the OperationHandle provided in the response. +// +// Refer to the documentation for ODBC's CLIGetTypeInfo function for +// the format of the result set. +struct TGetTypeInfoReq { + // The session to run this request against. + 1: required TSessionHandle sessionHandle +} + +struct TGetTypeInfoResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetCatalogs() +// +// Returns the list of catalogs (databases) +// Results are ordered by TABLE_CATALOG +// +// Resultset columns : +// col1 +// name: TABLE_CAT +// type: STRING +// desc: Catalog name. NULL if not applicable. +// +struct TGetCatalogsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle +} + +struct TGetCatalogsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetSchemas() +// +// Retrieves the schema names available in this database. +// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. +// col1 +// name: TABLE_SCHEM +// type: STRING +// desc: schema name +// col2 +// name: TABLE_CATALOG +// type: STRING +// desc: catalog name +struct TGetSchemasReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog. Must not contain a search pattern. + 2: optional TIdentifier catalogName + + // schema name or pattern + 3: optional TPatternOrIdentifier schemaName +} + +struct TGetSchemasResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTables() +// +// Returns a list of tables with catalog, schema, and table +// type information. The information is returned as a result +// set which can be fetched using the OperationHandle +// provided in the response. +// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME +// +// Result Set Columns: +// +// col1 +// name: TABLE_CAT +// type: STRING +// desc: Catalog name. NULL if not applicable. +// +// col2 +// name: TABLE_SCHEM +// type: STRING +// desc: Schema name. +// +// col3 +// name: TABLE_NAME +// type: STRING +// desc: Table name. +// +// col4 +// name: TABLE_TYPE +// type: STRING +// desc: The table type, e.g. "TABLE", "VIEW", etc. +// +// col5 +// name: REMARKS +// type: STRING +// desc: Comments about the table +// +struct TGetTablesReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog or a search pattern. + 2: optional TPatternOrIdentifier catalogName + + // Name of the schema or a search pattern. + 3: optional TPatternOrIdentifier schemaName + + // Name of the table or a search pattern. + 4: optional TPatternOrIdentifier tableName + + // List of table types to match + // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", + // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. + 5: optional list tableTypes +} + +struct TGetTablesResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetTableTypes() +// +// Returns the table types available in this database. +// The results are ordered by table type. +// +// col1 +// name: TABLE_TYPE +// type: STRING +// desc: Table type name. 
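+// As with the other catalog calls in this service, the rows themselves are not in the
+// response: the response carries an OperationHandle, and the client retrieves the rows
+// with FetchResults (and the schema with GetResultSetMetadata) using that handle.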
+struct TGetTableTypesReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle +} + +struct TGetTableTypesResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetColumns() +// +// Returns a list of columns in the specified tables. +// The information is returned as a result set which can be fetched +// using the OperationHandle provided in the response. +// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, +// and ORDINAL_POSITION. +// +// Result Set Columns are the same as those for the ODBC CLIColumns +// function. +// +struct TGetColumnsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // Name of the catalog. Must not contain a search pattern. + 2: optional TIdentifier catalogName + + // Schema name or search pattern + 3: optional TPatternOrIdentifier schemaName + + // Table name or search pattern + 4: optional TPatternOrIdentifier tableName + + // Column name or search pattern + 5: optional TPatternOrIdentifier columnName +} + +struct TGetColumnsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetFunctions() +// +// Returns a list of functions supported by the data source. The +// behavior of this function matches +// java.sql.DatabaseMetaData.getFunctions() both in terms of +// inputs and outputs. +// +// Result Set Columns: +// +// col1 +// name: FUNCTION_CAT +// type: STRING +// desc: Function catalog (may be null) +// +// col2 +// name: FUNCTION_SCHEM +// type: STRING +// desc: Function schema (may be null) +// +// col3 +// name: FUNCTION_NAME +// type: STRING +// desc: Function name. This is the name used to invoke the function. +// +// col4 +// name: REMARKS +// type: STRING +// desc: Explanatory comment on the function. +// +// col5 +// name: FUNCTION_TYPE +// type: SMALLINT +// desc: Kind of function. One of: +// * functionResultUnknown - Cannot determine if a return value or a table +// will be returned. +// * functionNoTable - Does not a return a table. +// * functionReturnsTable - Returns a table. +// +// col6 +// name: SPECIFIC_NAME +// type: STRING +// desc: The name which uniquely identifies this function within its schema. +// In this case this is the fully qualified class name of the class +// that implements this function. +// +struct TGetFunctionsReq { + // Session to run this request against + 1: required TSessionHandle sessionHandle + + // A catalog name; must match the catalog name as it is stored in the + // database; "" retrieves those without a catalog; null means + // that the catalog name should not be used to narrow the search. + 2: optional TIdentifier catalogName + + // A schema name pattern; must match the schema name as it is stored + // in the database; "" retrieves those without a schema; null means + // that the schema name should not be used to narrow the search. + 3: optional TPatternOrIdentifier schemaName + + // A function name pattern; must match the function name as it is stored + // in the database. + 4: required TPatternOrIdentifier functionName +} + +struct TGetFunctionsResp { + 1: required TStatus status + 2: optional TOperationHandle operationHandle +} + + +// GetOperationStatus() +// +// Get the status of an operation running on the server. 
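+// A typical client sequence (illustrative): poll GetOperationStatus until the returned
+// TOperationState is FINISHED_STATE (or CANCELED_STATE / ERROR_STATE), then retrieve
+// rows with FetchResults and release the handle with CloseOperation.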
+struct TGetOperationStatusReq { + // Session to run this request against + 1: required TOperationHandle operationHandle +} + +struct TGetOperationStatusResp { + 1: required TStatus status + 2: optional TOperationState operationState +} + + +// CancelOperation() +// +// Cancels processing on the specified operation handle and +// frees any resources which were allocated. +struct TCancelOperationReq { + // Operation to cancel + 1: required TOperationHandle operationHandle +} + +struct TCancelOperationResp { + 1: required TStatus status +} + + +// CloseOperation() +// +// Given an operation in the FINISHED, CANCELED, +// or ERROR states, CloseOperation() will free +// all of the resources which were allocated on +// the server to service the operation. +struct TCloseOperationReq { + 1: required TOperationHandle operationHandle +} + +struct TCloseOperationResp { + 1: required TStatus status +} + + +// GetResultSetMetadata() +// +// Retrieves schema information for the specified operation +struct TGetResultSetMetadataReq { + // Operation for which to fetch result set schema information + 1: required TOperationHandle operationHandle +} + +struct TGetResultSetMetadataResp { + 1: required TStatus status + 2: optional TTableSchema schema +} + + +enum TFetchOrientation { + // Get the next rowset. The fetch offset is ignored. + FETCH_NEXT, + + // Get the previous rowset. The fetch offset is ignored. + // NOT SUPPORTED + FETCH_PRIOR, + + // Return the rowset at the given fetch offset relative + // to the curren rowset. + // NOT SUPPORTED + FETCH_RELATIVE, + + // Return the rowset at the specified fetch offset. + // NOT SUPPORTED + FETCH_ABSOLUTE, + + // Get the first rowset in the result set. + FETCH_FIRST, + + // Get the last rowset in the result set. + // NOT SUPPORTED + FETCH_LAST +} + +// FetchResults() +// +// Fetch rows from the server corresponding to +// a particular OperationHandle. +struct TFetchResultsReq { + // Operation from which to fetch results. + 1: required TOperationHandle operationHandle + + // The fetch orientation. For V1 this must be either + // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT. + 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT + + // Max number of rows that should be returned in + // the rowset. + 3: required i64 maxRows +} + +struct TFetchResultsResp { + 1: required TStatus status + + // TRUE if there are more rows left to fetch from the server. + 2: optional bool hasMoreRows + + // The rowset. This is optional so that we have the + // option in the future of adding alternate formats for + // representing result set data, e.g. delimited strings, + // binary encoded, etc. + 3: optional TRowSet results +} + +// GetLog() +// +// Fetch operation log from the server corresponding to +// a particular OperationHandle. 
+struct TGetLogReq { + // Operation whose log is requested + 1: required TOperationHandle operationHandle +} + +struct TGetLogResp { + 1: required TStatus status + + 2: required string log +} + +service TCLIService { + + TOpenSessionResp OpenSession(1:TOpenSessionReq req); + + TCloseSessionResp CloseSession(1:TCloseSessionReq req); + + TGetInfoResp GetInfo(1:TGetInfoReq req); + + TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); + + TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); + + TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); + + TGetSchemasResp GetSchemas(1:TGetSchemasReq req); + + TGetTablesResp GetTables(1:TGetTablesReq req); + + TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); + + TGetColumnsResp GetColumns(1:TGetColumnsReq req); + + TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); + + TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); + + TCancelOperationResp CancelOperation(1:TCancelOperationReq req); + + TCloseOperationResp CloseOperation(1:TCloseOperationReq req); + + TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); + + TFetchResultsResp FetchResults(1:TFetchResultsReq req); + + TGetLogResp GetLog(1:TGetLogReq req); +} diff --git a/herringbone-main/src/main/thrift/fb303.thrift b/herringbone-main/src/main/thrift/fb303.thrift new file mode 100644 index 0000000..6438092 --- /dev/null +++ b/herringbone-main/src/main/thrift/fb303.thrift @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * fb303.thrift + */ + +namespace java com.facebook.fb303 +namespace cpp facebook.fb303 +namespace rb Impala.Protocol.fb303 + +/** + * Common status reporting mechanism across all services + */ +enum fb_status { + DEAD = 0, + STARTING = 1, + ALIVE = 2, + STOPPING = 3, + STOPPED = 4, + WARNING = 5, +} + +/** + * Standard base service + */ +service FacebookService { + + /** + * Returns a descriptive name of the service + */ + string getName(), + + /** + * Returns the version of the service + */ + string getVersion(), + + /** + * Gets the status of this service + */ + fb_status getStatus(), + + /** + * User friendly description of status, such as why the service is in + * the dead or warning state, or what is being started or stopped. 
+ */ + string getStatusDetails(), + + /** + * Gets the counters for this service + */ + map getCounters(), + + /** + * Gets the value of a single counter + */ + i64 getCounter(1: string key), + + /** + * Sets an option + */ + void setOption(1: string key, 2: string value), + + /** + * Gets an option + */ + string getOption(1: string key), + + /** + * Gets all options + */ + map getOptions(), + + /** + * Returns a CPU profile over the given time interval (client and server + * must agree on the profile format). + */ + string getCpuProfile(1: i32 profileDurationInSec), + + /** + * Returns the unix time that the server has been running since + */ + i64 aliveSince(), + + /** + * Tell the server to reload its configuration, reopen log files, etc + */ + oneway void reinitialize(), + + /** + * Suggest a shutdown to the server + */ + oneway void shutdown(), + +} diff --git a/herringbone-main/src/main/thrift/hive_metastore.thrift b/herringbone-main/src/main/thrift/hive_metastore.thrift new file mode 100644 index 0000000..5e05367 --- /dev/null +++ b/herringbone-main/src/main/thrift/hive_metastore.thrift @@ -0,0 +1,528 @@ +#!/usr/local/bin/thrift -java + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +# +# Thrift Service that the MetaStore is built on +# + +include "fb303.thrift" + +namespace java org.apache.hadoop.hive.metastore.api +namespace php metastore +namespace cpp Apache.Hadoop.Hive +namespace rb Impala.Protocol.HiveMetastore + +const string DDL_TIME = "transient_lastDdlTime" + +struct Version { + 1: string version, + 2: string comments +} + +struct FieldSchema { + 1: string name, // name of the field + 2: string type, // type of the field. 
primitive types defined above, specify list, map for lists & maps + 3: string comment +} + +struct Type { + 1: string name, // one of the types in PrimitiveTypes or CollectionTypes or User defined types + 2: optional string type1, // object type if the name is 'list' (LIST_TYPE), key type if the name is 'map' (MAP_TYPE) + 3: optional string type2, // val type if the name is 'map' (MAP_TYPE) + //4: optional list fields // if the name is one of the user defined types +} + +enum HiveObjectType { + GLOBAL = 1, + DATABASE = 2, + TABLE = 3, + PARTITION = 4, + COLUMN = 5, +} + +enum PrincipalType { + USER = 1, + ROLE = 2, + GROUP = 3, +} + +const string HIVE_FILTER_FIELD_OWNER = "hive_filter_field_owner__" +const string HIVE_FILTER_FIELD_PARAMS = "hive_filter_field_params__" +const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__" + +enum PartitionEventType { + LOAD_DONE = 1, +} + +struct HiveObjectRef{ + 1: HiveObjectType objectType, + 2: string dbName, + 3: string objectName, + 4: list partValues, + 5: string columnName, +} + +struct PrivilegeGrantInfo { + 1: string privilege, + 2: i32 createTime, + 3: string grantor, + 4: PrincipalType grantorType, + 5: bool grantOption, +} + +struct HiveObjectPrivilege { + 1: HiveObjectRef hiveObject, + 2: string principalName, + 3: PrincipalType principalType, + 4: PrivilegeGrantInfo grantInfo, +} + +struct PrivilegeBag { + 1: list privileges, +} + +struct PrincipalPrivilegeSet { + 1: map> userPrivileges, // user name -> privilege grant info + 2: map> groupPrivileges, // group name -> privilege grant info + 3: map> rolePrivileges, //role name -> privilege grant info +} + +struct Role { + 1: string roleName, + 2: i32 createTime, + 3: string ownerName, +} + +// namespace for tables +struct Database { + 1: string name, + 2: string description, + 3: string locationUri, + 4: map parameters, // properties associated with the database + 5: optional PrincipalPrivilegeSet privileges +} + +// This object holds the information needed by SerDes +struct SerDeInfo { + 1: string name, // name of the serde, table name by default + 2: string serializationLib, // usually the class that implements the extractor & loader + 3: map parameters // initialization parameters +} + +// sort order of a column (column name along with asc(1)/desc(0)) +struct Order { + 1: string col, // sort column name + 2: i32 order // asc(1) or desc(0) +} + +// this object holds all the information about physical storage of the data belonging to a table +struct StorageDescriptor { + 1: list cols, // required (refer to types defined above) + 2: string location, // defaults to //tablename + 3: string inputFormat, // SequenceFileInputFormat (binary) or TextInputFormat` or custom format + 4: string outputFormat, // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format + 5: bool compressed, // compressed or not + 6: i32 numBuckets, // this must be specified if there are any dimension columns + 7: SerDeInfo serdeInfo, // serialization and deserialization information + 8: list bucketCols, // reducer grouping columns and clustering columns and bucketing columns` + 9: list sortCols, // sort order of the data in each bucket + 10: map parameters // any user supplied key value hash +} + +// table information +struct Table { + 1: string tableName, // name of the table + 2: string dbName, // database name ('default') + 3: string owner, // owner of this table + 4: i32 createTime, // creation time of the table + 5: i32 lastAccessTime, // last access time (usually this 
will be filled from HDFS and shouldn't be relied on) + 6: i32 retention, // retention time + 7: StorageDescriptor sd, // storage descriptor of the table + 8: list partitionKeys, // partition keys of the table. only primitive types are supported + 9: map parameters, // to store comments or any other user level parameters + 10: string viewOriginalText, // original view text, null for non-view + 11: string viewExpandedText, // expanded view text, null for non-view + 12: string tableType, // table type enum, e.g. EXTERNAL_TABLE + 13: optional PrincipalPrivilegeSet privileges, +} + +struct Partition { + 1: list values // string value is converted to appropriate partition key type + 2: string dbName, + 3: string tableName, + 4: i32 createTime, + 5: i32 lastAccessTime, + 6: StorageDescriptor sd, + 7: map parameters, + 8: optional PrincipalPrivilegeSet privileges +} + +struct Index { + 1: string indexName, // unique with in the whole database namespace + 2: string indexHandlerClass, // reserved + 3: string dbName, + 4: string origTableName, + 5: i32 createTime, + 6: i32 lastAccessTime, + 7: string indexTableName, + 8: StorageDescriptor sd, + 9: map parameters, + 10: bool deferredRebuild +} + +// schema of the table/query results etc. +struct Schema { + // column names, types, comments + 1: list fieldSchemas, // delimiters etc + 2: map properties +} + +// Key-value store to be used with selected +// Metastore APIs (create, alter methods). +// The client can pass environment properties / configs that can be +// accessed in hooks. +struct EnvironmentContext { + 1: map properties +} + +exception MetaException { + 1: string message +} + +exception UnknownTableException { + 1: string message +} + +exception UnknownDBException { + 1: string message +} + +exception AlreadyExistsException { + 1: string message +} + +exception InvalidPartitionException { + 1: string message +} + +exception UnknownPartitionException { + 1: string message +} + +exception InvalidObjectException { + 1: string message +} + +exception NoSuchObjectException { + 1: string message +} + +exception IndexAlreadyExistsException { + 1: string message +} + +exception InvalidOperationException { + 1: string message +} + +exception ConfigValSecurityException { + 1: string message +} + +/** +* This interface is live. 
+*/ +service ThriftHiveMetastore extends fb303.FacebookService +{ + void create_database(1:Database database) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3) + Database get_database(1:string name) throws(1:NoSuchObjectException o1, 2:MetaException o2) + void drop_database(1:string name, 2:bool deleteData, 3:bool cascade) throws(1:NoSuchObjectException o1, 2:InvalidOperationException o2, 3:MetaException o3) + list get_databases(1:string pattern) throws(1:MetaException o1) + list get_all_databases() throws(1:MetaException o1) + void alter_database(1:string dbname, 2:Database db) throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // returns the type with given name (make seperate calls for the dependent types if needed) + Type get_type(1:string name) throws(1:MetaException o1, 2:NoSuchObjectException o2) + bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3) + bool drop_type(1:string type) throws(1:MetaException o1, 2:NoSuchObjectException o2) + map get_type_all(1:string name) + throws(1:MetaException o2) + + // Gets a list of FieldSchemas describing the columns of a particular table + list get_fields(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3), + + // Gets a list of FieldSchemas describing both the columns and the partition keys of a particular table + list get_schema(1: string db_name, 2: string table_name) throws (1: MetaException o1, 2: UnknownTableException o2, 3: UnknownDBException o3) + + // create a Hive table. Following fields must be set + // tableName + // database (only 'default' for now until Hive QL supports databases) + // owner (not needed, but good to have for tracking purposes) + // sd.cols (list of field schemas) + // sd.inputFormat (SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat) + // sd.outputFormat (SequenceFileInputFormat (binary) or TextInputFormat) + // sd.serdeInfo.serializationLib (SerDe class name eg org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe + // * See notes on DDL_TIME + void create_table(1:Table tbl) throws(1:AlreadyExistsException o1, 2:InvalidObjectException o2, 3:MetaException o3, 4:NoSuchObjectException o4) + void create_table_with_environment_context(1:Table tbl, + 2:EnvironmentContext environment_context) + throws (1:AlreadyExistsException o1, + 2:InvalidObjectException o2, 3:MetaException o3, + 4:NoSuchObjectException o4) + // drops the table and all the partitions associated with it if the table has partitions + // delete data (including partitions) if deleteData is set to true + void drop_table(1:string dbname, 2:string name, 3:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o3) + list get_tables(1: string db_name, 2: string pattern) throws (1: MetaException o1) + list get_all_tables(1: string db_name) throws (1: MetaException o1) + + Table get_table(1:string dbname, 2:string tbl_name) + throws (1:MetaException o1, 2:NoSuchObjectException o2) + list
get_table_objects_by_name(1:string dbname, 2:list tbl_names) + throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3) + + // Get a list of table names that match a filter. + // The filter operators are LIKE, <, <=, >, >=, =, <> + // + // In the filter statement, values interpreted as strings must be enclosed in quotes, + // while values interpreted as integers should not be. Strings and integers are the only + // supported value types. + // + // The currently supported key names in the filter are: + // Constants.HIVE_FILTER_FIELD_OWNER, which filters on the tables' owner's name + // and supports all filter operators + // Constants.HIVE_FILTER_FIELD_LAST_ACCESS, which filters on the last access times + // and supports all filter operators except LIKE + // Constants.HIVE_FILTER_FIELD_PARAMS, which filters on the tables' parameter keys and values + // and only supports the filter operators = and <>. + // Append the parameter key name to HIVE_FILTER_FIELD_PARAMS in the filter statement. + // For example, to filter on parameter keys called "retention", the key name in the filter + // statement should be Constants.HIVE_FILTER_FIELD_PARAMS + "retention" + // Also, = and <> only work for keys that exist + // in the tables. E.g., if you are looking for tables where key1 <> value, it will only + // look at tables that have a value for the parameter key1. + // Some example filter statements include: + // filter = Constants.HIVE_FILTER_FIELD_OWNER + " like \".*test.*\" and " + + // Constants.HIVE_FILTER_FIELD_LAST_ACCESS + " = 0"; + // filter = Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"30\" or " + + // Constants.HIVE_FILTER_FIELD_PARAMS + "retention = \"90\"" + // @param dbName + // The name of the database from which you will retrieve the table names + // @param filterType + // The type of filter + // @param filter + // The filter string + // @param max_tables + // The maximum number of tables returned + // @return A list of table names that match the desired filter + list get_table_names_by_filter(1:string dbname, 2:string filter, 3:i16 max_tables=-1) + throws (1:MetaException o1, 2:InvalidOperationException o2, 3:UnknownDBException o3) + + // alter table applies to only future partitions not for existing partitions + // * See notes on DDL_TIME + void alter_table(1:string dbname, 2:string tbl_name, 3:Table new_tbl) + throws (1:InvalidOperationException o1, 2:MetaException o2) + void alter_table_with_environment_context(1:string dbname, 2:string tbl_name, + 3:Table new_tbl, 4:EnvironmentContext environment_context) + throws (1:InvalidOperationException o1, 2:MetaException o2) + // the following applies to only tables that have partitions + // * See notes on DDL_TIME + Partition add_partition(1:Partition new_part) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition add_partition_with_environment_context(1:Partition new_part, + 2:EnvironmentContext environment_context) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, + 3:MetaException o3) + i32 add_partitions(1:list new_parts) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition append_partition(1:string db_name, 2:string tbl_name, 3:list part_vals) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + Partition append_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name) + throws (1:InvalidObjectException o1, 2:AlreadyExistsException o2, 
3:MetaException o3) + bool drop_partition(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + bool drop_partition_by_name(1:string db_name, 2:string tbl_name, 3:string part_name, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + Partition get_partition(1:string db_name, 2:string tbl_name, 3:list part_vals) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 3:list part_vals, + 4: string user_name, 5: list group_names) throws(1:MetaException o1, 2:NoSuchObjectException o2) + + Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string part_name) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // returns all the partitions for this table in reverse chronological order. + // If max parts is given then it will return only that many. + list get_partitions(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + list get_partitions_with_auth(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1, + 4: string user_name, 5: list group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2) + + list get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 max_parts=-1) + throws(1:MetaException o2) + + // get_partition*_ps methods allow filtering by a partial partition specification, + // as needed for dynamic partitions. The values that are not restricted should + // be empty strings. Nulls were considered (instead of "") but caused errors in + // generated Python code. The size of part_vals may be smaller than the + // number of partition columns - the unspecified values are considered the same + // as "". + list get_partitions_ps(1:string db_name 2:string tbl_name + 3:list part_vals, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + list get_partitions_ps_with_auth(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:i16 max_parts=-1, + 5: string user_name, 6: list group_names) throws(1:NoSuchObjectException o1, 2:MetaException o2) + + list get_partition_names_ps(1:string db_name, + 2:string tbl_name, 3:list part_vals, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // get the partitions matching the given partition filter + list get_partitions_by_filter(1:string db_name 2:string tbl_name + 3:string filter, 4:i16 max_parts=-1) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // get partitions give a list of partition names + list get_partitions_by_names(1:string db_name 2:string tbl_name 3:list names) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + // changes the partition to the new partition object. partition is identified from the part values + // in the new_part + // * See notes on DDL_TIME + void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition new_part) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + void alter_partition_with_environment_context(1:string db_name, + 2:string tbl_name, 3:Partition new_part, + 4:EnvironmentContext environment_context) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + // rename the old partition to the new partition object by changing old part values to the part values + // in the new_part. old partition is identified from part_vals. + // partition keys in new_part should be the same as those in old partition. 
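+  // Illustrative example: for a table partitioned by (ds string), calling this with
+  // part_vals = ["2014-11-20"] and new_part.values = ["2014-11-21"] renames the
+  // partition ds=2014-11-20 to ds=2014-11-21.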
+ void rename_partition(1:string db_name, 2:string tbl_name, 3:list part_vals, 4:Partition new_part) + throws (1:InvalidOperationException o1, 2:MetaException o2) + + // gets the value of the configuration key in the metastore server. returns + // defaultValue if the key does not exist. if the configuration key does not + // begin with "hive", "mapred", or "hdfs", a ConfigValSecurityException is + // thrown. + string get_config_value(1:string name, 2:string defaultValue) + throws(1:ConfigValSecurityException o1) + + // converts a partition name into a partition values array + list partition_name_to_vals(1: string part_name) + throws(1: MetaException o1) + // converts a partition name into a partition specification (a mapping from + // the partition cols to the values) + map partition_name_to_spec(1: string part_name) + throws(1: MetaException o1) + + void markPartitionForEvent(1:string db_name, 2:string tbl_name, 3:map part_vals, + 4:PartitionEventType eventType) throws (1: MetaException o1, 2: NoSuchObjectException o2, + 3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5, + 6: InvalidPartitionException o6) + bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 3:map part_vals, + 4: PartitionEventType eventType) throws (1: MetaException o1, 2:NoSuchObjectException o2, + 3: UnknownDBException o3, 4: UnknownTableException o4, 5: UnknownPartitionException o5, + 6: InvalidPartitionException o6) + + //index + Index add_index(1:Index new_index, 2: Table index_table) + throws(1:InvalidObjectException o1, 2:AlreadyExistsException o2, 3:MetaException o3) + void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 4:Index new_idx) + throws (1:InvalidOperationException o1, 2:MetaException o2) + bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string index_name, 4:bool deleteData) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string index_name) + throws(1:MetaException o1, 2:NoSuchObjectException o2) + + list get_indexes(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1) + throws(1:NoSuchObjectException o1, 2:MetaException o2) + list get_index_names(1:string db_name, 2:string tbl_name, 3:i16 max_indexes=-1) + throws(1:MetaException o2) + + //authorization privileges + + bool create_role(1:Role role) throws(1:MetaException o1) + bool drop_role(1:string role_name) throws(1:MetaException o1) + list get_role_names() throws(1:MetaException o1) + bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type, + 4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) throws(1:MetaException o1) + bool revoke_role(1:string role_name, 2:string principal_name, 3:PrincipalType principal_type) + throws(1:MetaException o1) + list list_roles(1:string principal_name, 2:PrincipalType principal_type) throws(1:MetaException o1) + + PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string user_name, + 3: list group_names) throws(1:MetaException o1) + list list_privileges(1:string principal_name, 2:PrincipalType principal_type, + 3: HiveObjectRef hiveObject) throws(1:MetaException o1) + + bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1) + bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1) + + // this is used by metastore client to send UGI information to metastore server immediately + // after setting up a connection. 
+ list set_ugi(1:string user_name, 2:list group_names) throws (1:MetaException o1) + + //Authentication (delegation token) interfaces + + // get metastore server delegation token for use from the map/reduce tasks to authenticate + // to metastore server + string get_delegation_token(1:string token_owner, 2:string renewer_kerberos_principal_name) + throws (1:MetaException o1) + + // method to renew delegation token obtained from metastore server + i64 renew_delegation_token(1:string token_str_form) throws (1:MetaException o1) + + // method to cancel delegation token obtained from metastore server + void cancel_delegation_token(1:string token_str_form) throws (1:MetaException o1) +} + +// * Note about the DDL_TIME: When creating or altering a table or a partition, +// if the DDL_TIME is not set, the current time will be used. + +// For storing info about archived partitions in parameters + +// Whether the partition is archived +const string IS_ARCHIVED = "is_archived", +// The original location of the partition, before archiving. After archiving, +// this directory will contain the archive. When the partition +// is dropped, this directory will be deleted +const string ORIGINAL_LOCATION = "original_location", + +// these should be needed only for backward compatibility with filestore +const string META_TABLE_COLUMNS = "columns", +const string META_TABLE_COLUMN_TYPES = "columns.types", +const string BUCKET_FIELD_NAME = "bucket_field_name", +const string BUCKET_COUNT = "bucket_count", +const string FIELD_TO_DIMENSION = "field_to_dimension", +const string META_TABLE_NAME = "name", +const string META_TABLE_DB = "db", +const string META_TABLE_LOCATION = "location", +const string META_TABLE_SERDE = "serde", +const string META_TABLE_PARTITION_COLUMNS = "partition_columns", +const string FILE_INPUT_FORMAT = "file.inputformat", +const string FILE_OUTPUT_FORMAT = "file.outputformat", +const string META_TABLE_STORAGE = "storage_handler", + + + diff --git a/herringbone-main/src/test/resources/test.parquet b/herringbone-main/src/test/resources/test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..17c1541ef121c7570dcab3414ad951c9f4918d8d GIT binary patch literal 916 zcmbtT-D=w~6xLF=n2ekSHlir;J-leLntyQ5FdUq4D-=X*Wk%K!WUAt>?Y{dWod%sQVSIaVx-6K!( zl}zHi&qc;$vdyn0`FZf8O{tr3m%gIzOu|g;RC&Aqv8LJRri%g!*=vQTiEs4=MN2M`n|nFq*G0c!8|iU*mc^^au}QF zR{)&TPs>|GUT@0-`#hEOX;i@2sPKdF_*4W#&iRXo37`2VgW&X~KV-4b#3*2bk572; M4E=%YGKc&47i@*Wu>b%7 literal 0 HcmV?d00001 diff --git a/herringbone-main/src/test/scala/com/stripe/herringbone/FlattenJobTest.scala b/herringbone-main/src/test/scala/com/stripe/herringbone/FlattenJobTest.scala new file mode 100644 index 0000000..c896c47 --- /dev/null +++ b/herringbone-main/src/test/scala/com/stripe/herringbone/FlattenJobTest.scala @@ -0,0 +1,22 @@ +package com.stripe.herringbone.test + +import com.stripe.herringbone.flatten._ +import org.scalatest._ +import parquet.example.Paper +import parquet.io.api.Binary + +class FlattenJobTest extends FlatSpec with Matchers { + def toBinary(x: Array[Byte]) = Binary.fromByteArray(x) + + "truncate" should "truncate to correct length" in { + val consumer = new FlatConsumer(Paper.r1, "__", false) + val bytes = toBinary(Array[Byte](1,2,3,4)) + assert(consumer.truncate(bytes, 3).getBytes().sameElements(Array[Byte](1,2,3))) + } + + "truncate" should "not truncate if unnecessary" in { + val consumer = new FlatConsumer(Paper.r1, "__", false) + val bytes = toBinary(Array[Byte](1,2,3,4)) + assert(consumer.truncate(bytes, 8) == bytes) + } 
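+
+  // Taken together, the two cases above pin down FlatConsumer.truncate's contract:
+  // a Binary longer than the requested length is cut down to that many bytes, and a
+  // Binary that already fits is returned as-is.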
+} diff --git a/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/FlatConverterTest.scala b/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/FlatConverterTest.scala new file mode 100644 index 0000000..241cd23 --- /dev/null +++ b/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/FlatConverterTest.scala @@ -0,0 +1,61 @@ +package com.stripe.herringbone.test + +import com.stripe.herringbone.flatten.{FlatConverter,TypeFlattener} + +import org.scalatest._ +import org.apache.hadoop.fs.Path + +import parquet.example.Paper +import parquet.example.data.simple.SimpleGroup +import parquet.example.data.GroupWriter +import parquet.schema.MessageType +import parquet.schema.PrimitiveType +import parquet.schema.Type.Repetition.OPTIONAL +import parquet.schema.Type.Repetition.REQUIRED +import parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY + +import scala.collection.mutable.StringBuilder +import java.io.StringWriter + +class FlatConverterTest extends FlatSpec with Matchers { + + def nestedGroupFixture = + new { + val group = Paper.r1 + val schema = Paper.schema + val flatSchema = TypeFlattener.flatten(schema, None, "__", true) + val flatGroup = FlatConverter.flattenGroup(group, flatSchema, "__", true) + } + + def flatGroupFixture = + new { + val flatSchema = + new MessageType("Charge", + new PrimitiveType(REQUIRED, BINARY, "_id"), + new PrimitiveType(OPTIONAL, BINARY, "email"), + new PrimitiveType(REQUIRED, BINARY, "merchant") + ) + val flatGroupMissingFields = new SimpleGroup(flatSchema) + flatGroupMissingFields.add("_id", "ch_1") + flatGroupMissingFields.add("merchant", "acct_1") + val flatGroupAllFields = new SimpleGroup(flatSchema) + flatGroupAllFields.add("email", "bob@stripe.com") + flatGroupAllFields.add("merchant", "acct_1") + flatGroupAllFields.add("_id", "ch_1") + } + + "groupToTSV" should "convert a flattened group" in { + val f = nestedGroupFixture + val groupTSV = FlatConverter.groupToTSV(f.flatGroup, f.flatSchema, "__", true) + assert(groupTSV == "10\t\t20,40,60") + } + + "groupToTSV" should "respect schema ordering, handle optional fields" in { + val f = flatGroupFixture + val missingTSV = FlatConverter.groupToTSV(f.flatGroupMissingFields, f.flatSchema, "__", true) + assert(missingTSV == "ch_1\t\tacct_1") + val allTSV = FlatConverter.groupToTSV(f.flatGroupAllFields, f.flatSchema, "__", true) + assert(allTSV == "ch_1\tbob@stripe.com\tacct_1") + } +} + diff --git a/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/TypeFlattenerTest.scala b/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/TypeFlattenerTest.scala new file mode 100644 index 0000000..d2eb6b2 --- /dev/null +++ b/herringbone-main/src/test/scala/com/stripe/herringbone/flatten/TypeFlattenerTest.scala @@ -0,0 +1,95 @@ +package com.stripe.herringbone.test + +import com.stripe.herringbone.flatten.TypeFlattener + +import org.scalatest._ + +import parquet.schema.GroupType +import parquet.schema.MessageType +import parquet.schema.PrimitiveType +import parquet.schema.Type.Repetition.OPTIONAL +import parquet.schema.Type.Repetition.REPEATED +import parquet.schema.Type.Repetition.REQUIRED +import parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY +import parquet.schema.PrimitiveType.PrimitiveTypeName.INT64 + +class TypeFlattenerTest extends FlatSpec with Matchers { + + "flatten" should "omit the idField in nested fieldname if specified" in { + val input = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id"), + new GroupType(OPTIONAL, "Page", + new 
PrimitiveType(OPTIONAL, BINARY, "_id"))) + + val expected = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id"), + new PrimitiveType(OPTIONAL, BINARY, "Page")) + + val result = TypeFlattener.flatten(input, None, "__", true) + assert(expected == result) + } + + "flatten" should "not omit the idField in nested fieldname if none is specified" in { + val input = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id"), + new GroupType(OPTIONAL, "Page", + new PrimitiveType(OPTIONAL, BINARY, "_id"))) + + val expected = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id"), + new PrimitiveType(OPTIONAL, BINARY, "Page___id")) + + val result = TypeFlattener.flatten(input, None, "__", false) + assert(expected == result) + } + + "flatten" should "not include repeated groups" in { + val input = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id"), + new GroupType(REPEATED, "Nope", + new PrimitiveType(REPEATED, INT64, "Never"))) + + val expected = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "_id")) + + val result = TypeFlattener.flatten(input, None, "__", true) + assert(expected == result) + } + + "flatten" should "set all fields as optional" in { + val input = new MessageType("Document", + new GroupType(OPTIONAL, "Yep", + new GroupType(REQUIRED, "Grouped", + new PrimitiveType(REQUIRED, BINARY, "Yes"), + new PrimitiveType(REPEATED, BINARY, "Maybe")), + new PrimitiveType(OPTIONAL, BINARY, "Sometimes"))) + + val expected = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "Yep__Grouped__Yes"), + new PrimitiveType(OPTIONAL, BINARY, "Yep__Grouped__Maybe"), + new PrimitiveType(OPTIONAL, BINARY, "Yep__Sometimes")) + + val result = TypeFlattener.flatten(input, None, "__", true) + assert(expected == result) + } + + "flatten" should "preserve the order of previously flattened fields" in { + val input = new MessageType("Document", + new PrimitiveType(REQUIRED, BINARY, "Old__Two"), + new GroupType(OPTIONAL, "New", + new PrimitiveType(REQUIRED, BINARY, "One")), + new PrimitiveType(REQUIRED, BINARY, "Old__One")) + + val old = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "Old__One"), + new PrimitiveType(OPTIONAL, BINARY, "Old__Two")) + + val expected = new MessageType("Document", + new PrimitiveType(OPTIONAL, BINARY, "Old__One"), + new PrimitiveType(OPTIONAL, BINARY, "Old__Two"), + new PrimitiveType(OPTIONAL, BINARY, "New__One")) + + val result = TypeFlattener.flatten(input, Some(old), "__", true) + assert(expected == result) + } +} diff --git a/herringbone-main/src/test/scala/com/stripe/herringbone/load/FieldUtilsTest.scala b/herringbone-main/src/test/scala/com/stripe/herringbone/load/FieldUtilsTest.scala new file mode 100644 index 0000000..2094b43 --- /dev/null +++ b/herringbone-main/src/test/scala/com/stripe/herringbone/load/FieldUtilsTest.scala @@ -0,0 +1,49 @@ +package com.stripe.herringbone.test.load + +import com.stripe.herringbone.load.{FieldUtils, HadoopFs, ImpalaHiveSchemaTypeMapper} +import org.apache.hadoop.fs._ +import org.scalamock.scalatest.MockFactory +import org.scalatest._ +import parquet.schema.{PrimitiveType, Type} + +class FieldUtilsTest extends FlatSpec with Matchers with MockFactory { + + "findPartitionFields" should "find the partition field names and types" in { + val hadoopFs = mock[HadoopFs] + val path = new Path("path") + + val partitions = List(("day", "123"), ("type", "foo")) + (hadoopFs.findPartitions _).expects(path).returning(partitions) + + val 
expected = List("`day` int", "`type` string")
+    FieldUtils(hadoopFs, ImpalaHiveSchemaTypeMapper).findPartitionFields(path) should equal (expected)
+  }
+
+  "tableFieldsFromSchemaFields" should "find the table fields from the parquet schema" in {
+    val hadoopFs = mock[HadoopFs]
+    val optional = Type.Repetition.valueOf("OPTIONAL")
+    val input = List(
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("BINARY"), "a"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("INT32"), "b"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("INT64"), "c"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("INT96"), "d"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("DOUBLE"), "e"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("BOOLEAN"), "f"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("FLOAT"), "g"),
+      new PrimitiveType(optional, PrimitiveType.PrimitiveTypeName.valueOf("FIXED_LEN_BYTE_ARRAY"), "h")
+    )
+
+    val expected = List(
+      "`a` STRING",
+      "`b` INT",
+      "`c` BIGINT",
+      "`d` BIGINT",
+      "`e` DOUBLE",
+      "`f` BOOLEAN",
+      "`g` FLOAT",
+      "`h` BINARY"
+    )
+
+    FieldUtils(hadoopFs, ImpalaHiveSchemaTypeMapper).tableFieldsFromSchemaFields(input) should equal (expected)
+  }
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..9239264
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>com.stripe</groupId>
+  <artifactId>herringbone</artifactId>
+  <version>0.0.1</version>
+  <packaging>pom</packaging>
+
+  <name>Herringbone</name>
+
+  <modules>
+    <module>herringbone-impala</module>
+    <module>herringbone-main</module>
+  </modules>
+
+</project>
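For reference, the flattening API exercised by FlatConverterTest and TypeFlattenerTest composes end to end roughly as sketched below. This is an illustrative sketch rather than code shipped in this patch: the FlattenSketch object name is made up, and it simply reuses parquet's bundled Paper example record together with the same "__" separator and boolean flag that the tests pass.

import com.stripe.herringbone.flatten.{FlatConverter, TypeFlattener}
import parquet.example.Paper

object FlattenSketch {
  def main(args: Array[String]): Unit = {
    // Flatten the nested example schema bundled with parquet (Paper.schema);
    // None means there is no previously flattened schema whose field order must be preserved.
    val flatSchema = TypeFlattener.flatten(Paper.schema, None, "__", true)

    // Flatten one nested record against that schema and render it as a single TSV line.
    val flatGroup = FlatConverter.flattenGroup(Paper.r1, flatSchema, "__", true)
    val tsv = FlatConverter.groupToTSV(flatGroup, flatSchema, "__", true)

    println(flatSchema) // the flattened MessageType (nested groups collapsed with "__")
    println(tsv)        // "10\t\t20,40,60", as asserted in FlatConverterTest
  }
}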