Skip to content
Permalink
Browse files

initial commit

  • Loading branch information
nicolaskruchten committed Dec 4, 2015
0 parents commit 1302e4cb7bd15a29f33a9519d0b551de8693dcf7
Showing 1,916 changed files with 619,036 additions and 0 deletions.
1,788 Doxyfile

Large diffs are not rendered by default.

@@ -0,0 +1,94 @@
# Top-level build configuration: toolchain selection, feature switches, the
# build-output directory layout and compiler flags, followed by the includes
# of the shared jml-build rule files and the mldb sub-make.

# Default C and C++ compilers.  They are assigned before local.mk is included
# further down, so a local.mk can replace them (e.g. with ccache wrappers).
CC := gcc
CXX := g++
# Feature switches.  TCMALLOC_ENABLED uses ?= so a value that is already set
# (e.g. in the environment) is kept.
# NOTE(review): the consumers of these switches are not in this file;
# presumably the included python.mk / tcmalloc.mk read them -- confirm.
PYTHON_ENABLED:=1
DOCUMENTATION_ENABLED:=1
TCMALLOC_ENABLED?=1

# Registry and user prefixes for Docker image names (both end in '/').
DOCKER_REGISTRY:=quay.io/
DOCKER_USER:=datacratic/

# Locations of locally installed libraries and headers.  LOCAL_LIB_DIR is a
# space-separated list of two directories by default.
LOCAL_DIR?=$(HOME)/local
LOCAL_LIB_DIR?=$(LOCAL_DIR)/lib /usr/local/lib
LOCAL_INCLUDE_DIR?=$(LOCAL_DIR)/include

# Shim for the 14.04 migration
# (the distribution codename as printed by `lsb_release -sc`)
DIST_CODENAME:=$(shell lsb_release -sc)

# V8 header directory; added to CXXFLAGS with -I below.
V8_INCLUDE_PATH?=$(LOCAL_INCLUDE_DIR)/v8
# Network node name of the build machine (`uname -n`).
MACHINE_NAME:=$(shell uname -n)

V8_LIB:=v8


# Optional per-checkout overrides.  The leading '-' makes make carry on
# silently when local.mk does not exist.
-include local.mk
# Python tooling.  Everything defaults to executables inside the virtualenv
# directory; each variable can be overridden because it is assigned with ?=.
VIRTUALENV ?= virtualenv
PYTHON ?= $(VIRTUALENV)/bin/python
PIP ?= $(VIRTUALENV)/bin/pip
PYTHON_DEPENDENCIES_PRE_CMD ?= $(PIP) install -U pip==7.1.0
# flake8 restricted to the F, E9 and E101 check groups.
PYFLAKES ?= $(VIRTUALENV)/bin/flake8 --select=F,E9,E101
J2 ?= $(VIRTUALENV)/bin/j2
J2ENV ?= $(J2) -f env

# Make VIRTUALENV visible in the environment of commands run by make.
export VIRTUALENV

# `default` is the first rule in this file and only depends on `all`.
# NOTE(review): `all` is not defined here; presumably it comes from the rule
# files included below -- confirm.
default: all
.PHONY: default

# Build output layout: everything lives under $(BUILD)/$(ARCH)/.
BUILD ?= build
ARCH ?= $(shell uname -m)
OBJ := $(BUILD)/$(ARCH)/obj
BIN := $(BUILD)/$(ARCH)/bin
LIB := $(BUILD)/$(ARCH)/lib
TESTS := $(BUILD)/$(ARCH)/tests
# TMPBIN and the default value of TMP (below) name the same directory.
TMPBIN := $(BUILD)/$(ARCH)/tmp
INC := $(BUILD)/$(ARCH)/include
SRC := .
TMP ?= $(BUILD)/$(ARCH)/tmp
# Tests use the tests output directory as their temporary directory.
TEST_TMP := $(TESTS)
# Vars for configuration files or files that live outside bin and lib
ALTROOT := $(BUILD)/$(ARCH)/altroot
ETC := $(ALTROOT)/etc
PLUGINS := $(BUILD)/$(ARCH)/mldb_plugins

# Location of the shared build-system makefiles, and the project-wide
# include path.
JML_BUILD := mldb/jml-build
INCLUDE := -Imldb

# Exported into the environment of commands run by make.
export BIN
export BUILD
export TEST_TMP
export TMP

# Architecture-specific settings, selected by $(ARCH).
include $(JML_BUILD)/arch/$(ARCH).mk

# Compiler version: the last space-separated word on the first line of
# `g++ --version`.  Note that this runs `g++` literally, not $(CXX).
CXX_VERSION?=$(shell g++ --version | head -n1 | sed 's/.* //g')

CFLAGS += -fno-strict-overflow -msse4.2

CXXFLAGS += -Wno-deprecated -Winit-self -fno-omit-frame-pointer -std=c++0x -fno-deduce-init-list -msse3 -Wno-unused-but-set-variable -I$(LOCAL_INCLUDE_DIR) -I/usr/local/include -Wno-psabi -D__GXX_EXPERIMENTAL_CXX0X__=1 -msse4.2 -I$(V8_INCLUDE_PATH) -DNODEJS_DISABLED=1 -D_GLIBCXX_USE_NANOSLEEP=1 -D_GLIBCXX_USE_SCHED_YIELD=1
CXXLINKFLAGS += -Wl,--copy-dt-needed-entries -Wl,--no-as-needed -L/usr/local/lib
CFLAGS += -Wno-unused-but-set-variable

# Options for running under valgrind: use the valgrind.supp suppressions
# file, exit with status 1 when errors are reported, and do full leak checks.
VALGRINDFLAGS := --suppressions=valgrind.supp --error-exitcode=1 --leak-check=full --soname-synonyms=somalloc=*tcmalloc*

# Per-compiler-version flag tweaks.  Each line appends to CXXFLAGS when
# CXX_VERSION starts with the given major.minor; the 'x' prefixed to both
# sides of findstring anchors the match to the start of the version string.
$(if $(findstring x4.5,x$(CXX_VERSION)),$(eval CXXFLAGS += -Dnoexcept= -Dnullptr=NULL))
$(if $(findstring x4.8,x$(CXX_VERSION)),$(eval CXXFLAGS += -Wno-unused-local-typedefs -Wno-return-local-addr))
$(if $(findstring x4.9,x$(CXX_VERSION)),$(eval CXXFLAGS += -Wno-unused-local-typedefs))
$(if $(findstring x5.1,x$(CXX_VERSION)),$(eval CXXFLAGS += -Wno-unused-local-typedefs -Wno-unused-variable))

# Shared build-system functions and rules, then the project's own macros and
# release rules.
include $(JML_BUILD)/functions.mk
include $(JML_BUILD)/rules.mk
include $(JML_BUILD)/python.mk
include $(JML_BUILD)/tcmalloc.mk
include $(JML_BUILD)/docker.mk
include mldb/mldb_macros.mk
include mldb/release.mk

# The mldb sub-make is included twice: first with PREMAKE set to 1, then
# again with PREMAKE set to 0.
# NOTE(review): presumably the sub-makefiles use PREMAKE to tell a first
# declaration pass from the main pass -- confirm against include_sub_make and
# the mldb sub-makefiles.
PREMAKE := 1

$(eval $(call include_sub_make,mldb))

PREMAKE := 0

$(eval $(call include_sub_make,mldb))


200 ReadMe.md
@@ -0,0 +1,200 @@
# Building and running a development Docker image

These instructions are designed for a clean installation of **Ubuntu 14.04** and its default compiler, **GCC 4.8**.

## System dependencies

For C++ code to compile, the following system packages need to be installed:

```bash
apt-get install -y git valgrind build-essential libboost-all-dev libgoogle-perftools-dev liblzma-dev libcrypto++-dev libblas-dev liblapack-dev python-virtualenv libcurl4-openssl-dev libssh2-1-dev libpython-dev libgit2-dev libv8-dev libarchive-dev
```

For Python modules to install correctly:

```bash
apt-get install -y libffi-dev libfreetype6-dev libpng12-dev libcap-dev
```

To build and run the Docker image, you will need to install Docker: https://docs.docker.com/engine/installation/ubuntulinux/

## Cloning, compiling and testing

```bash
git clone git@github.com:datacratic/mldb.git
cd mldb
git submodule update --init
make dependencies
make compile
make test
```

To speed things up, consider using the `-j` option in `make` to leverage multiple cores.

Build output lands in the `build` directory and there is no `make clean` target: you can just `rm -rf build`. You can speed up recompilation after deleting your `build` directory by using `ccache`, which can be installed with `apt-get install ccache`. You can then create a file at the top of the repo directory called `local.mk` with the following contents:

```
CXX := ccache g++
CC := ccache gcc
```

*N.B.* To use `ccache` to maximum effect, you should set the cache size to something like 10GB if you have the disk space with `ccache -M 10G`.

## Building a Docker image

To *build* a development Docker image just run the following command from the top level of this repo:

```bash
nice make -j16 -k docker_mldb DOCKER_ALLOW_DIRTY=1
```

The final lines of output will give you a docker hash for this image, and the image is also tagged as `<username>_latest` where `<username>` is your Unix username on the box.

To *run* a development Docker image you just built, follow the Running instructions from http://mldb.ai/doc/#builtin/Running.md.html except where the tag there is `latest` just substitute `<username>_latest` and where the name there is `mldb` just substitute something unique to you (e.g. `<username>` is a good candidate!).

Docker images built this way will have the internal/experimental entities shown in the documentation. For external releases, the flag `RUN_STRIP=-s` is passed, which, as a side effect, hides the internal entities in the documentation.

## Basic Build System Commands

These all work from the top-level of this repo:

* `make compile` will compile all libraries, executables and tests.
* `make test` will execute all tests
* the `-k` flag will prevent `make` from stopping the first time it hits an error
* the `-j<x>` flag will cause `make` to use `<x>` cores to build
* `makerun <prog> <args>` will cause the build system to rebuild and run program `<prog>` with arguments `<args>` *so long as* you put the following into your `.bashrc` or `.profile` or equivalent:

```
makerun() {
name=$1
shift
make -j8 run_$name "$name"_ARGS="$*"
}
```

## Basic Docker commands

To see the running containers (with `-a` to see _all_ the containers):
```bash
docker ps
```

To see all the images (with `--no-trunc` to see the full image IDs):
```bash
docker images
```

To kill a running container:
```bash
docker kill <container's name>
```
To erase a killed container:
```bash
docker rm <container's name>
```

To delete an image (note that you may hit a conflict and need to delete a container before you can delete its image):
```bash
docker rmi <full image ID>
```

## Advanced Docker

The easiest way to get a privileged shell into a running container is to use `docker exec`:

```
docker run ... # start your container as explained at http://mldb.ai/doc/#builtin/Running.md.html
docker exec -t -i CONTAINER_ID|CONTAINER_NAME /bin/bash
```

## Docker layers

The `mldb` docker is built on top of a few base images:

- `quay.io/datacratic/mldb_base:14.04`
- `quay.io/datacratic/baseimage:0.9.17`

Some warnings:
* if you need to rebuild a layer, you must rebuild all layers which depend on it. (all the ones above it)
* the build is done from the top level of the mldb repo
* The build process will build whatever is in the current workspace.
It does **not** depend on the current state of the upstream Git repo and what
changes have been pushed.
* **Note that there is no versioning of the resulting images at the moment**


### `quay.io/datacratic/mldb_base`

----

*All files related to this layer live in the [datacratic/mldb_base](https://github.com/datacratic/mldb_base)* **PUBLIC git repo**.
Keep that in mind when modifying its contents.

----

This layer is built on top of `baseimage`; it contains all the required system packages and Python modules to run the `mldb` layer.
A change to any of these would require a rebuild of this image:

- python_requirements.txt
- python_constraints.txt
- system package (apt-get)

#### Build instructions

To rebuild this layer, run:

```
make mldb_base
docker push quay.io/datacratic/mldb_base:14.04
```

The script used to build this layer is `mldb_base/docker_create_mldb_base.sh`

A few things to keep in mind when editing/running the script:

* All these steps are _PUBLIC_. They are accessible on quay.io, so don't put private stuff in there.

Some switches are available if you need to do a custom build of that layer for some reason:

```
docker_create_mldb_base.sh [-b base_image] [-i image_name] [-w pip_wheelhouse_url]
-b base_image Base image to use (quay.io/datacratic/baseimage:0.9.17)
-i image_name Name of the resulting image (quay.io/datacratic/mldb_base:14.04)
-w pip_wheelhouse_url URL to use as a pip wheelhouse
```

### `quay.io/datacratic/baseimage:0.9.17`

This image is a fork of `phusion/baseimage:0.9.17`, rebuilt to include the latest package updates.
This image contains the base ubuntu packages and a custom init system.

See [phusion/baseimage-docker](https://github.com/phusion/baseimage-docker) for details.

#### Build instructions

If the mldb_base layer performs a lot of package upgrades during its creation, it is worth rebuilding this layer.
To do so, run the following commands from the top of the mldb repo:

```
make baseimage
docker push quay.io/datacratic/baseimage:latest
```

## S3 Credentials

Some tests require S3 credentials in order to run. These credentials are
nothing special---they simply require read-only access to public S3 buckets.
But they need to be enabled for full test coverage.

To enable these tests, you need to create a file ~/.cloud_credentials with a
line with the following format (the fields are tab separated):

```
s3 1 AKRESTOFACCESSKEYID SeCrEtOfKeyGoeShEre
```

The MLDB tests will pick up this file when executing. The Makefile will check
for the presence of the file containing an S3 line when deciding whether or
not to enable those tests.
@@ -0,0 +1,77 @@
// This file is part of MLDB. Copyright 2015 Datacratic. All rights reserved.

/** abort.cc -*- C++ -*-
Rémi Attab, 13 Nov 2012
Copyright (c) 2012 Datacratic. All rights reserved.
Utilities related to the abort() function
*/

#include "abort.h"

#include <cstdlib>
#include <iostream>

using namespace std;


namespace ML {

namespace {


/******************************************************************************/
/* COMPILE SETTING                                                            */
/******************************************************************************/

// Normalise JML_ABORT to exactly 0 or 1: it becomes 1 when the build defined
// the macro (whatever its value) and 0 when it was not defined at all.
#ifdef JML_ABORT
#   undef JML_ABORT
#   define JML_ABORT 1
#else
#   define JML_ABORT 0
#endif

/** Compile-time default of the abort flag (0 or 1). */
enum { COMPILE_STATE = JML_ABORT };


/******************************************************************************/
/* ABORT STATE                                                                */
/******************************************************************************/

/** Holder for the abort flag used by the functions below.

    The single instance, staticState, is a namespace-scope object whose
    constructor runs during dynamic initialisation of this translation unit.

    NOTE(review): calls into this file made from static initialisers of other
    translation units could run before that constructor; confirm whether any
    caller depends on that ordering.
*/
struct AbortState {

    /** The flag starts out as the compile-time default and is additionally
        switched on when the JML_ABORT environment variable is present (its
        value is not inspected).
    */
    AbortState()
        : state(COMPILE_STATE != 0)
    {
        // Only consult the environment when the compile-time default has not
        // already enabled the flag.
        if (!state && std::getenv("JML_ABORT") != NULL)
            state = true;
    }

    /** True when do_abort() should call abort(). */
    bool state;
} staticState;

} // namespace anonymous


/******************************************************************************/
/* INTERFACE                                                                  */
/******************************************************************************/

/** Calls abort() if the abort flag is currently set; otherwise does nothing. */
void do_abort()
{
    if (!staticState.state)
        return;

    std::abort();
}

/** Returns the current value of the abort flag. */
bool get_abort_state()
{
    const bool current = staticState.state;
    return current;
}

/** Overwrites the abort flag with \p b. */
void set_abort_state(bool b)
{
    staticState.state = b;
}



} // namespace ML

0 comments on commit 1302e4c

Please sign in to comment.
You can’t perform that action at this time.