Skip to content

Commit

Permalink
Merge pull request #13 from seomoz/improve_build
Browse files Browse the repository at this point in the history
Improve the build
  • Loading branch information
Matthew Peters committed Jan 13, 2015
2 parents f0cd199 + 16b7779 commit ee9eb5b
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ mozsci/spearmanr_by_fast.cpp

*.pyc

.coverage
mozsci/*.so

# vim files
*.swp
13 changes: 13 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
language: python
python:
- 2.7
script: make test
virtualenv:
system_site_packages: true
cache:
- apt
- pip
install:
- sudo apt-get -y install libatlas-base-dev libatlas-dev lib{blas,lapack}-dev python-numpy python-scipy python-matplotlib
- pip install -r requirements.txt --use-mirrors
- python setup.py build_ext --inplace
27 changes: 27 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
clean:
# Remove the build
rm -rf build dist
# And all of our pyc files
rm -f mozsci/*.pyc test/*.pyc
# All compiled files
rm -f mozsci/*.so mozsci/spearmanr_by_fast.cpp
# And lastly, .coverage files
rm -f .coverage

test: nose

nose:
rm -rf .coverage
nosetests --exe --cover-package=mozsci --with-coverage --cover-branches -v --cover-erase

unittest:
python -m unittest discover -s test

# build inplace for unit tests to pass (since they are run from this
# top level directory we need the .so files to be in the src tree
# when they run).
build: clean
python setup.py build_ext --inplace

install: build
python setup.py install
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
mozsci
======

Data science tools from SEOMoz
[![Build Status](https://api.travis-ci.org/seomoz/mozsci.png)](https://api.travis-ci.org/seomoz/mozsci.png)

Dependencies:
numpy
scipy
matplotlib
cython
Data science tools from Moz

Includes an optional wrapper for Pybrain neural networks, in `models.pybrain_wrapper`.
If this is used, Pybrain must also be available.
See `.travis.yml` and `requirements.txt` for dependencies.

6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nose
coverage
Cython>=0.17
simplejson
pybrain==0.3.0
scikit-learn
2 changes: 1 addition & 1 deletion test/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_negative_binomial(self):
[ (This is k*) -2.53387660e-01 -4.31184391e-01 -1.60095828e-03 -1.43475268e-02
(This is the intercept) 2.71606920e+00]
"""
mydata = np.genfromtxt('data/poissonreg.csv', delimiter=',', skip_header=1)
mydata = np.genfromtxt('test/data/poissonreg.csv', delimiter=',', skip_header=1)
features = mydata[:, 2:5]

Y = mydata[:, 6]
Expand Down
39 changes: 14 additions & 25 deletions test/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time

from mozsci import histogram
import pylab as plt


class TestHistogram1D(unittest.TestCase):
Expand Down Expand Up @@ -40,19 +39,13 @@ def test_histogram1d(self):

# check sampler
t1 = time.time()
samples = h.sample(3e6)
samples = h.sample(5e6)
t2 = time.time()
print "Time to sample 1D for 5e6 = " + str(t2 - t1) + " s"

# TODO: replace this "eye norm" with an actual norm
(counts, edges) = plt.histogram(samples, 50, normed=True)
(counts, edges) = np.histogram(samples, 50, normed=True)
centers = 0.5 * (edges[1:] + edges[0:-1])
actual_pdf = 1.0 / np.sqrt(2.0 * 3.14159) * np.exp(-centers ** 2 / 2.0)
fig = plt.figure(1); fig.clf()
plt.plot(centers, counts, label="Sample")
plt.plot(centers, actual_pdf, label="Actual")
plt.legend()
fig.show()
self.assertTrue(np.allclose(counts, actual_pdf, atol=5e-3))

def test_stratified_sample(self):
hist = histogram.Histogram1DFast(5, 0, 5)
Expand All @@ -61,29 +54,25 @@ def test_stratified_sample(self):

hist.compute_pdf_cdf()

# generate a 1e6 size sample
# generate a sample
x = hist.sample(int(hist.bin_count.sum()))

# now sample the large sample in 2 ways
# uniformly
# stratified
sample_size = [500, 300, 100, 98, 2]
# now do a stratified sample of the large sample
sample_size = [5000, 3000, 1000, 250, 2]
x_stratified_sample = hist.stratified_sample(x, sample_size)
hist_check = histogram.Histogram1DFast(5, 0, 5)
hist_check.update(x_stratified_sample)

# this "eye norm" too needs to be replaced
fig = plt.figure(101)
fig.clf()
plt.plot(sample_size, 'bo', label='ideal')
plt.plot(hist_check.bin_count, 'rx', label='actual sample')
plt.legend()
plt.title("1D stratified sampling")
fig.show()
# check that the actual sample distribution matches the expected
# one. We expect a small relative difference in all entries
# except the last (where we expect a small absolute difference)
self.assertTrue(np.allclose(1.0,
hist_check.bin_count[:-1].astype(np.float) / sample_size[:-1],
atol=0.10, rtol=0.0))
self.assertTrue(abs(hist_check.bin_count[-1] - sample_size[-1]) < 3)


if __name__ == "__main__":
unittest.main()




2 changes: 1 addition & 1 deletion test/test_pybrain_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def train_xor(self, learning_rate=0.1):
[1, 0],
[1, 1]])
y = np.array([0, 1, 1, 0])
net = PyBrainNN(learning_rate=learning_rate, maxiterations=10000, lam=0.0, args=(2, 3, 1), kwargs={'fast':True, 'bias':True})
net = PyBrainNN(learning_rate=learning_rate, maxiterations=10000, lam=0.0, args=(2, 3, 1), kwargs={'bias':True})
net.fit(X, y)

return net, X, y
Expand Down

0 comments on commit ee9eb5b

Please sign in to comment.