Skip to content

Commit

Permalink
Merge pull request #13 from seomoz/improve_build
Browse files Browse the repository at this point in the history
Improve the build
  • Loading branch information
Matthew Peters committed Jan 13, 2015
2 parents f0cd199 + 16b7779 commit ee9eb5b
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ mozsci/spearmanr_by_fast.cpp

*.pyc

.coverage
mozsci/*.so

# vim files
*.swp
13 changes: 13 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
language: python
python:
- 2.7
script: make test
virtualenv:
system_site_packages: true
cache:
- apt
- pip
install:
- sudo apt-get -y install libatlas-base-dev libatlas-dev lib{blas,lapack}-dev python-numpy python-scipy python-matplotlib
- pip install -r requirements.txt --use-mirrors
- python setup.py build_ext --inplace
27 changes: 27 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
clean:
# Remove the build
rm -rf build dist
# And all of our pyc files
rm -f mozsci/*.pyc test/*.pyc
# All compiled files
rm -f mozsci/*.so mozsci/spearmanr_by_fast.cpp
# And lastly, .coverage files
rm -f .coverage

test: nose

nose:
rm -rf .coverage
nosetests --exe --cover-package=mozsci --with-coverage --cover-branches -v --cover-erase

unittest:
python -m unittest discover -s test

# build inplace for unit tests to pass (since they are run from this
# top level directory we need the .so files to be in the src tree
# when they run).
build: clean
python setup.py build_ext --inplace

install: build
python setup.py install
11 changes: 3 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
mozsci
======

Data science tools from SEOMoz
[![Build Status](https://api.travis-ci.org/seomoz/mozsci.png)](https://api.travis-ci.org/seomoz/mozsci.png)

Dependencies:
numpy
scipy
matplotlib
cython
Data science tools from Moz

Includes an optional wrapper for Pybrain neural networks, in `models.pybrain_wrapper`.
If this is used, Pybrain must also be available.
See `.travis.yml` and `requirements.txt` for dependencies.

6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nose
coverage
Cython>=0.17
simplejson
pybrain==0.3.0
scikit-learn
2 changes: 1 addition & 1 deletion test/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_negative_binomial(self):
[ (This is k*) -2.53387660e-01 -4.31184391e-01 -1.60095828e-03 -1.43475268e-02
(This is the intercept) 2.71606920e+00]
"""
mydata = np.genfromtxt('data/poissonreg.csv', delimiter=',', skip_header=1)
mydata = np.genfromtxt('test/data/poissonreg.csv', delimiter=',', skip_header=1)
features = mydata[:, 2:5]

Y = mydata[:, 6]
Expand Down
39 changes: 14 additions & 25 deletions test/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time

from mozsci import histogram
import pylab as plt


class TestHistogram1D(unittest.TestCase):
Expand Down Expand Up @@ -40,19 +39,13 @@ def test_histogram1d(self):

# check sampler
t1 = time.time()
samples = h.sample(3e6)
samples = h.sample(5e6)
t2 = time.time()
print "Time to sample 1D for 5e6 = " + str(t2 - t1) + " s"

# TODO: replace this "eye norm" with an actual norm
(counts, edges) = plt.histogram(samples, 50, normed=True)
(counts, edges) = np.histogram(samples, 50, normed=True)
centers = 0.5 * (edges[1:] + edges[0:-1])
actual_pdf = 1.0 / np.sqrt(2.0 * 3.14159) * np.exp(-centers ** 2 / 2.0)
fig = plt.figure(1); fig.clf()
plt.plot(centers, counts, label="Sample")
plt.plot(centers, actual_pdf, label="Actual")
plt.legend()
fig.show()
self.assertTrue(np.allclose(counts, actual_pdf, atol=5e-3))

def test_stratified_sample(self):
hist = histogram.Histogram1DFast(5, 0, 5)
Expand All @@ -61,29 +54,25 @@ def test_stratified_sample(self):

hist.compute_pdf_cdf()

# generate a 1e6 size sample
# generate a sample
x = hist.sample(int(hist.bin_count.sum()))

# now sample the large sample in 2 ways
# uniformly
# stratified
sample_size = [500, 300, 100, 98, 2]
# now do a stratified sample of the large sample
sample_size = [5000, 3000, 1000, 250, 2]
x_stratified_sample = hist.stratified_sample(x, sample_size)
hist_check = histogram.Histogram1DFast(5, 0, 5)
hist_check.update(x_stratified_sample)

# this "eye norm" too needs to be replaced
fig = plt.figure(101)
fig.clf()
plt.plot(sample_size, 'bo', label='ideal')
plt.plot(hist_check.bin_count, 'rx', label='actual sample')
plt.legend()
plt.title("1D stratified sampling")
fig.show()
# check that the actual sample distribution matches the expected
# one. We expect a small relative difference in all entries
# except the last (where we expect a small absolute difference)
self.assertTrue(np.allclose(1.0,
hist_check.bin_count[:-1].astype(np.float) / sample_size[:-1],
atol=0.10, rtol=0.0))
self.assertTrue(abs(hist_check.bin_count[-1] - sample_size[-1]) < 3)


if __name__ == "__main__":
unittest.main()




2 changes: 1 addition & 1 deletion test/test_pybrain_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def train_xor(self, learning_rate=0.1):
[1, 0],
[1, 1]])
y = np.array([0, 1, 1, 0])
net = PyBrainNN(learning_rate=learning_rate, maxiterations=10000, lam=0.0, args=(2, 3, 1), kwargs={'fast':True, 'bias':True})
net = PyBrainNN(learning_rate=learning_rate, maxiterations=10000, lam=0.0, args=(2, 3, 1), kwargs={'bias':True})
net.fit(X, y)

return net, X, y
Expand Down

0 comments on commit ee9eb5b

Please sign in to comment.