Skip to content
Browse files

changed signature to not be a dictionary

  • Loading branch information...
1 parent 356286c commit 7a6e2d18ba764137d30602fc1d577afc07771755 @jonathan-taylor jonathan-taylor committed Feb 22, 2011
Showing with 114 additions and 212 deletions.
  1. +13 −9 doc/ancova.rst
  2. +81 −74 formula/ancova.py
  3. +0 −107 formula/make.py
  4. +20 −22 tests/test_design.py
View
22 doc/ancova.rst
@@ -40,7 +40,7 @@ We can recreate this example using the *ANCOVA* object
p = Factor('P', ['M', 'L'])
x = Term("X")
- f = ANCOVA({x:[(e,),(p,)]})
+ f = ANCOVA((x,e),(x,p))
print sorted(f.formula.terms)
This would output:
@@ -53,7 +53,7 @@ Interchanging the order above
.. testcode::
- f2 = ANCOVA({x:[(p,),(e,)]})
+ f2 = ANCOVA((x,p),(x,e))
print sorted(f2.formula.terms)
.. testoutput::
@@ -109,7 +109,7 @@ As well as in the *ANCOVA*
.. testcode::
- f3 = ANCOVA({x:[(p,e)]})
+ f3 = ANCOVA((x,(p,e)))
print f3.codings
print sorted(f3.formula.terms)
@@ -129,7 +129,7 @@ predict:
.. testcode::
- f4 = ANCOVA({x:[(p,e),(p,),(e,)]})
+ f4 = ANCOVA((x,(p,e)),(x,p),(x,e))
print f4.codings
print sorted(f4.formula.terms)
@@ -147,7 +147,7 @@ formula
.. testcode::
- f5 = ANCOVA({x:[(p,e),(e,),(p,)]})
+ f5 = ANCOVA((x,(p,e)),(x,e),(x,p))
print f5.codings
print sorted(f5.formula.terms)
@@ -175,7 +175,7 @@ by
.. testcode::
- f6 = ANCOVA({x:[(), (p,e),(e,),(p,)]})
+ f6 = ANCOVA(x,(x,e),(x,p),(x,(p,e)))
print f6.codings
print sorted(f6.formula.terms)
@@ -195,7 +195,9 @@ constructor of ANCOVA
.. testcode::
- f7 = ANCOVA({x:[(), (p,e),(e,),(p,)]}, add_intercept=False)
+ ANCOVA.add_interecept=False
+ f7 = ANCOVA(x,(x,(p,e)),(x,e),(x,p))
+ ANCOVA.add_interecept=True
print f7.codings
print sorted(f7.formula.terms)
@@ -226,7 +228,9 @@ maps to a specific contrast.
.. testcode::
- f7 = ANCOVA({x:[(), (p,e),(e,),(p,)]}, add_intercept=False)
+ ANCOVA.add_interecept=False
+ f7 = ANCOVA(x, (x,(p,e)), (x,e), (x,p))
+ ANCOVA.add_interecept=True
print f7.contrasts
.. testoutput::
@@ -240,7 +244,7 @@ As opposed to
.. testcode::
- f3 = ANCOVA({x:[(p,e)]})
+ f3 = ANCOVA((x,(p,e)))
print f3.contrasts
which yields
View
155 formula/ancova.py
@@ -8,14 +8,6 @@
class ANCOVA(object):
- # Add a global intercept as R does (unless -1 is added in R)
- add_intercept = True
-
- # Add "main effect" for each numeric expression
- add_main_effects = False
-
- # Aliases for intercept
- aliases_for_intercept = [1, 'constant', '(Intercept)']
"""
Instantiated with a dictionary: dict([(expr, ((factor1, factor2), (factor1,)))]).
@@ -45,42 +37,62 @@ class ANCOVA(object):
"""
- def __init__(self, expr_factor_dict,
- add_intercept=True,
- add_main_effects=False,
- aliases_for_intercept=[1, 'constant', '(Intercept)']):
- self.expr_factor_dict = {}
+ add_intercept=True
+ add_main_effects=False
+ aliases_for_intercept=[1, 'constant', '(Intercept)']
- # create a copy of expr_factor_dict
+ def __init__(self, *expr_factor_tuples):
+ self.graded_dict = {}
+
+ # create a copy of graded_dict
# removing duplicate tuples of factors in the values
- for expr in expr_factor_dict:
- self.expr_factor_dict[expr] = []
- for sequence_of_factors in expr_factor_dict[expr]:
- if list(sequence_of_factors) not in self.expr_factor_dict[expr]:
- self.expr_factor_dict[expr].append(list(sequence_of_factors))
+ for expr_factors in expr_factor_tuples:
+ # each element of the sequence should have the form
+ # (sympy, [factors]) or sympy
+ try:
+ expr, factors = tuple(expr_factors)
+ except TypeError: # not a sequence
+ expr, factors = expr_factors, ()
+
+ if is_factor(factors):
+ factors = [factors]
+ factors = tuple(factors)
+ self.graded_dict.setdefault(expr, {}).setdefault(len(factors), []).append(factors)
+
+ ## for sequence_of_factors in graded_dict[expr]:
+ ## if list(sequence_of_factors) not in self.graded_dict[expr]:
+ ## self.graded_dict[expr].append(list(sequence_of_factors))
# aliases for the intercept
- for s in aliases_for_intercept:
- if s in self.expr_factor_dict:
- self.expr_factor_dict[sympy.Number(1)] = list(self.expr_factor_dict[s])
- del(self.expr_factor_dict[s])
+ for s in ANCOVA.aliases_for_intercept:
+ if s in self.graded_dict:
+ for k in self.graded_dict[s].keys():
+ self.graded_dict.setdefault(sympy.Number(1), {}).setdefault(k, []).extend(self.graded_dict[s][k])
+ del(self.graded_dict[s])
# This is here because (()) == ()
# so to specify a numeric variable 'x' with no categorical variables
# we need an entry like {'x':(())} or {'x':[()]} or {'x':[None]}
- for expr in self.expr_factor_dict:
- if not self.expr_factor_dict[expr]:
- self.expr_factor_dict[expr] = [()]
+ ## for expr in self.graded_dict:
+ ## if not self.graded_dict[expr]:
+ ## self.graded_dict[expr] = [()]
+
+ if ANCOVA.add_intercept:
+ self.graded_dict.setdefault(sympy.Number(1), {})[0] = [[]]
- if add_intercept:
- self.expr_factor_dict.setdefault(sympy.Number(1), []).append([])
+ if ANCOVA.add_main_effects:
+ for expr in self.graded_dict:
+ self.graded_dict[expr][0] = [[]]
- if add_main_effects:
- for expr in self.expr_factor_dict:
- if () not in [tuple(f) for f in self.expr_factor_dict[expr]]:
- self.expr_factor_dict[expr] = list(self.expr_factor_dict[expr]) + [()]
-
+ def __repr__(self):
+ terms = []
+ for expr in self.graded_dict:
+ for order in self.graded_dict[expr]:
+ for factors in self.graded_dict[expr][order]:
+ terms.append(`(expr, tuple([f.name for f in factors]))`)
+
+ return "ANCOVA(%s)" % ','.join(terms)
@property
def sorted_factors(self):
@@ -93,11 +105,12 @@ def sorted_factors(self):
"""
if not hasattr(self, "_sorted_factors"):
self._sorted_factors = {}
- for expr in self.expr_factor_dict:
- for factors in self.expr_factor_dict[expr]:
- for factor in factors:
- if is_factor(factor) and factor.name not in self._sorted_factors:
- self._sorted_factors[factor.name] = Factor(factor.name, sorted(factor.levels))
+ for expr in self.graded_dict:
+ for order in self.graded_dict[expr]:
+ for factors in self.graded_dict[expr][order]:
+ for factor in factors:
+ if is_factor(factor) and factor.name not in self._sorted_factors:
+ self._sorted_factors[factor.name] = Factor(factor.name, sorted(factor.levels))
return self._sorted_factors
@property
@@ -108,8 +121,8 @@ def codings(self):
"""
if not hasattr(self, "_codings"):
self._codings = {}
- for expr in sorted(self.expr_factor_dict):
- self._codings[expr] = get_factor_codings(self.expr_factor_dict[expr])
+ for expr in sorted(self.graded_dict.keys()):
+ self._codings[expr] = get_factor_codings(self.graded_dict[expr])
return self._codings
@property
@@ -124,8 +137,8 @@ def contrasts(self):
self._contrasts = {}
self._contrast_reps = []
self._formulae = []
- for expr in sorted(self.expr_factor_dict.keys()):
- formulae = get_contributions(self.expr_factor_dict[expr],
+ for expr in sorted(self.graded_dict.keys()):
+ formulae = get_contributions(self.codings[expr],
self.sorted_factors)
for formula, srep in formulae:
v = formula * Formula([expr])
@@ -181,17 +194,19 @@ def Rstr(self):
"""
results = []
- for expr in self.expr_factor_dict:
+ for expr in self.graded_dict:
_expr = str(expr).replace("*", ":")
if _expr != '1':
- for factors in self.expr_factor_dict[expr]:
- results.append(':'.join([_expr] + [f.name for f in factors]))
+ for order in self.graded_dict[expr]:
+ for factors in self.graded_dict[expr][order]:
+ results.append(':'.join([_expr] + [f.name for f in factors]))
else:
- for factors in self.expr_factor_dict[expr]:
- if factors:
- results.append(':'.join([f.name for f in factors]))
- else:
- results.append('1')
+ for order in self.graded_dict[expr]:
+ for factors in self.graded_dict[expr][order]:
+ if factors:
+ results.append(':'.join([f.name for f in factors]))
+ else:
+ results.append('1')
return "+".join(results)
@property
@@ -225,31 +240,30 @@ def multiply_by_factor(self, factor):
Create a new ANCOVA with each
existing factor multiplied by factor.
"""
- expr_factor_dict = self.expr_factor_dict.copy()
- for expr in expr_factor_dict:
+ graded_dict = self.graded_dict.copy()
+ for expr in graded_dict:
result = []
- for factors in expr_factor_dict[expr]:
+ for factors in graded_dict[expr]:
result.append(list(factors) + [factor])
- expr_factor_dict[expr] = result
- return ANCOVA(expr_factor_dict)
+ graded_dict[expr] = result
+ return ANCOVA(graded_dict)
def multiply_by_expression(self, expr):
"""
Create a new ANCOVA with each
existing expression multiplied by
expr.
"""
- expr_factor_dict = {}
- for expr in self.expr_factor_dict:
- expr_factor_dict[expr * expr] = self.expr_factor_dict[expr]
- return ANCOVA(expr_factor_dict)
+ graded_dict = {}
+ for expr in self.graded_dict:
+ graded_dict[expr * expr] = self.graded_dict[expr]
+ return ANCOVA(graded_dict)
-def get_contributions(subsets_of_factors, sorted_factors):
+def get_contributions(codings, sorted_factors):
"""
Determine which columns a subset of factors
"""
- codings = get_factor_codings(subsets_of_factors)
if codings:
formulae = []
for prod_of_factors in codings:
@@ -264,27 +278,20 @@ def get_contributions(subsets_of_factors, sorted_factors):
formulae = [(Formula([1]),'1')]
return formulae
-def get_factor_codings(subsets_of_factors):
+def get_factor_codings(graded_subsets_of_factors):
"""
Given a sequence of subsets of factors, determine
which will be coded with all their degrees of freedom ("indicator")
and which will be coded as contrasts ("contrast").
"""
formula = Formula([])
all_factors = set([])
- subsets_of_names = {}
-
- for subset_of_factors in subsets_of_factors:
- all_factors = all_factors.union(set(subset_of_factors))
- t = [f.name for f in subset_of_factors]
- subsets_of_names.setdefault(len(t), []).append(t)
-
- if subsets_of_names and all_factors:
- # R uses the sorted levels as well
- graded_subsets_of_names = []
- for i in sorted(subsets_of_names.keys()):
- graded_subsets_of_names += subsets_of_names[i]
+ graded_subsets_of_names = []
+ for order in graded_subsets_of_factors:
+ graded_subsets_of_names.extend([sorted([f.name for f in factors]) for
+ factors in graded_subsets_of_factors[order]])
+ if graded_subsets_of_names != [[]]:
codings = factor_codings(*[sorted(f) for f in graded_subsets_of_names])
else:
codings = {}
View
107 formula/make.py
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-import fileinput
-import glob
-import os
-import shutil
-import sys
-
-def check_build():
- build_dirs = ['build', 'build/doctrees', 'build/html', 'build/latex',
- '_static', '_templates']
- for d in build_dirs:
- try:
- os.mkdir(d)
- except OSError:
- pass
-
-def figs():
- os.system('cd pyplots/ && python make.py')
-
-def html():
- check_build()
- if small_docs:
- options = "-D plot_formats=\"['png']\""
- else:
- options = ''
- if os.system('sphinx-build %s -P -b html -d build/doctrees . build/html' % options):
- raise SystemExit("Building HTML failed.")
-
- figures_dest_path = 'build/html/pyplots'
- if os.path.exists(figures_dest_path):
- shutil.rmtree(figures_dest_path)
- shutil.copytree('pyplots', figures_dest_path)
-
-def latex():
- check_build()
- #figs()
- if sys.platform != 'win32':
- # LaTeX format.
- if os.system('sphinx-build -b latex -d build/doctrees . build/latex'):
- raise SystemExit("Building LaTeX failed.")
-
- # Produce pdf.
- os.chdir('build/latex')
-
- # Copying the makefile produced by sphinx...
- if (os.system('pdflatex sampledoc.tex') or
- os.system('pdflatex sampledoc.tex') or
- os.system('makeindex -s python.ist Matplotlib.idx') or
- os.system('makeindex -s python.ist modMatplotlib.idx') or
- os.system('pdflatex sampledoc.tex')):
- raise SystemExit("Rendering LaTeX failed.")
-
- os.chdir('../..')
- else:
- print 'latex build has not been tested on windows'
-
-def clean():
- for dirpath in ['build', 'examples']:
- if os.path.exists(dirpath):
- shutil.rmtree(dirpath)
- for pattern in ['mpl_examples/api/*.png',
- 'mpl_examples/pylab_examples/*.png',
- 'mpl_examples/pylab_examples/*.pdf',
- 'mpl_examples/units/*.png',
- 'pyplots/tex_demo.png',
- '_static/matplotlibrc',
- '_templates/gallery.html']:
- for filename in glob.glob(pattern):
- if os.path.exists(filename):
- os.remove(filename)
-
-def all():
- #figs()
- html()
- latex()
-
-
-funcd = {
- 'figs' : figs,
- 'html' : html,
- 'latex' : latex,
- 'clean' : clean,
- 'all' : all,
- }
-
-
-small_docs = False
-
-# Change directory to the one containing this file
-current_dir = os.getcwd()
-os.chdir(os.path.dirname(os.path.join(current_dir, __file__)))
-
-if len(sys.argv)>1:
- if '--small' in sys.argv[1:]:
- small_docs = True
- sys.argv.remove('--small')
- for arg in sys.argv[1:]:
- func = funcd.get(arg)
- if func is None:
- raise SystemExit('Do not know how to handle %s; valid args are %s'%(
- arg, funcd.keys()))
- func()
-else:
- small_docs = False
- all()
-os.chdir(current_dir)
-os.system('killall -9 R')
View
42 tests/test_design.py
@@ -32,15 +32,12 @@ def random_recarray(size):
def random_categorical_formula(size=500):
nterms = np.random.poisson(5)
X, n, c = random_recarray(size)
- d = {}
+ d = []
for _ in range(nterms):
- expr = random_subset(n, np.random.binomial(len(n), 0.5))
- f = []
- for _ in range(np.random.poisson(2)):
- factors = random_subset(c, np.random.poisson(1))
- f.append(np.unique(factors))
- d[np.product(np.unique(expr))] = f
- return ANCOVA(d)
+ expr = np.product(np.unique(random_subset(n, np.random.binomial(len(n), 0.5))))
+ factors = tuple(set(random_subset(c, np.random.poisson(1))))
+ d.append((expr, factors))
+ return ANCOVA(*tuple(set(d)))
def random_from_factor(factor, size):
return random_subset(factor.levels, size)
@@ -60,11 +57,12 @@ def random_from_terms_factors(terms, factors, size):
def random_from_categorical_formula(cf, size):
exprs = []
factors = []
- for key, value in cf.expr_factor_dict.items():
+ for key, value in cf.graded_dict.items():
if str(key) != '1':
exprs += str(key).split("*")
- for fs in value:
- factors += list(fs)
+ for order in value:
+ for fs in value[order]:
+ factors += list(fs)
return random_from_terms_factors(list(set(exprs)), list(set(factors)), size)
def simple():
@@ -73,10 +71,8 @@ def simple():
f = Factor('f', ['a','b','c'])
g = Factor('g', ['aa','bb','cc'])
h = Factor('h', ['a','b','c','d','e','f','g','h','i','j'])
- d = ANCOVA({x*y:((g,f),),x:([f],[g,f]),
- 1:([g],),
- z:([h],[h,g,f]),
- x*y*z:([h],[f])})
+ d = ANCOVA((x*y,(g,f)),(x,(f,)),(x,[g,f]),(1,[g]),
+ (z,[h]),(z,[h,g,f]), (x*y*z,[h]),(x*y*z,[f]))
return d
@@ -86,25 +82,27 @@ def simple2():
f = Factor('f', ['a','b','c'])
g = Factor('g', ['aa','bb','cc'])
h = Factor('h', ['a','b','c','d','e','f','g','h','i','j'])
- d = ANCOVA({x*y:((g,f),),x:([f],[g,f]),
- 1:([g],),
- z:([h],[h,g,f]),
- x*y*z:([f],[h])})
+ d = ANCOVA((x*y,(g,f)),(x,(f,)),(x,[g,f]),(1,[g]),
+ (z,[h]),(z,[h,g,f]), (x*y*z,[f]),(x*y*z,[h]))
return d
-def testR(d=simple(), size=500):
-
+def testR(d=None, size=500):
+ if d is None:
+ d = simple()
X = random_from_categorical_formula(d, size)
X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))
+
fname = tempfile.mktemp()
ML.rec2csv(X, fname)
+
Rstr = '''
data = read.table("%s", sep=',', header=T)
cur.lm = lm(response ~ %s, data)
COEF = coef(cur.lm)
''' % (fname, d.Rstr)
+
rpy2.robjects.r(Rstr)
remove(fname)
nR = list(np.array(rpy2.robjects.r("names(COEF)")))
@@ -124,7 +122,7 @@ def testR(d=simple(), size=500):
return d, X, nR, nF
def test2():
- testR()
+ testR(d=random_categorical_formula())
def test3():
testR(d=simple2())

0 comments on commit 7a6e2d1

Please sign in to comment.
Something went wrong with that request. Please try again.