Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Introduce end-of-line normalization

  • Loading branch information...
commit aa2c2cc859cd2959b0a16894b76c282afc409685 1 parent bdbee1a
@stevenbird stevenbird authored
View
1  .gitattributes
@@ -0,0 +1 @@
+* text=auto
View
122 examples/grammars/sample_grammars/drt_glue.semtype
@@ -1,61 +1,61 @@
-########################################################################
-# DRT-Glue Semantics Formulas Using DRT and Event Representation
-#
-# Entries are made up of three parts, separated by colons (":")
-#
-# 1) The semtype name.
-# - May appear multiple times with different relationship sets (3)
-# - May "extend" other semtypes: "type(parent)"
-#
-# 2) The glue formulas.
-# - A comma-separated list of tuples representing glue formulas
-# - If the entry is an extension, then the listed formulas will be added to
-# the list from the super type
-#
-# 3) The relationship set (OPTIONAL)
-# - If not specified, then assume the entry covers ALL relationship sets
-# - If the entry is an extension, then the relationship set dictates which
-# particular entry should be extended. If no relationship set is
-# specified, then every entry of the parent type is extended.
-#
-########################################################################
-
-#Quantifiers
-def_art : (\P Q.([x],[((([y],[])+Q(y)) <-> (x = y)), P(x)]), ((v -o r) -o ((f -o Var) -o Var)))
-ex_quant : (\P Q.(([x],[])+P(x)+Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-univ_quant : (\P Q.([],[((([x],[])+P(x)) -> Q(x))]), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-no_quant : (\P Q.(-(([x],[])+P(x)+Q(x))), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-
-#Nouns
-NN : (\Q.(([x],[<word>(x)])+Q(x)), ((f -o Var) -o Var)) : [] # treat a noun missing its spec as implicitly existentially quantified
-
-NN : (\x.([],[<word>(x)]), (v -o r)) : [spec]
-NN : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r)) : [] # treat a noun missing its spec as implicitly existentially quantified
-NNP : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r))
-NNS(NN)
-PRP : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[PRO(x)]), (v -o r))
-
-#Verbs
-VB : (\x.([],[<word>(x)]), (subj -o f)) : [subj] #iv
-VB : (\x y.([],[<word>(x,y)]), (subj -o (obj -o f))) : [subj, obj] #tv
-VB : (\y.exists x.([],[<word>(x,y)]), (obj -o f)) : [obj] #incomplete tv
-VB : (\x y z.([],[<word>(x,y,z)]), (subj -o (obj -o (theme -o f)))) : [subj, obj, theme] #dtv
-VB : (\y z.exists x.([],[<word>(x,y,z)]), obj -o (theme -o f)) : [obj, theme] #incomplete dtv
-VB : (\x z.exists y.([],[<word>(x,y,z)]), subj -o (theme -o f)) : [subj, theme] #incomplete dtv
-VB : (\z.exists x y.([],[<word>(x,y,z)]), theme -o f) : [theme] #incomplete dtv
-VB : (\x y.([],[<word>(x,y)]), (subj -o (comp -o f))) : [subj, comp] #tv_comp
-VB : (\x P.([],[<word>(x,P)]), (subj -o ((xcomp.subj -o xcomp) -o f))) : [subj, xcomp] #equi
-VB : (\x y P.([],[<word>(x,y,P)]), (subj -o (obj -o ((xcomp.subj -o xcomp) -o f)))) : [subj, obj, xcomp] # object equi
-VB : (\P.([],[<word>(P)]), (xcomp -o f)) : [xcomp] #raising
-VBD(VB) : (\P.PAST(P), (f -o f))
-VBZ(VB)
-
-#Modifiers
-nmod : (\x.([],[<word>(x)]), f), (\P Q x.(P(x)+Q(x)), (f -o ((super.v -o super.r) -o (super.v -o super.r))))
-JJ(nmod)
-vmod : (\x.([],[<word>(x)]), f), (\P Q x.P(Q(x)), (f -o (super -o super)))
-RB(vmod)
-tense : (\P.([],[<word>(P)]), (super.f -o super.f))
-
-#Conjunctions
-cc_clause : (\P Q.(P + Q), (a -o (b -o f)))
+########################################################################
+# DRT-Glue Semantics Formulas Using DRT and Event Representation
+#
+# Entries are made up of three parts, separated by colons (":")
+#
+# 1) The semtype name.
+# - May appear multiple times with different relationship sets (3)
+# - May "extend" other semtypes: "type(parent)"
+#
+# 2) The glue formulas.
+# - A comma-separated list of tuples representing glue formulas
+# - If the entry is an extension, then the listed formulas will be added to
+# the list from the super type
+#
+# 3) The relationship set (OPTIONAL)
+# - If not specified, then assume the entry covers ALL relationship sets
+# - If the entry is an extension, then the relationship set dictates which
+# particular entry should be extended. If no relationship set is
+# specified, then every entry of the parent type is extended.
+#
+########################################################################
+
+#Quantifiers
+def_art : (\P Q.([x],[((([y],[])+Q(y)) <-> (x = y)), P(x)]), ((v -o r) -o ((f -o Var) -o Var)))
+ex_quant : (\P Q.(([x],[])+P(x)+Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+univ_quant : (\P Q.([],[((([x],[])+P(x)) -> Q(x))]), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+no_quant : (\P Q.(-(([x],[])+P(x)+Q(x))), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+
+#Nouns
+NN : (\Q.(([x],[<word>(x)])+Q(x)), ((f -o Var) -o Var)) : [] # treat a noun missing its spec as implicitly existentially quantified
+
+NN : (\x.([],[<word>(x)]), (v -o r)) : [spec]
+NN : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r)) : [] # treat a noun missing its spec as implicitly existentially quantified
+NNP : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r))
+NNS(NN)
+PRP : (\P Q.(([x],[]) + P(x) + Q(x)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[PRO(x)]), (v -o r))
+
+#Verbs
+VB : (\x.([],[<word>(x)]), (subj -o f)) : [subj] #iv
+VB : (\x y.([],[<word>(x,y)]), (subj -o (obj -o f))) : [subj, obj] #tv
+VB : (\y.exists x.([],[<word>(x,y)]), (obj -o f)) : [obj] #incomplete tv
+VB : (\x y z.([],[<word>(x,y,z)]), (subj -o (obj -o (theme -o f)))) : [subj, obj, theme] #dtv
+VB : (\y z.exists x.([],[<word>(x,y,z)]), obj -o (theme -o f)) : [obj, theme] #incomplete dtv
+VB : (\x z.exists y.([],[<word>(x,y,z)]), subj -o (theme -o f)) : [subj, theme] #incomplete dtv
+VB : (\z.exists x y.([],[<word>(x,y,z)]), theme -o f) : [theme] #incomplete dtv
+VB : (\x y.([],[<word>(x,y)]), (subj -o (comp -o f))) : [subj, comp] #tv_comp
+VB : (\x P.([],[<word>(x,P)]), (subj -o ((xcomp.subj -o xcomp) -o f))) : [subj, xcomp] #equi
+VB : (\x y P.([],[<word>(x,y,P)]), (subj -o (obj -o ((xcomp.subj -o xcomp) -o f)))) : [subj, obj, xcomp] # object equi
+VB : (\P.([],[<word>(P)]), (xcomp -o f)) : [xcomp] #raising
+VBD(VB) : (\P.PAST(P), (f -o f))
+VBZ(VB)
+
+#Modifiers
+nmod : (\x.([],[<word>(x)]), f), (\P Q x.(P(x)+Q(x)), (f -o ((super.v -o super.r) -o (super.v -o super.r))))
+JJ(nmod)
+vmod : (\x.([],[<word>(x)]), f), (\P Q x.P(Q(x)), (f -o (super -o super)))
+RB(vmod)
+tense : (\P.([],[<word>(P)]), (super.f -o super.f))
+
+#Conjunctions
+cc_clause : (\P Q.(P + Q), (a -o (b -o f)))
View
124 examples/grammars/sample_grammars/drt_glue_event.semtype
@@ -1,62 +1,62 @@
-########################################################################
-# DRT-Glue Semantics Formulas Using DRT and Event Representation
-#
-# Entries are made up of three parts, separated by colons (":")
-#
-# 1) The semtype name.
-# - May appear multiple times with different relationship sets (3)
-# - May "extend" other semtypes: "type(parent)"
-#
-# 2) The glue formulas.
-# - A comma-separated list of tuples representing glue formulas
-# - If the entry is an extension, then the listed formulas will be added to
-# the list from the super type
-#
-# 3) The relationship set (OPTIONAL)
-# - If not specified, then assume the entry covers ALL relationship sets
-# - If the entry is an extension, then the relationship set dictates which
-# particular entry should be extended. If no relationship set is
-# specified, then every entry of the parent type is extended.
-#
-########################################################################
-
-#Quantifiers
-def_art : (\P Q.([x],[((([y],[])+Q(y)) <-> (x = y)), P(x)]), ((v -o r) -o ((f -o Var) -o Var)))
-ex_quant : (\P Q.(([x],[])+P(x)+Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-univ_quant : (\P Q.([],[((([x],[])+P(x)) -> Q(x))]), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-no_quant : (\P Q.(-(([x],[])+P(x)+Q(x))), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
-
-#Nouns
-NN : (\x.([],[<word>(x)]), (v -o r)) : [spec]
-NN : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r)) : [] # treat a noun missing its spec as implicitly existentially quantified
-NNP : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r))
-NNS(NN)
-PRP : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[PRO(x)]), (v -o r))
-
-#Verbs
-VB : (\x e.([],[<word>(e),subj(e,x)]), (subj -o f)) [subj] #iv
-VB : (\x y e.([],[<word>(e), subj(e,x), obj(e,y)]), (subj -o (obj -o f))) : [subj, obj] #tv
-VB : (\x y z e.([],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), (subj -o (obj -o (theme -o f)))) : [subj, obj, theme] #dtv
-VB : (\y z e.([x],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), obj -o (theme -o f)) : [obj, theme] #incomplete dtv
-VB : (\x z e.([y],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), subj -o (theme -o f)) : [subj, theme] #incomplete dtv
-VB : (\z e.([x,y],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), theme -o f) : [theme] #incomplete dtv
-VB : (\x y e.(([],[<word>(e), subj(e,x), comp(e,y)])+P(e)), (subj -o (comp -o f))) : [subj, comp] #tv_comp
-VB : (\x P e.([],[<word>(e), subj(e,x), xcomp(e,P)]), (subj -o ((xcomp.subj -o xcomp) -o f))) : [subj, xcomp] #equi
-VB : (\x y P e.([],[<word>(e), subj(e,x), obj(e,y), (xcomp e P)]), (subj -o (obj -o ((xcomp.subj -o xcomp) -o f)))) : [subj, obj, xcomp] # object equi
-VB : (\P e.([],[<word>(e), xcomp(e,P)]), (xcomp -o f)) : [xcomp] #raising
-VBD(VB) : (\P.PAST(P), (f -o f))
-VBZ(VB)
-
-#Modifiers
-nmod : (\x.([],[<word>(x)]), f), (\P Q x.(P(x)+Q(x)), (f -o ((super.v -o super.r) -o (super.v -o super.r)))) : []
-JJ(nmod) : []
-vmod : (\x.([],[<word>(x)]), f), (\P Q x.P(Q(x)), (f -o (super -o super))) : []
-RB(vmod) : []
-tense(vmod) : []
-
-#Prepositions
-IN : (\P Q e1.P(\x e2.(([],[<word>(e2,x)]) + Q(e2)),e1), ((subj -o subj.var) -o subj.var) -o (super -o super)) : [subj]
-IN(vmod) : []
-
-#Conjunctions
-cc_clause : (\P Q.(P + Q), (a -o (b -o f)))
+########################################################################
+# DRT-Glue Semantics Formulas Using DRT and Event Representation
+#
+# Entries are made up of three parts, separated by colons (":")
+#
+# 1) The semtype name.
+# - May appear multiple times with different relationship sets (3)
+# - May "extend" other semtypes: "type(parent)"
+#
+# 2) The glue formulas.
+# - A comma-separated list of tuples representing glue formulas
+# - If the entry is an extension, then the listed formulas will be added to
+# the list from the super type
+#
+# 3) The relationship set (OPTIONAL)
+# - If not specified, then assume the entry covers ALL relationship sets
+# - If the entry is an extension, then the relationship set dictates which
+# particular entry should be extended. If no relationship set is
+# specified, then every entry of the parent type is extended.
+#
+########################################################################
+
+#Quantifiers
+def_art : (\P Q.([x],[((([y],[])+Q(y)) <-> (x = y)), P(x)]), ((v -o r) -o ((f -o Var) -o Var)))
+ex_quant : (\P Q.(([x],[])+P(x)+Q(x)), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+univ_quant : (\P Q.([],[((([x],[])+P(x)) -> Q(x))]), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+no_quant : (\P Q.(-(([x],[])+P(x)+Q(x))), ((super.v -o super.r) -o ((super.f -o super.var) -o super.var)))
+
+#Nouns
+NN : (\x.([],[<word>(x)]), (v -o r)) : [spec]
+NN : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r)) : [] # treat a noun missing its spec as implicitly existentially quantified
+NNP : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[<word>(x)]), (v -o r))
+NNS(NN)
+PRP : (\P Q e.(([x],[]) + P(x) + Q(x,e)), ((v -o r) -o ((f -o var) -o var))), (\x.([],[PRO(x)]), (v -o r))
+
+#Verbs
+VB : (\x e.([],[<word>(e),subj(e,x)]), (subj -o f)) : [subj] #iv
+VB : (\x y e.([],[<word>(e), subj(e,x), obj(e,y)]), (subj -o (obj -o f))) : [subj, obj] #tv
+VB : (\x y z e.([],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), (subj -o (obj -o (theme -o f)))) : [subj, obj, theme] #dtv
+VB : (\y z e.([x],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), obj -o (theme -o f)) : [obj, theme] #incomplete dtv
+VB : (\x z e.([y],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), subj -o (theme -o f)) : [subj, theme] #incomplete dtv
+VB : (\z e.([x,y],[<word>(e), subj(e,x), obj(e,y), theme(e,z)]), theme -o f) : [theme] #incomplete dtv
+VB : (\x y e.(([],[<word>(e), subj(e,x), comp(e,y)])+P(e)), (subj -o (comp -o f))) : [subj, comp] #tv_comp
+VB : (\x P e.([],[<word>(e), subj(e,x), xcomp(e,P)]), (subj -o ((xcomp.subj -o xcomp) -o f))) : [subj, xcomp] #equi
+VB : (\x y P e.([],[<word>(e), subj(e,x), obj(e,y), xcomp(e,P)]), (subj -o (obj -o ((xcomp.subj -o xcomp) -o f)))) : [subj, obj, xcomp] # object equi
+VB : (\P e.([],[<word>(e), xcomp(e,P)]), (xcomp -o f)) : [xcomp] #raising
+VBD(VB) : (\P.PAST(P), (f -o f))
+VBZ(VB)
+
+#Modifiers
+nmod : (\x.([],[<word>(x)]), f), (\P Q x.(P(x)+Q(x)), (f -o ((super.v -o super.r) -o (super.v -o super.r)))) : []
+JJ(nmod) : []
+vmod : (\x.([],[<word>(x)]), f), (\P Q x.P(Q(x)), (f -o (super -o super))) : []
+RB(vmod) : []
+tense(vmod) : []
+
+#Prepositions
+IN : (\P Q e1.P(\x e2.(([],[<word>(e2,x)]) + Q(e2)),e1), ((subj -o subj.var) -o subj.var) -o (super -o super)) : [subj]
+IN(vmod) : []
+
+#Conjunctions
+cc_clause : (\P Q.(P + Q), (a -o (b -o f)))
View
54 examples/grammars/sample_grammars/glue_train.conll
@@ -1,27 +1,27 @@
-1 John _ NNP _ _ 2 SUBJ _ _
-2 runs _ VB _ _ 0 ROOT _ _
-
-1 a _ DT _ _ 2 SPEC _ _
-2 man _ NN _ _ 3 SUBJ _ _
-3 runs _ VB _ _ 0 ROOT _ _
-
-1 John _ NNP _ _ 2 SUBJ _ _
-2 sees _ VB _ _ 0 ROOT _ _
-3 Mary _ NNP _ _ 2 OBJ _ _
-
-1 every _ DT _ _ 2 SPEC _ _
-2 girl _ NN _ _ 3 SUBJ _ _
-3 chases _ VB _ _ 0 ROOT _ _
-4 an _ DT _ _ 5 SPEC _ _
-5 animal _ NN _ _ 3 OBJ _ _
-
-1 Bill _ NNP _ _ 2 SUBJ _ _
-2 sees _ VB _ _ 0 ROOT _ _
-3 a _ DT _ _ 4 SPEC _ _
-4 dog _ NN _ _ 2 OBJ _ _
-
-1 every _ DT _ _ 2 SPEC _ _
-2 girl _ NN _ _ 3 SUBJ _ _
-3 chases _ VB _ _ 0 ROOT _ _
-4 John _ NNP _ _ 3 OBJ _ _
-
+1 John _ NNP _ _ 2 SUBJ _ _
+2 runs _ VB _ _ 0 ROOT _ _
+
+1 a _ DT _ _ 2 SPEC _ _
+2 man _ NN _ _ 3 SUBJ _ _
+3 runs _ VB _ _ 0 ROOT _ _
+
+1 John _ NNP _ _ 2 SUBJ _ _
+2 sees _ VB _ _ 0 ROOT _ _
+3 Mary _ NNP _ _ 2 OBJ _ _
+
+1 every _ DT _ _ 2 SPEC _ _
+2 girl _ NN _ _ 3 SUBJ _ _
+3 chases _ VB _ _ 0 ROOT _ _
+4 an _ DT _ _ 5 SPEC _ _
+5 animal _ NN _ _ 3 OBJ _ _
+
+1 Bill _ NNP _ _ 2 SUBJ _ _
+2 sees _ VB _ _ 0 ROOT _ _
+3 a _ DT _ _ 4 SPEC _ _
+4 dog _ NN _ _ 2 OBJ _ _
+
+1 every _ DT _ _ 2 SPEC _ _
+2 girl _ NN _ _ 3 SUBJ _ _
+3 chases _ VB _ _ 0 ROOT _ _
+4 John _ NNP _ _ 3 OBJ _ _
+
View
262 examples/grammars/sample_grammars/gluesemantics.fcfg
@@ -1,131 +1,131 @@
-% start S
-
-#############################
-# Grammar Rules
-#############################
-
-# S expansion rules
-S -> NP[num=?n, case=nom] VP[num=?n]
-S -> S CC[sem=cc_clause] S
-
-# NP expansion rules
-NP[num=?n, gender=?g] -> Det[num=?n] N[num=?n, gender=?g]
-NP[num=?n, gender=?g] -> PropN[num=?n, gender=?g]
-NP[num=?n, case=?c, gender=?g] -> Pro[num=?n, case=?c, gender=?g]
-NP[num=pl, gender=?g] -> N[num=pl]
-NP[num=?n, gender=?g] -> NP[num=?n, gender=?g] PP
-NP[num=pl] -> NP CC[sem=cc_np] NP
-
-# N's can have Adjectives in front
-N[num=?n] -> JJ[type=attributive] N[num=?n]
-
-# JJs can have ADVs in front
-JJ -> ADV JJ
-
-# VP expansion rules
-VP[tense=?t, num=?n] -> IV[tense=?t, num=?n]
-VP[tense=?t, num=?n] -> TV[tense=?t, num=?n] NP[case=acc]
-VP[tense=?t, num=?n] -> TVComp[tense=?t, num=?n] S
-VP[tense=?t, num=?n] -> DTV[tense=?t, num=?n] NP[case=acc] NP[case=acc]
-VP[tense=?t, num=?n] -> EquiV[tense=?t, num=?n] TO VP[tense=inf]
-VP[tense=?t, num=?n] -> ObjEquiV[tense=?t, num=?n] NP[case=acc] TO VP[tense=inf]
-VP[tense=?t, num=?n] -> RaisingV[tense=?t, num=?n] TO VP[tense=inf]
-VP[tense=?t, num=?n] -> ADV VP[tense=?t, num=?n]
-VP[tense=?t, num=?n] -> VP[tense=?t, num=?n] PP
-VP[tense=?t, num=?n] -> VP[tense=?t, num=?n] CC[sem=cc_vp] VP[tense=?t, num=?n]
-
-# PP expansion
-PP -> IN NP
-
-# Det types
-Det[num=sg] -> DT
-Det[num=pl] -> DTS
-Det -> AT
-Det[num=?n] -> DTI[num=?n]
-Det[num=?n] -> ABN[num=?n]
-
-
-#############################
-# Lexical Rules
-#############################
-
-DT -> 'this' | 'each'
-DTS -> 'these'
-AT[num=sg, sem=ex_quant] -> 'a' | 'an'
-AT[sem=art_def] -> 'the'
-DTI[num=sg, sem=univ_quant] -> 'every'
-DTI[sem=ex_quant] -> 'some'
-ABN[num=sg] -> 'half'
-ABN[num=pl, sem=univ_quant] -> 'all'
-
-PropN[num=sg, gender=m, sem=pn] -> 'Kim' | 'Jody' | 'Mary' | 'Sue'
-PropN[num=sg, gender=m, sem=pn] -> 'David' | 'John' | 'Tom'
-PropN[num=pl, sem=pn] -> 'JM'
-
-N[num=sg, sem=n] -> 'boy' | 'car' | 'cat' | 'child' | 'criminal' | 'dog' | 'gift' | 'girl' | 'man' | 'mouse' | 'person' | 'pizza' | 'racketeer' | 'sandwich' | 'senator' | 'student' | 'telescope' | 'thing' | 'unicorn' | 'woman'
-N[num=pl, sem=n] -> 'boys' | 'cars' | 'cats' | 'children' | 'criminals' | 'dogs' | 'gifts' | 'girls' | 'men' | 'mice' | 'people' | 'pizzas' | 'racketeers' | 'sandwiches' | 'senators' | 'students' | 'telescopes' | 'things' | 'unicorns' | 'women'
-
-IV[tense=pres, num=sg, sem=iv] -> 'approaches' | 'comes' | 'disappears' | 'goes' | 'leaves' | 'vanishes' | 'walks' | 'yawns'
-IV[tense=pres, num=pl, sem=iv] -> 'approach' | 'come' | 'disappear' | 'go' | 'leave' | 'vanish' | 'walk' | 'yawn'
-IV[tense=past, num=?n, sem=iv] -> 'approached' | 'came' | 'disappeared' | 'went' | 'left' | 'vanished' | 'walked' | 'yawned'
-IV[tense=inf, num=na, sem=iv] -> 'approach' | 'come' | 'disappear' | 'go' | 'leave' | 'vanish' | 'walk' | 'yawn'
-
-TV[tense=pres, num=sg, sem=tv] -> 'chases' | 'eats' | 'finds' | 'likes' | 'sees' | 'orders'
-TV[tense=pres, num=pl, sem=tv] -> 'chase' | 'eat' | 'find' | 'like' | 'see' | 'order'
-TV[tense=past, num=?n, sem=tv] -> 'chased' | 'ate' | 'found' | 'liked' | 'saw' | 'ordered'
-TV[tense=inf, num=na, sem=tv] -> 'chase' | 'eat' | 'find' | 'like' | 'see' | 'order'
-
-DTV[tense=pres, num=sg, sem=dtv] -> 'gives'
-DTV[tense=pres, num=pl, sem=dtv] -> 'give'
-DTV[tense=past, num=?n, sem=dtv] -> 'gave'
-DTV[tense=inf, num=na, sem=dtv] -> 'give'
-
-TVComp[tense=pres, num=sg, sem=tv_comp] -> 'believes'
-TVComp[tense=pres, num=pl, sem=tv_comp] -> 'believe'
-TVComp[tense=past, num=?n, sem=tv_comp] -> 'believed'
-TVComp[tense=inf, num=na, sem=tv_comp] -> 'believe'
-
-EquiV[tense=pres, num=sg, sem=equi] -> 'tries'
-EquiV[tense=pres, num=pl, sem=equi] -> 'try'
-EquiV[tense=past, num=?n, sem=equi] -> 'tried'
-EquiV[tense=inf, num=na, sem=equi] -> 'try'
-
-ObjEquiV[tense=pres, num=sg, sem=obj_equi] -> 'persuades'
-ObjEquiV[tense=pres, num=pl, sem=obj_equi] -> 'persuade'
-ObjEquiV[tense=past, num=?n, sem=obj_equi] -> 'persuaded'
-ObjEquiV[tense=inf, num=na, sem=obj_equi] -> 'persuade'
-
-RaisingV[tense=pres, num=sg, sem=raising] -> 'seems'
-RaisingV[tense=pres, num=pl, sem=raising] -> 'seem'
-RaisingV[tense=past, num=?n, sem=raising] -> 'seemed'
-RaisingV[tense=inf, num=na, sem=raising] -> 'seem'
-
-#infinitive marker
-TO -> 'to'
-
-JJ[type=attributive, sem=adj_attributive_intersective] -> 'gray' | 'swedish'
-JJ[type=attributive, sem=adj_attributive_nonintersective] -> 'alleged'
-JJ[type=attributive, sem=adj_attributive_relative_intersective] -> 'big' | 'fat'
-JJ[type=attributive, sem=adj_attributive_relative_nonintersective] -> 'confessed' | 'former'
-JJ[type=predicative, sem=adj_predicative] -> 'gray' | 'swedish'
-
-ADV[sem=adv] -> 'apparently' | 'possibly' | 'very'
-ADV[sem=adv_ModifyingRelativeAdj] -> 'very'
-
-CC[sem=cc_clause] -> 'and'
-CC[sem=cc_np] -> 'and'
-CC[sem=cc_vp] -> 'and'
-
-IN -> 'at' | 'by' | 'from' | 'on' | 'with'
-
-Pro[num=sg, gender=m, -reflex, case=nom, sem=pro] -> 'he'
-Pro[num=sg, gender=m, -reflex, case=acc, sem=pro] -> 'him'
-Pro[num=sg, gender=m, +reflex, case=acc, sem=pro] -> 'himself'
-Pro[num=sg, gender=f, -reflex, sem=pro] -> 'her'
-Pro[num=sg, gender=f, +reflex, case=acc, sem=pro] -> 'herself'
-Pro[num=sg, gender=n, -reflex, sem=pro] -> 'it'
-Pro[num=sg, gender=n, +reflex, case=acc, sem=pro] -> 'itself'
-Pro[num=pl, -reflex, case=nom, sem=pro] -> 'they'
-Pro[num=pl, -reflex, case=acc, sem=pro] -> 'them'
-Pro[num=pl, +reflex, case=acc, sem=pro] -> 'themselves'
-Pro[num=pl, +reflex, case=acc, sem=recip] -> 'eachother'
+% start S
+
+#############################
+# Grammar Rules
+#############################
+
+# S expansion rules
+S -> NP[num=?n, case=nom] VP[num=?n]
+S -> S CC[sem=cc_clause] S
+
+# NP expansion rules
+NP[num=?n, gender=?g] -> Det[num=?n] N[num=?n, gender=?g]
+NP[num=?n, gender=?g] -> PropN[num=?n, gender=?g]
+NP[num=?n, case=?c, gender=?g] -> Pro[num=?n, case=?c, gender=?g]
+NP[num=pl, gender=?g] -> N[num=pl]
+NP[num=?n, gender=?g] -> NP[num=?n, gender=?g] PP
+NP[num=pl] -> NP CC[sem=cc_np] NP
+
+# N's can have Adjectives in front
+N[num=?n] -> JJ[type=attributive] N[num=?n]
+
+# JJs can have ADVs in front
+JJ -> ADV JJ
+
+# VP expansion rules
+VP[tense=?t, num=?n] -> IV[tense=?t, num=?n]
+VP[tense=?t, num=?n] -> TV[tense=?t, num=?n] NP[case=acc]
+VP[tense=?t, num=?n] -> TVComp[tense=?t, num=?n] S
+VP[tense=?t, num=?n] -> DTV[tense=?t, num=?n] NP[case=acc] NP[case=acc]
+VP[tense=?t, num=?n] -> EquiV[tense=?t, num=?n] TO VP[tense=inf]
+VP[tense=?t, num=?n] -> ObjEquiV[tense=?t, num=?n] NP[case=acc] TO VP[tense=inf]
+VP[tense=?t, num=?n] -> RaisingV[tense=?t, num=?n] TO VP[tense=inf]
+VP[tense=?t, num=?n] -> ADV VP[tense=?t, num=?n]
+VP[tense=?t, num=?n] -> VP[tense=?t, num=?n] PP
+VP[tense=?t, num=?n] -> VP[tense=?t, num=?n] CC[sem=cc_vp] VP[tense=?t, num=?n]
+
+# PP expansion
+PP -> IN NP
+
+# Det types
+Det[num=sg] -> DT
+Det[num=pl] -> DTS
+Det -> AT
+Det[num=?n] -> DTI[num=?n]
+Det[num=?n] -> ABN[num=?n]
+
+
+#############################
+# Lexical Rules
+#############################
+
+DT -> 'this' | 'each'
+DTS -> 'these'
+AT[num=sg, sem=ex_quant] -> 'a' | 'an'
+AT[sem=art_def] -> 'the'
+DTI[num=sg, sem=univ_quant] -> 'every'
+DTI[sem=ex_quant] -> 'some'
+ABN[num=sg] -> 'half'
+ABN[num=pl, sem=univ_quant] -> 'all'
+
+PropN[num=sg, gender=f, sem=pn] -> 'Kim' | 'Jody' | 'Mary' | 'Sue'
+PropN[num=sg, gender=m, sem=pn] -> 'David' | 'John' | 'Tom'
+PropN[num=pl, sem=pn] -> 'JM'
+
+N[num=sg, sem=n] -> 'boy' | 'car' | 'cat' | 'child' | 'criminal' | 'dog' | 'gift' | 'girl' | 'man' | 'mouse' | 'person' | 'pizza' | 'racketeer' | 'sandwich' | 'senator' | 'student' | 'telescope' | 'thing' | 'unicorn' | 'woman'
+N[num=pl, sem=n] -> 'boys' | 'cars' | 'cats' | 'children' | 'criminals' | 'dogs' | 'gifts' | 'girls' | 'men' | 'mice' | 'people' | 'pizzas' | 'racketeers' | 'sandwiches' | 'senators' | 'students' | 'telescopes' | 'things' | 'unicorns' | 'women'
+
+IV[tense=pres, num=sg, sem=iv] -> 'approaches' | 'comes' | 'disappears' | 'goes' | 'leaves' | 'vanishes' | 'walks' | 'yawns'
+IV[tense=pres, num=pl, sem=iv] -> 'approach' | 'come' | 'disappear' | 'go' | 'leave' | 'vanish' | 'walk' | 'yawn'
+IV[tense=past, num=?n, sem=iv] -> 'approached' | 'came' | 'disappeared' | 'went' | 'left' | 'vanished' | 'walked' | 'yawned'
+IV[tense=inf, num=na, sem=iv] -> 'approach' | 'come' | 'disappear' | 'go' | 'leave' | 'vanish' | 'walk' | 'yawn'
+
+TV[tense=pres, num=sg, sem=tv] -> 'chases' | 'eats' | 'finds' | 'likes' | 'sees' | 'orders'
+TV[tense=pres, num=pl, sem=tv] -> 'chase' | 'eat' | 'find' | 'like' | 'see' | 'order'
+TV[tense=past, num=?n, sem=tv] -> 'chased' | 'ate' | 'found' | 'liked' | 'saw' | 'ordered'
+TV[tense=inf, num=na, sem=tv] -> 'chase' | 'eat' | 'find' | 'like' | 'see' | 'order'
+
+DTV[tense=pres, num=sg, sem=dtv] -> 'gives'
+DTV[tense=pres, num=pl, sem=dtv] -> 'give'
+DTV[tense=past, num=?n, sem=dtv] -> 'gave'
+DTV[tense=inf, num=na, sem=dtv] -> 'give'
+
+TVComp[tense=pres, num=sg, sem=tv_comp] -> 'believes'
+TVComp[tense=pres, num=pl, sem=tv_comp] -> 'believe'
+TVComp[tense=past, num=?n, sem=tv_comp] -> 'believed'
+TVComp[tense=inf, num=na, sem=tv_comp] -> 'believe'
+
+EquiV[tense=pres, num=sg, sem=equi] -> 'tries'
+EquiV[tense=pres, num=pl, sem=equi] -> 'try'
+EquiV[tense=past, num=?n, sem=equi] -> 'tried'
+EquiV[tense=inf, num=na, sem=equi] -> 'try'
+
+ObjEquiV[tense=pres, num=sg, sem=obj_equi] -> 'persuades'
+ObjEquiV[tense=pres, num=pl, sem=obj_equi] -> 'persuade'
+ObjEquiV[tense=past, num=?n, sem=obj_equi] -> 'persuaded'
+ObjEquiV[tense=inf, num=na, sem=obj_equi] -> 'persuade'
+
+RaisingV[tense=pres, num=sg, sem=raising] -> 'seems'
+RaisingV[tense=pres, num=pl, sem=raising] -> 'seem'
+RaisingV[tense=past, num=?n, sem=raising] -> 'seemed'
+RaisingV[tense=inf, num=na, sem=raising] -> 'seem'
+
+#infinitive marker
+TO -> 'to'
+
+JJ[type=attributive, sem=adj_attributive_intersective] -> 'gray' | 'swedish'
+JJ[type=attributive, sem=adj_attributive_nonintersective] -> 'alleged'
+JJ[type=attributive, sem=adj_attributive_relative_intersective] -> 'big' | 'fat'
+JJ[type=attributive, sem=adj_attributive_relative_nonintersective] -> 'confessed' | 'former'
+JJ[type=predicative, sem=adj_predicative] -> 'gray' | 'swedish'
+
+ADV[sem=adv] -> 'apparently' | 'possibly' | 'very'
+ADV[sem=adv_ModifyingRelativeAdj] -> 'very'
+
+CC[sem=cc_clause] -> 'and'
+CC[sem=cc_np] -> 'and'
+CC[sem=cc_vp] -> 'and'
+
+IN -> 'at' | 'by' | 'from' | 'on' | 'with'
+
+Pro[num=sg, gender=m, -reflex, case=nom, sem=pro] -> 'he'
+Pro[num=sg, gender=m, -reflex, case=acc, sem=pro] -> 'him'
+Pro[num=sg, gender=m, +reflex, case=acc, sem=pro] -> 'himself'
+Pro[num=sg, gender=f, -reflex, sem=pro] -> 'her'
+Pro[num=sg, gender=f, +reflex, case=acc, sem=pro] -> 'herself'
+Pro[num=sg, gender=n, -reflex, sem=pro] -> 'it'
+Pro[num=sg, gender=n, +reflex, case=acc, sem=pro] -> 'itself'
+Pro[num=pl, -reflex, case=nom, sem=pro] -> 'they'
+Pro[num=pl, -reflex, case=acc, sem=pro] -> 'them'
+Pro[num=pl, +reflex, case=acc, sem=pro] -> 'themselves'
+Pro[num=pl, +reflex, case=acc, sem=recip] -> 'eachother'
View
2  nltk/VERSION
@@ -1 +1 @@
-2.0.2
+2.0.3
View
1,016 papers/acl-04/acl-04.tex
@@ -1,508 +1,508 @@
-\documentclass[11pt]{article}
-\usepackage{colacl}
-\usepackage{times}
-\usepackage{latexsym}
-\usepackage{url,alltt,epsfig,boxedminipage}
-
-% hyphenation control
-\pretolerance 250
-\tolerance 500
-\hyphenpenalty 200
-\exhyphenpenalty 100
-\doublehyphendemerits 7500
-\finalhyphendemerits 7500
-\brokenpenalty 10000
-\lefthyphenmin 3
-\righthyphenmin 3
-\widowpenalty 10000
-\clubpenalty 10000
-\displaywidowpenalty 10000
-\looseness 1
-
-\def\UrlFont{\tt\small}
-\def\object#1{\texttt{\small #1}}
-
-\title{NLTK: The Natural Language Toolkit}
-
-\author{
- Steven Bird \\
- Department of Computer Science \\
- \indent and Software Engineering \\
- University of Melbourne \\
- Victoria 3010, Australia \\
- {\tt\small sb@csse.unimelb.edu.au}
-\And
- Edward Loper\\
- Department of Computer \\
- \indent and Information Science \\
- University of Pennsylvania\\
- Philadelphia PA 19104-6389, USA\\
- {\tt\small edloper@gradient.cis.upenn.edu}
-}
-
-\newenvironment{sv}{\scriptsize\begin{alltt}}{\end{alltt}\normalsize}
-
-\begin{document}
-
-\maketitle
-
-\begin{abstract}\small
- The Natural Language Toolkit is a suite of program modules, data
- sets, tutorials and exercises, covering symbolic and statistical
- natural language processing. NLTK is written in Python and
- distributed under the GPL open source license. Over the past three
- years, NLTK has become popular in teaching and research. We
- describe the toolkit and report on its current state of development.
-\end{abstract}
-
-%========================= Introduction =========================
-\section{Introduction}
-
-The Natural Language Toolkit (NLTK) was developed in conjunction with
-a computational linguistics course at the University of Pennsylvania
-in 2001 \cite{LoperBird02}. It was designed with three pedagogical
-applications in mind: assignments, demonstrations, and projects.
-
-\textbf{Assignments.}
-NLTK supports assignments of varying difficulty
-and scope. In the simplest assignments, students experiment with
-existing components to perform a wide variety of NLP tasks. As students
-become more familiar with the toolkit, they can be asked to modify
-existing components, or to create complete systems out of existing
-components.
-
-\textbf{Demonstrations.}
-NLTK's interactive graphical demonstrations have proven to be very
-useful for students learning NLP concepts.
-The demonstrations give a step-by-step execution of important
-algorithms, displaying the current state of key data structures.
-A screenshot of the chart parsing demonstration is shown in Figure~\ref{fig:chart}.
-
-\textbf{Projects.} NLTK provides students with a flexible framework
-for advanced projects. Typical projects might involve implementing a
-new algorithm, developing a new component, or implementing a new
-task.
-
-We chose Python because it has a shallow learning curve, its syntax
-and semantics are transparent, and it has good string-handling
-functionality. As an interpreted language, Python facilitates
-interactive exploration. As an object-oriented language, Python
-permits data and methods to be encapsulated and re-used easily. Python comes with an extensive
-standard library, including tools for graphical programming and
-numerical processing. The recently added generator
-syntax makes it easy to create interactive implementations of
-algorithms \cite{Loper04,Rossum03intro,Rossum03ref}.
-
-\begin{figure}[bth]
-\epsfig{file=chart.eps, width=\linewidth}
-\caption{Interactive Chart Parsing Demonstration}
-\label{fig:chart}
-\end{figure}
-
-\section{Design}
-
-NLTK is implemented as a large collection of minimally interdependent
-modules, organized into a shallow hierarchy. A set of core
-modules defines basic data types that are used throughout the toolkit.
-The remaining modules are \emph{task modules}, each devoted to an
-individual natural language processing task. For example, the
-\object{nltk.parser} module encompasses to the task of
-\emph{parsing}, or deriving the syntactic structure of a sentence; and
-the \object{nltk.tokenizer} module is devoted to the task of
-\emph{tokenizing}, or dividing a text into its constituent parts.
-
-\subsection{Tokens and other core data types}
-
-To maximize interoperability between modules, we use a
-single class to encode information about natural language texts -- the
-\object{Token} class. Each \object{Token} instance represents a
-unit of text such as a word, sentence, or document, and is
-defined by a (partial) mapping from property names to values. For
-example, the \object{TEXT} property is used to encode a token's text
-content:\footnote{Some code samples are specific to NLTK
- version 1.4.}
-
-\begin{alltt}\small
-\textbf{>>> from nltk.token import *}
-\textbf{>>> Token(TEXT="Hello World!")}
-<Hello World!>
-\end{alltt}
-%
-The \object{TAG} property is used to encode a token's part-of-speech
-tag:
-
-\begin{alltt}\small
-\textbf{>>> Token(TEXT="python", TAG="NN")}
-<python/NN>
-\end{alltt}
-%
-The \object{SUBTOKENS} property is used to store a tokenized text:
-
-\begin{alltt}\small
-\textbf{>>> from nltk.tokenizer import *}
-\textbf{>>> tok = Token(TEXT="Hello World!")}
-\textbf{>>> WhitespaceTokenizer().tokenize(tok)}
-\textbf{>>> print tok['SUBTOKENS'])}
-[<Hello>, <World!>]
-\end{alltt}
-%
-In a similar fashion, other language processing tasks such as
-word-sense disambiguation, chunking and parsing all add properties to
-the \object{Token} data structure.
-
-In general, language processing tasks are formulated as
-annotations and transformations involving \object{Tokens}. In
-particular, each processing task takes a token and extends it to
-include new information. These modifications are typically
-\emph{monotonic}; new information is added but
-existing information is not deleted or modified. Thus, tokens serve
-as a \emph{blackboard}, where information about a piece of text is
-collated. This architecture contrasts with the more typical
-\emph{pipeline} architecture where each processing task's output
-discards its input information. We chose the blackboard approach
-over the pipeline approach because it allows more flexibility when
-combining tasks into a single system.
-
-In addition to the \object{Token} class and its derivatives, NLTK
-defines a variety of other data types. For instance, the
-\object{probability} module defines classes for
-probability distributions and statistical smoothing techniques; and
-the \object{cfg} module defines classes for encoding context free
-grammars and probabilistic context free grammars.
-
-\subsection{The corpus module}
-
-\begin{table*}
-\small\noindent
-\begin{boxedminipage}{\linewidth}
-\begin{tabular}{llll}
-\setlength{\tabcolsep}{0.5\tabcolsep}
-\textbf{Corpus} &
-\textbf{Contents and Wordcount} &
-\textbf{Example Application} \\
-
-20 Newsgroups (selection) &
-3 newsgroups, 4000 posts, 780kw &
-text classification \\
-
-Brown Corpus &
-15 genres, 1.15Mw, tagged &
-training \& testing taggers, text classification \\
-
-CoNLL 2000 Chunking Data &
-270kw, tagged and chunked &
-training \& testing chunk parsers \\
-
-Project Gutenberg (selection) &
-14 texts, 1.7Mw &
-text classification, language modelling \\
-
-NIST 1999 IEER (selection) &
-63kw, named-entity markup &
-training \& testing named-entity recognizers \\
-
-Levin Verb Index &
-3k verbs with Levin classes &
-parser development \\
-
-Names Corpus &
-8k male \& female names &
-text classification \\
-
-PP Attachment Corpus &
-28k prepositional phrases, tagged &
-parser development \\
-
-Roget's Thesaurus &
-200kw, formatted text &
-word-sense disambiguation \\
-
-SEMCOR &
-880kw, POS \& sense tagged &
-word-sense disambiguation \\
-
-SENSEVAL 2 Corpus &
-600kw, POS \& sense tagged &
-word-sense disambiguation \\
-
-Stopwords Corpus &
-2,400 stopwords for 11 lgs &
-text retrieval \\
-
-Penn Treebank (sample) &
-40kw, tagged \& parsed &
-parser development \\
-
-Wordnet 1.7 &
-180kw in a semantic network &
-WSD, NL understanding \\
-
-Wordlist Corpus &
-960kw and 20k affixes for 8 lgs &
-spell checking
- \\
-\end{tabular}
-\caption{Corpora and Corpus Samples Distributed with NLTK}\label{tab:data}
-\end{boxedminipage}
-\end{table*}
-
-Many language processing tasks must be developed and tested using
-annotated data sets or corpora. Several such corpora are distributed
-with NLTK, as listed in Table~\ref{tab:data}. The
-\object{corpus} module defines classes for reading and processing
-many of these corpora. The following code fragment illustrates
-how the Brown Corpus is accessed.
-
-\begin{alltt}\small
-\textbf{>>> from nltk.corpus import brown}
-\textbf{>>> brown.groups()}
-['skill and hobbies', 'popular lore',
-'humor', 'fiction: mystery', ...]
-\textbf{>>> brown.items('humor')}
-('cr01', 'cr02', 'cr03', 'cr04', 'cr05',
-'cr06', 'cr07', 'cr08', 'cr09')
-\textbf{>>> brown.tokenize('cr01')}
-<[<It/pps>, <was/bedz>, <among/in>,
-<these/dts>, <that/cs>, <Hinkle/np>,
-<identified/vbd>, <a/at>, ...]>
-\end{alltt}
-%
-A selection of 5\% of the Penn Treebank corpus is included with
-NLTK, and it is accessed as follows:
-
-\begin{alltt}\small
-\textbf{>>> from nltk.corpus import treebank}
-\textbf{>>> treebank.groups()}
-('raw', 'tagged', 'parsed', 'merged')
-\textbf{>>> treebank.items('parsed')}
-['wsj_0001.prd', 'wsj_0002.prd', ...]
-\textbf{>>> item = 'parsed/wsj_0001.prd'}
-\textbf{>>> sentences = treebank.tokenize(item)}
-\textbf{>>> for sent in sentences['SUBTOKENS']:}
-\textbf{... print sent.pp()} \emph{# pretty-print}
-(S:
- (NP-SBJ:
- (NP: <Pierre> <Vinken>)
- (ADJP:
- (NP: <61> <years>)
- <old>
- )
- ...
-\end{alltt}
-%
-
-\subsection{Processing modules}
-
-Each language processing algorithm is implemented as a class. For
-example, the \object{ChartParser} and
-\object{Recursive\-Descent\-Parser} classes each define a single
-algorithm for parsing a text. We implement language processing
-algorithms using classes instead of functions for three reasons.
-First, all algorithm-specific options can be passed to the
-constructor, allowing a consistent interface for applying the
-algorithms. Second, a number of algorithms need to have their state
-initialized before they can be used. For example, the
-\object{NthOrderTagger} class must be initialized by training on a
-tagged corpus before it can be used. Third, subclassing can be used
-to create specialized versions of a given algorithm.
-
-Each processing module defines an \emph{interface} for its task.
-Interface classes are distinguished by naming them with a trailing
-capital ``\object{I},'' such as \object{ParserI}.
-Each interface defines a single \emph{action method} which
-performs the task defined by the interface. For example, the
-\object{ParserI} interface defines the \object{parse} method and the
-\object{Tokenizer} interface defines the \object{tokenize} method.
-When appropriate, an interface defines \emph{extended action
- methods}, which provide variations on the basic action method. For
-example, the \object{ParserI} interface defines the \object{parse\_n}
-method which finds at most $n$ parses for a given sentence; and
-the \object{TokenizerI} interface defines the \object{xtokenize}
-method, which outputs an iterator over subtokens instead of a list of
-subtokens.
-
-NLTK includes the following modules:
-\object{cfg},
-\object{corpus},
-\object{draw}
-(\object{cfg},
-\object{chart},
-\object{corpus},
-\object{featurestruct},
-\object{fsa},
-\object{graph},
-\object{plot},
-\object{rdparser},
-\object{srparser},
-\object{tree}),
-\object{eval},
-\object{featurestruct},
-\object{parser}
-(\object{chart},
-\object{chunk},
-\object{probabilistic}),
-\object{probability},
-\object{sense},
-\object{set},
-\object{stemmer}
-(\object{porter}),
-\object{tagger},
-\object{test},
-\object{token},
-\object{tokenizer},
-\object{tree}, and
-\object{util}.
-Please see the online documentation for details.
-
-\subsection{Documentation}
-
-Three different types of documentation are available. Tutorials
-explain how to use the toolkit, with detailed worked examples. The
-API documentation describes every module, interface, class, method,
-function, and variable in the toolkit. Technical reports explain and
-justify the toolkit's design and implementation. All are available
-from \url{http://nltk.sf.net/docs.html}.
-
-\section{Installing NLTK}
-
-NLTK is available from \url{nltk.sf.net}, and is packaged for
-easy installation under Unix, Mac OS X and Windows. The full
-distribution consists of four packages: the Python source code
-(\object{nltk}); the corpora (\object{nltk-data}); the documentation
-(\object{nltk-docs}); and third-party contributions
-(\object{nltk-contrib}). Before installing NLTK, it is necessary to
-install Python version 2.3 or later, available from
-\url{www.python.org}. Full installation instructions and a quick
-start guide are available from the NLTK homepage.
-
-As soon as NLTK is installed, users can run the demonstrations. On
-Windows, the demonstrations can be run by double-clicking on their
-Python source files. Alternatively, from the Python interpreter, this
-can be done as follows:
-
-\begin{alltt} \small
-\textbf{>>> import nltk.draw.rdparser}
-\textbf{>>> nltk.draw.rdparser.demo()}
-\textbf{>>> nltk.draw.srparser.demo()}
-\textbf{>>> nltk.draw.chart.demo()}
-\end{alltt}
-
-\section{Using and contributing to NLTK}
-
-NLTK has been used at the University of Pennsylvania since 2001, and has
-subsequently been adopted by several NLP courses at other
-universities, including those listed in Table~\ref{tab:courses}.
-
-Third party contributions to NLTK include: Brill tagger (Chris Maloof),
-hidden Markov model tagger (Trevor Cohn, Phil Blunsom), GPSG-style feature-based grammar
-and parser (Rob Speer, Bob Berwick), finite-state morphological analyzer
-(Carl de Marcken, Beracah Yankama, Bob Berwick), decision list and decision tree
-classifiers (Trevor Cohn), and Discourse Representation Theory
-implementation (Edward Ivanovic).
-
-NLTK is an open source project, and we welcome any contributions.
-There are several ways to contribute: users can report bugs, suggest
-features, or contribute patches on Sourceforge; users can participate
-in discussions on the \textit{NLTK-Devel} mailing
-list\footnote{\url{http://lists.sourceforge.net/lists/listinfo/nltk-devel}}
-or in the NLTK public
-forums; and users can submit their own NLTK-based projects for
-inclusion in the nltk\_contrib directory. New code modules that are
-relevant, substantial, original and well-documented will be considered
-for inclusion in NLTK proper.
-All source code is distributed under the GNU General Public License,
-and all documentation is distributed under a Creative Commons
-non-commercial license. Thus, potential contributors can be confident that their
-work will remain freely available to all. Further information about
-contributing to NLTK is available at \url{http://nltk.sf.net/contrib.html}.
-
-\begin{table}[bt]
-\small\noindent
-\begin{boxedminipage}{\linewidth}
-\begin{tabular}{l}
-
-Graz University of Technology, Austria \\
-\hspace{2ex}
-\textit{Information Search and Retrieval} \\[.5ex]
-
-Macquarie University, Australia \\
-\hspace{2ex}
-\textit{Intelligent Text Processing} \\[.5ex]
-
-Massachusetts Institute of Technology, USA \\
-\hspace{2ex}
-\textit{Natural Language Processing} \\[.5ex]
-
-National Autonomous University of Mexico, Mexico \\
-\hspace{2ex}
-\textit{Introduction to Natural Language Processing}\\
-\hspace{2ex}
-\textit{in Python} \\[.5ex]
-
-Ohio State University, USA \\
-\hspace{2ex}
-\textit{Statistical Natural Language Processing}\\[.5ex]
-
-University of Amsterdam, Netherlands \\
-\hspace{2ex}
-\textit{Language Processing and Information Access} \\[.5ex]
-
-University of Colorado, USA \\
-\hspace{2ex}
-\textit{Natural Language Processing} \\[.5ex]
-
-University of Edinburgh, UK \\
-\hspace{2ex}
-\textit{Introduction to Computational Linguistics} \\[.5ex]
-
-University of Magdeburg, Germany \\
-\hspace{2ex}
-\textit{Natural Language Systems} \\[.5ex]
-
-University of Malta, Malta \\
-\hspace{2ex}
-\textit{Natural Language Algorithms}\\[.5ex]
-
-University of Melbourne, Australia \\
-\hspace{2ex}
-\textit{Human Language Technology} \\[.5ex]
-
-University of Pennsylvania, USA \\
-\hspace{2ex}
-\textit{Introduction to Computational Linguistics} \\[.5ex]
-
-University of Pittsburgh, USA \\
-\hspace{2ex}
-\textit{Artificial Intelligence Application Development} \\[.5ex]
-
-Simon Fraser University, Canada \\
-\hspace{2ex}
-\textit{Computational Linguistics} \\[.5ex]
-
-\end{tabular}
-\caption{University Courses using NLTK}\label{tab:courses}
-\end{boxedminipage}
-\end{table}
-
-\section{Conclusion}
-
-NLTK is a broad-coverage natural language toolkit that provides a simple, extensible,
-uniform framework for assignments, demonstrations and projects. It is
-thoroughly documented, easy to learn, and simple to use. NLTK is now widely
-used in research and teaching. Readers who
-would like to receive occasional announcements about NLTK are
-encouraged to sign up for the low-volume, moderated mailing list
-\textit{NLTK-Announce}.\footnote{\url{http://lists.sourceforge.net/lists/listinfo/nltk-announce}}
-
-\section{Acknowledgements}
-
-We are indebted to our students and colleagues for feedback on the
-toolkit, and to many contributors listed on the NLTK website.
-
-\pagebreak
-
-\bibliographystyle{acl}
-\bibliography{nltk}
-
-\end{document}
-
+\documentclass[11pt]{article}
+\usepackage{colacl}
+\usepackage{times}
+\usepackage{latexsym}
+\usepackage{url,alltt,epsfig,boxedminipage}
+
+% hyphenation control
+\pretolerance 250
+\tolerance 500
+\hyphenpenalty 200
+\exhyphenpenalty 100
+\doublehyphendemerits 7500
+\finalhyphendemerits 7500
+\brokenpenalty 10000
+\lefthyphenmin 3
+\righthyphenmin 3
+\widowpenalty 10000
+\clubpenalty 10000
+\displaywidowpenalty 10000
+\looseness 1
+
+\def\UrlFont{\tt\small}
+\def\object#1{\texttt{\small #1}}
+
+\title{NLTK: The Natural Language Toolkit}
+
+\author{
+ Steven Bird \\
+ Department of Computer Science \\
+ \indent and Software Engineering \\
+ University of Melbourne \\
+ Victoria 3010, Australia \\
+ {\tt\small sb@csse.unimelb.edu.au}
+\And
+ Edward Loper\\
+ Department of Computer \\
+ \indent and Information Science \\
+ University of Pennsylvania\\
+ Philadelphia PA 19104-6389, USA\\
+ {\tt\small edloper@gradient.cis.upenn.edu}
+}
+
+\newenvironment{sv}{\scriptsize\begin{alltt}}{\end{alltt}\normalsize}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}\small
+ The Natural Language Toolkit is a suite of program modules, data
+ sets, tutorials and exercises, covering symbolic and statistical
+ natural language processing. NLTK is written in Python and
+ distributed under the GPL open source license. Over the past three
+ years, NLTK has become popular in teaching and research. We
+ describe the toolkit and report on its current state of development.
+\end{abstract}
+
+%========================= Introduction =========================
+\section{Introduction}
+
+The Natural Language Toolkit (NLTK) was developed in conjunction with
+a computational linguistics course at the University of Pennsylvania
+in 2001 \cite{LoperBird02}. It was designed with three pedagogical
+applications in mind: assignments, demonstrations, and projects.
+
+\textbf{Assignments.}
+NLTK supports assignments of varying difficulty
+and scope. In the simplest assignments, students experiment with
+existing components to perform a wide variety of NLP tasks. As students
+become more familiar with the toolkit, they can be asked to modify
+existing components, or to create complete systems out of existing
+components.
+
+\textbf{Demonstrations.}
+NLTK's interactive graphical demonstrations have proven to be very
+useful for students learning NLP concepts.
+The demonstrations give a step-by-step execution of important
+algorithms, displaying the current state of key data structures.
+A screenshot of the chart parsing demonstration is shown in Figure~\ref{fig:chart}.
+
+\textbf{Projects.} NLTK provides students with a flexible framework
+for advanced projects. Typical projects might involve implementing a
+new algorithm, developing a new component, or implementing a new
+task.
+
+We chose Python because it has a shallow learning curve, its syntax
+and semantics are transparent, and it has good string-handling
+functionality. As an interpreted language, Python facilitates
+interactive exploration. As an object-oriented language, Python
+permits data and methods to be encapsulated and re-used easily. Python comes with an extensive
+standard library, including tools for graphical programming and
+numerical processing. The recently added generator
+syntax makes it easy to create interactive implementations of
+algorithms \cite{Loper04,Rossum03intro,Rossum03ref}.
+
+\begin{figure}[bth]
+\epsfig{file=chart.eps, width=\linewidth}
+\caption{Interactive Chart Parsing Demonstration}
+\label{fig:chart}
+\end{figure}
+
+\section{Design}
+
+NLTK is implemented as a large collection of minimally interdependent
+modules, organized into a shallow hierarchy. A set of core
+modules defines basic data types that are used throughout the toolkit.
+The remaining modules are \emph{task modules}, each devoted to an
+individual natural language processing task. For example, the
+\object{nltk.parser} module encompasses the task of
+\emph{parsing}, or deriving the syntactic structure of a sentence; and
+the \object{nltk.tokenizer} module is devoted to the task of
+\emph{tokenizing}, or dividing a text into its constituent parts.
+
+\subsection{Tokens and other core data types}
+
+To maximize interoperability between modules, we use a
+single class to encode information about natural language texts -- the
+\object{Token} class. Each \object{Token} instance represents a
+unit of text such as a word, sentence, or document, and is
+defined by a (partial) mapping from property names to values. For
+example, the \object{TEXT} property is used to encode a token's text
+content:\footnote{Some code samples are specific to NLTK
+ version 1.4.}
+
+\begin{alltt}\small
+\textbf{>>> from nltk.token import *}
+\textbf{>>> Token(TEXT="Hello World!")}
+<Hello World!>
+\end{alltt}
+%
+The \object{TAG} property is used to encode a token's part-of-speech
+tag:
+
+\begin{alltt}\small
+\textbf{>>> Token(TEXT="python", TAG="NN")}
+<python/NN>
+\end{alltt}
+%
+The \object{SUBTOKENS} property is used to store a tokenized text:
+
+\begin{alltt}\small
+\textbf{>>> from nltk.tokenizer import *}
+\textbf{>>> tok = Token(TEXT="Hello World!")}
+\textbf{>>> WhitespaceTokenizer().tokenize(tok)}
+\textbf{>>> print tok['SUBTOKENS']}
+[<Hello>, <World!>]
+\end{alltt}
+%
+In a similar fashion, other language processing tasks such as
+word-sense disambiguation, chunking and parsing all add properties to
+the \object{Token} data structure.
+
+In general, language processing tasks are formulated as
+annotations and transformations involving \object{Tokens}. In
+particular, each processing task takes a token and extends it to
+include new information. These modifications are typically
+\emph{monotonic}; new information is added but
+existing information is not deleted or modified. Thus, tokens serve
+as a \emph{blackboard}, where information about a piece of text is
+collated. This architecture contrasts with the more typical
+\emph{pipeline} architecture where each processing task's output
+discards its input information. We chose the blackboard approach
+over the pipeline approach because it allows more flexibility when
+combining tasks into a single system.
+
+In addition to the \object{Token} class and its derivatives, NLTK
+defines a variety of other data types. For instance, the
+\object{probability} module defines classes for
+probability distributions and statistical smoothing techniques; and
+the \object{cfg} module defines classes for encoding context free
+grammars and probabilistic context free grammars.
+
+\subsection{The corpus module}
+
+\begin{table*}
+\small\noindent
+\begin{boxedminipage}{\linewidth}
+\begin{tabular}{llll}
+\setlength{\tabcolsep}{0.5\tabcolsep}
+\textbf{Corpus} &
+\textbf{Contents and Wordcount} &
+\textbf{Example Application} \\
+
+20 Newsgroups (selection) &
+3 newsgroups, 4000 posts, 780kw &
+text classification \\
+
+Brown Corpus &
+15 genres, 1.15Mw, tagged &
+training \& testing taggers, text classification \\
+
+CoNLL 2000 Chunking Data &
+270kw, tagged and chunked &
+training \& testing chunk parsers \\
+
+Project Gutenberg (selection) &
+14 texts, 1.7Mw &
+text classification, language modelling \\
+
+NIST 1999 IEER (selection) &
+63kw, named-entity markup &
+training \& testing named-entity recognizers \\
+
+Levin Verb Index &
+3k verbs with Levin classes &
+parser development \\
+
+Names Corpus &
+8k male \& female names &
+text classification \\
+
+PP Attachment Corpus &
+28k prepositional phrases, tagged &
+parser development \\
+
+Roget's Thesaurus &
+200kw, formatted text &
+word-sense disambiguation \\
+
+SEMCOR &
+880kw, POS \& sense tagged &
+word-sense disambiguation \\
+
+SENSEVAL 2 Corpus &
+600kw, POS \& sense tagged &
+word-sense disambiguation \\
+
+Stopwords Corpus &
+2,400 stopwords for 11 lgs &
+text retrieval \\
+
+Penn Treebank (sample) &
+40kw, tagged \& parsed &
+parser development \\
+
+Wordnet 1.7 &
+180kw in a semantic network &
+WSD, NL understanding \\
+
+Wordlist Corpus &
+960kw and 20k affixes for 8 lgs &
+spell checking
+ \\
+\end{tabular}
+\caption{Corpora and Corpus Samples Distributed with NLTK}\label{tab:data}
+\end{boxedminipage}
+\end{table*}
+
+Many language processing tasks must be developed and tested using
+annotated data sets or corpora. Several such corpora are distributed
+with NLTK, as listed in Table~\ref{tab:data}. The
+\object{corpus} module defines classes for reading and processing
+many of these corpora. The following code fragment illustrates
+how the Brown Corpus is accessed.
+
+\begin{alltt}\small
+\textbf{>>> from nltk.corpus import brown}
+\textbf{>>> brown.groups()}
+['skill and hobbies', 'popular lore',
+'humor', 'fiction: mystery', ...]
+\textbf{>>> brown.items('humor')}
+('cr01', 'cr02', 'cr03', 'cr04', 'cr05',
+'cr06', 'cr07', 'cr08', 'cr09')
+\textbf{>>> brown.tokenize('cr01')}
+<[<It/pps>, <was/bedz>, <among/in>,
+<these/dts>, <that/cs>, <Hinkle/np>,
+<identified/vbd>, <a/at>, ...]>
+\end{alltt}
+%
+A selection of 5\% of the Penn Treebank corpus is included with
+NLTK, and it is accessed as follows:
+
+\begin{alltt}\small
+\textbf{>>> from nltk.corpus import treebank}
+\textbf{>>> treebank.groups()}
+('raw', 'tagged', 'parsed', 'merged')
+\textbf{>>> treebank.items('parsed')}
+['wsj_0001.prd', 'wsj_0002.prd', ...]
+\textbf{>>> item = 'parsed/wsj_0001.prd'}
+\textbf{>>> sentences = treebank.tokenize(item)}
+\textbf{>>> for sent in sentences['SUBTOKENS']:}
+\textbf{... print sent.pp()} \emph{# pretty-print}
+(S:
+ (NP-SBJ:
+ (NP: <Pierre> <Vinken>)
+ (ADJP:
+ (NP: <61> <years>)
+ <old>
+ )
+ ...
+\end{alltt}
+%
+
+\subsection{Processing modules}
+
+Each language processing algorithm is implemented as a class. For
+example, the \object{ChartParser} and
+\object{Recursive\-Descent\-Parser} classes each define a single
+algorithm for parsing a text. We implement language processing
+algorithms using classes instead of functions for three reasons.
+First, all algorithm-specific options can be passed to the
+constructor, allowing a consistent interface for applying the
+algorithms. Second, a number of algorithms need to have their state
+initialized before they can be used. For example, the
+\object{NthOrderTagger} class must be initialized by training on a
+tagged corpus before it can be used. Third, subclassing can be used
+to create specialized versions of a given algorithm.
+
+Each processing module defines an \emph{interface} for its task.
+Interface classes are distinguished by naming them with a trailing
+capital ``\object{I},'' such as \object{ParserI}.
+Each interface defines a single \emph{action method} which
+performs the task defined by the interface. For example, the
+\object{ParserI} interface defines the \object{parse} method and the
+\object{TokenizerI} interface defines the \object{tokenize} method.
+When appropriate, an interface defines \emph{extended action
+ methods}, which provide variations on the basic action method. For
+example, the \object{ParserI} interface defines the \object{parse\_n}
+method which finds at most $n$ parses for a given sentence; and
+the \object{TokenizerI} interface defines the \object{xtokenize}
+method, which outputs an iterator over subtokens instead of a list of
+subtokens.
+
+NLTK includes the following modules:
+\object{cfg},
+\object{corpus},
+\object{draw}
+(\object{cfg},
+\object{chart},
+\object{corpus},
+\object{featurestruct},
+\object{fsa},
+\object{graph},
+\object{plot},
+\object{rdparser},
+\object{srparser},
+\object{tree}),
+\object{eval},
+\object{featurestruct},
+\object{parser}
+(\object{chart},
+\object{chunk},
+\object{probabilistic}),
+\object{probability},
+\object{sense},
+\object{set},
+\object{stemmer}
+(\object{porter}),
+\object{tagger},
+\object{test},
+\object{token},
+\object{tokenizer},
+\object{tree}, and
+\object{util}.
+Please see the online documentation for details.
+
+\subsection{Documentation}
+
+Three different types of documentation are available. Tutorials
+explain how to use the toolkit, with detailed worked examples. The
+API documentation describes every module, interface, class, method,
+function, and variable in the toolkit. Technical reports explain and
+justify the toolkit's design and implementation. All are available
+from \url{http://nltk.sf.net/docs.html}.
+
+\section{Installing NLTK}
+
+NLTK is available from \url{nltk.sf.net}, and is packaged for
+easy installation under Unix, Mac OS X and Windows. The full
+distribution consists of four packages: the Python source code
+(\object{nltk}); the corpora (\object{nltk-data}); the documentation
+(\object{nltk-docs}); and third-party contributions
+(\object{nltk-contrib}). Before installing NLTK, it is necessary to
+install Python version 2.3 or later, available from
+\url{www.python.org}. Full installation instructions and a quick
+start guide are available from the NLTK homepage.
+
+As soon as NLTK is installed, users can run the demonstrations. On
+Windows, the demonstrations can be run by double-clicking on their
+Python source files. Alternatively, from the Python interpreter, this
+can be done as follows:
+
+\begin{alltt} \small
+\textbf{>>> import nltk.draw.rdparser}
+\textbf{>>> nltk.draw.rdparser.demo()}
+\textbf{>>> nltk.draw.srparser.demo()}
+\textbf{>>> nltk.draw.chart.demo()}
+\end{alltt}
+
+\section{Using and contributing to NLTK}
+
+NLTK has been used at the University of Pennsylvania since 2001, and has
+subsequently been adopted by several NLP courses at other
+universities, including those listed in Table~\ref{tab:courses}.
+
+Third-party contributions to NLTK include: Brill tagger (Chris Maloof),
+hidden Markov model tagger (Trevor Cohn, Phil Blunsom), GPSG-style feature-based grammar
+and parser (Rob Speer, Bob Berwick), finite-state morphological analyzer
+(Carl de Marcken, Beracah Yankama, Bob Berwick), decision list and decision tree
+classifiers (Trevor Cohn), and Discourse Representation Theory
+implementation (Edward Ivanovic).
+
+NLTK is an open source project, and we welcome any contributions.
+There are several ways to contribute: users can report bugs, suggest
+features, or contribute patches on Sourceforge; users can participate
+in discussions on the \textit{NLTK-Devel} mailing
+list\footnote{\url{http://lists.sourceforge.net/lists/listinfo/nltk-devel}}
+or in the NLTK public
+forums; and users can submit their own NLTK-based projects for
+inclusion in the nltk\_contrib directory. New code modules that are
+relevant, substantial, original and well-documented will be considered
+for inclusion in NLTK proper.
+All source code is distributed under the GNU General Public License,
+and all documentation is distributed under a Creative Commons
+non-commercial license. Thus, potential contributors can be confident that their
+work will remain freely available to all. Further information about
+contributing to NLTK is available at \url{http://nltk.sf.net/contrib.html}.
+
+\begin{table}[bt]
+\small\noindent
+\begin{boxedminipage}{\linewidth}
+\begin{tabular}{l}
+
+Graz University of Technology, Austria \\
+\hspace{2ex}
+\textit{Information Search and Retrieval} \\[.5ex]
+
+Macquarie University, Australia \\
+\hspace{2ex}
+\textit{Intelligent Text Processing} \\[.5ex]
+
+Massachusetts Institute of Technology, USA \\
+\hspace{2ex}
+\textit{Natural Language Processing} \\[.5ex]
+
+National Autonomous University of Mexico, Mexico \\
+\hspace{2ex}
+\textit{Introduction to Natural Language Processing}\\
+\hspace{2ex}
+\textit{in Python} \\[.5ex]
+
+Ohio State University, USA \\
+\hspace{2ex}
+\textit{Statistical Natural Language Processing}\\[.5ex]
+
+University of Amsterdam, Netherlands \\
+\hspace{2ex}
+\textit{Language Processing and Information Access} \\[.5ex]
+
+University of Colorado, USA \\
+\hspace{2ex}
+\textit{Natural Language Processing} \\[.5ex]
+
+University of Edinburgh, UK \\
+\hspace{2ex}
+\textit{Introduction to Computational Linguistics} \\[.5ex]
+
+University of Magdeburg, Germany \\
+\hspace{2ex}
+\textit{Natural Language Systems} \\[.5ex]
+
+University of Malta, Malta \\
+\hspace{2ex}
+\textit{Natural Language Algorithms}\\[.5ex]
+
+University of Melbourne, Australia \\
+\hspace{2ex}
+\textit{Human Language Technology} \\[.5ex]
+
+University of Pennsylvania, USA \\
+\hspace{2ex}
+\textit{Introduction to Computational Linguistics} \\[.5ex]
+
+University of Pittsburgh, USA \\
+\hspace{2ex}
+\textit{Artificial Intelligence Application Development} \\[.5ex]
+
+Simon Fraser University, Canada \\
+\hspace{2ex}
+\textit{Computational Linguistics} \\[.5ex]
+
+\end{tabular}
+\caption{University Courses using NLTK}\label{tab:courses}
+\end{boxedminipage}
+\end{table}
+
+\section{Conclusion}
+
+NLTK is a broad-coverage natural language toolkit that provides a simple, extensible,
+uniform framework for assignments, demonstrations and projects. It is
+thoroughly documented, easy to learn, and simple to use. NLTK is now widely
+used in research and teaching. Readers who
+would like to receive occasional announcements about NLTK are
+encouraged to sign up for the low-volume, moderated mailing list
+\textit{NLTK-Announce}.\footnote{\url{http://lists.sourceforge.net/lists/listinfo/nltk-announce}}
+
+\section{Acknowledgements}
+
+We are indebted to our students and colleagues for feedback on the
+toolkit, and to many contributors listed on the NLTK website.
+
+\pagebreak
+
+\bibliographystyle{acl}
+\bibliography{nltk}
+
+\end{document}
+
View
722 papers/acl-04/acl04.sty
@@ -1,361 +1,361 @@
-% File acl04.sty
-% January 07, 2004
-% Contact: rambow@cs.columbia.edu
-
-% This is the LaTeX style file for ACL 2004. It is identical to the
-% style files for ACL 2003, ACL 2002, ACL 2001, ACL 2000, EACL 95 and EACL
-% 99.
-%
-% -- Roberto Zamparelli, March 26, 2001
-% -- Dekang Lin, October 1, 2001
-
-% This is the LaTeX style file for ACL 2000. It is nearly identical to the
-% style files for EACL 95 and EACL 99. Minor changes include editing the
-% instructions to reflect use of \documentclass rather than \documentstyle
-% and removing the white space before the title on the first page
-% -- John Chen, June 29, 2000
-
-% To convert from submissions prepared using the style file aclsub.sty
-% prepared for the ACL 2000 conference, proceed as follows:
-% 1) Remove submission-specific information: \whichsession, \id,
-% \wordcount, \otherconferences, \area, \keywords
-% 2) \summary should be removed. The summary material should come
-% after \maketitle and should be in the ``abstract'' environment
-% 3) Check all citations. This style should handle citations correctly
-% and also allows multiple citations separated by semicolons.
-% 4) Check figures and examples. Because the final format is double-
-% column, some adjustments may have to be made to fit text in the column
-% or to choose full-width (\figure*} figures.
-% 5) Change the style reference from aclsub to acl2000, and be sure
-% this style file is in your TeX search path
-
-
-% This is the LaTeX style file for EACL-95. It is identical to the
-% style file for ANLP '94 except that the margins are adjusted for A4
-% paper. -- abney 13 Dec 94
-
-% The ANLP '94 style file is a slightly modified
-% version of the style used for AAAI and IJCAI, using some changes
-% prepared by Fernando Pereira and others and some minor changes
-% by Paul Jacobs.
-
-% Papers prepared using the aclsub.sty file and acl.bst bibtex style
-% should be easily converted to final format using this style.
-% (1) Submission information (\wordcount, \subject, and \makeidpage)
-% should be removed.
-% (2) \summary should be removed. The summary material should come
-% after \maketitle and should be in the ``abstract'' environment
-% (between \begin{abstract} and \end{abstract}).
-% (3) Check all citations. This style should handle citations correctly
-% and also allows multiple citations separated by semicolons.
-% (4) Check figures and examples. Because the final format is double-
-% column, some adjustments may have to be made to fit text in the column
-% or to choose full-width (\figure*} figures.
-
-% Place this in a file called aclap.sty in the TeX search path.
-% (Placing it in the same directory as the paper should also work.)
-
-% Prepared by Peter F. Patel-Schneider, liberally using the ideas of
-% other style hackers, including Barbara Beeton.
-% This style is NOT guaranteed to work. It is provided in the hope
-% that it will make the preparation of papers easier.
-%
-% There are undoubtably bugs in this style. If you make bug fixes,
-% improvements, etc. please let me know. My e-mail address is:
-% pfps@research.att.com
-
-% Papers are to be prepared using the ``acl'' bibliography style,
-% as follows:
-% \documentclass[11pt]{article}
-% \usepackage{acl2000}
-% \title{Title}
-% \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
-% Author 3 \\ Address line \\ Address line}
-% \begin{document}
-% ...
-% \bibliography{bibliography-file}
-% \bibliographystyle{acl}
-% \end{document}
-
-% Author information can be set in various styles:
-% For several authors from the same institution:
-% \author{Author 1 \and ... \and Author n \\
-% Address line \\ ... \\ Address line}
-% if the names do not fit well on one line use
-% Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
-% For authors from different institutions:
-% \author{Author 1 \\ Address line \\ ... \\ Address line
-% \And ... \And
-% Author n \\ Address line \\ ... \\ Address line}
-% To start a seperate ``row'' of authors use \AND, as in
-% \author{Author 1 \\ Address line \\ ... \\ Address line
-% \AND
-% Author 2 \\ Address line \\ ... \\ Address line \And
-% Author 3 \\ Address line \\ ... \\ Address line}
-
-% If the title and author information does not fit in the area allocated,
-% place \setlength\titlebox{<new height>} right after
-% \usepackage{acl2000}
-% where <new height> can be something larger than 2.25in
-
-% \typeout{Conference Style for ACL 2000 -- released June 20, 2000}
-\typeout{Conference Style for ACL 2002 -- released April 8, 2002}
-
-% NOTE: Some laser printers have a serious problem printing TeX output.
-% These printing devices, commonly known as ``write-white'' laser
-% printers, tend to make characters too light. To get around this
-% problem, a darker set of fonts must be created for these devices.
-%
-
-% % Physical page layout - slightly modified from IJCAI by pj
-% \setlength\topmargin{0.0in} \setlength\oddsidemargin{-0.0in}
-% \setlength\textheight{9.0in} \setlength\textwidth{6.5in}
-% \setlength\columnsep{0.2in}
-% \newlength\titlebox
-% \setlength\titlebox{2.25in}
-% \setlength\headheight{0pt} \setlength\headsep{0pt}
-% %\setlength\footheight{0pt}
-% \setlength\footskip{0pt}
-% \thispagestyle{empty} \pagestyle{empty}
-% \flushbottom \twocolumn \sloppy
-
-% %% A4 version of page layout
-% \setlength\topmargin{-0.45cm} % changed by Rz -1.4
-% \setlength\oddsidemargin{.8mm} % was -0cm, changed by Rz
-% \setlength\textheight{23.5cm}
-% \setlength\textwidth{15.8cm}
-% \setlength\columnsep{0.6cm}
-% \newlength\titlebox
-% \setlength\titlebox{2.00in}
-% \setlength\headheight{5pt}
-% \setlength\headsep{0pt}
-% %\setlength\footheight{0pt}
-% \setlength\footskip{0pt}
-% \thispagestyle{empty}
-% \pagestyle{empty}
-
-
-% EACL 2003 A4 version of page layout
-\setlength\topmargin{3mm}
-\setlength\oddsidemargin{11mm}
-\setlength\evensidemargin{-7mm}
-\setlength\textheight{230mm}
-\setlength\textwidth{160mm}
-\setlength\columnsep{6mm}
-\newlength\titlebox
-\setlength\titlebox{50mm}
-\setlength\headheight{5pt}
-\setlength\headsep{0pt}
-%\setlength\footheight{0pt}
-\setlength\footskip{0pt}
-\thispagestyle{empty}
-\pagestyle{empty}
-
-
-\flushbottom \twocolumn \sloppy
-
-% We're never going to need a table of contents, so just flush it to
-% save space --- suggested by drstrip@sandia-2
-\def\addcontentsline#1#2#3{}
-
-% Title stuff, taken from deproc.
-\def\maketitle{\par
- \begingroup
- \def\thefootnote{\fnsymbol{footnote}}
- \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
- \twocolumn[\@maketitle] \@thanks
- \endgroup
- \setcounter{footnote}{0}
- \let\maketitle\relax \let\@maketitle\relax
- \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
-\def\@maketitle{\vbox to \titlebox{\hsize\textwidth
- \linewidth\hsize \vskip 0.125in minus 0.125in \centering
- {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
- {\def\and{\unskip\enspace{\rm and}\enspace}%
- \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
- \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
- \vskip 0.25in plus 1fil minus 0.125in
- \hbox to \linewidth\bgroup\large \hfil\hfil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
- \hbox to \linewidth\bgroup\large \hfil\hfil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author
- \end{tabular}\hss\egroup
- \hfil\hfil\egroup}
- \vskip 0.3in plus 2fil minus 0.1in
-}}
-\renewenvironment{abstract}{\centerline{\large\bf
- Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
-
-
-% bibliography
-
-\def\thebibliography#1{\section*{References}
- \global\def\@listi{\leftmargin\leftmargini
- \labelwidth\leftmargini \advance\labelwidth-\labelsep
- \topsep 1pt plus 2pt minus 1pt
- \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt}
- \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth
- \advance\leftmargin\labelsep\usecounter{enumi}}
- \def\newblock{\hskip .11em plus .33em minus -.07em}
- \sloppy
- \sfcode`\.=1000\relax}
-
-\def\@up#1{\raise.2ex\hbox{#1}}
-
-% most of cite format is from aclsub.sty by SMS
-
-% don't box citations, separate with ; and a space
-% also, make the penalty between citations negative: a good place to break
-% changed comma back to semicolon pj 2/1/90
-% \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
-% \def\@citea{}\@cite{\@for\@citeb:=#2\do
-% {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined
-% {b@\@citeb}{{\bf ?}\@warning
-% {Citation `\@citeb' on page \thepage \space undefined}}%
-% {\csname b@\@citeb\endcsname}}}{#1}}
-
-% don't box citations, separate with ; and a space
-% Replaced for multiple citations (pj)
-% don't box citations and also add space, semicolon between multiple citations
-\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
- \def\@citea{}\@cite{\@for\@citeb:=#2\do
- {\@citea\def\@citea{; }\@ifundefined
- {b@\@citeb}{{\bf ?}\@warning
- {Citation `\@citeb' on page \thepage \space undefined}}%
- {\csname b@\@citeb\endcsname}}}{#1}}
-
-% Allow short (name-less) citations, when used in
-% conjunction with a bibliography style that creates labels like
-% \citename{<names>, }<year>
-%
-\let\@internalcite\cite
-\def\cite{\def\citename##1{##1, }\@internalcite}
-\def\shortcite{\def\citename##1{}\@internalcite}
-\def\newcite{\leavevmode\def\citename##1{{##1} (}\@internalciteb}
-
-% Macros for \newcite, which leaves name in running text, and is
-% otherwise like \shortcite.
-\def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
- \def\@citea{}\@newcite{\@for\@citeb:=#2\do
- {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined
- {b@\@citeb}{{\bf ?}\@warning
- {Citation `\@citeb' on page \thepage \space undefined}}%
-\hbox{\csname b@\@citeb\endcsname}}}{#1}}
-\def\@internalciteb{\@ifnextchar [{\@tempswatrue\@citexb}{\@tempswafalse\@citexb[]}}
-
-\def\@newcite#1#2{{#1\if@tempswa, #2\fi)}}
-
-\def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
-
-%%% More changes made by SMS (originals in latex.tex)
-% Use parentheses instead of square brackets in the text.
-\def\@cite#1#2{({#1\if@tempswa , #2\fi})}
-
-% Don't put a label in the bibliography at all. Just use the unlabeled format
-% instead.
-\def\thebibliography#1{\vskip\parskip%
-\vskip\baselineskip%
-\def\baselinestretch{1}%
-\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
-\vskip-\parskip%
-\vskip-\baselineskip%
-\section*{References\@mkboth
- {References}{References}}\list
- {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
- \setlength{\itemindent}{-\parindent}}
- \def\newblock{\hskip .11em plus .33em minus -.07em}
- \sloppy\clubpenalty4000\widowpenalty4000
- \sfcode`\.=1000\relax}
-\let\endthebibliography=\endlist
-
-% Allow for a bibliography of sources of attested examples
-\def\thesourcebibliography#1{\vskip\parskip%
-\vskip\baselineskip%
-\def\baselinestretch{1}%
-\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
-\vskip-\parskip%
-\vskip-\baselineskip%
-\section*{Sources of Attested Examples\@mkboth
- {Sources of Attested Examples}{Sources of Attested Examples}}\list
- {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
- \setlength{\itemindent}{-\parindent}}
- \def\newblock{\hskip .11em plus .33em minus -.07em}
- \sloppy\clubpenalty4000\widowpenalty4000
- \sfcode`\.=1000\relax}
-\let\endthesourcebibliography=\endlist
-
-\def\@lbibitem[#1]#2{\item[]\if@filesw
- { \def\protect##1{\string ##1\space}\immediate
- \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}}
-
-\def\@bibitem#1{\item\if@filesw \immediate\write\@auxout
- {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces}
-
-% sections with less space
-\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
- -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
-\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
- -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
-\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{1.5ex plus
- 0.5ex minus .2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
-\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
- 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
-\def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
- 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
-
-% Footnotes
-\footnotesep 6.65pt %
-\skip\footins 9pt plus 4pt minus 2pt
-\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
-\setcounter{footnote}{0}
-
-% Lists and paragraphs
-\parindent 1em
-\topsep 4pt plus 1pt minus 2pt
-\partopsep 1pt plus 0.5pt minus 0.5pt
-\itemsep 2pt plus 1pt minus 0.5pt
-\parsep 2pt plus 1pt minus 0.5pt
-
-\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
-\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
-\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
-
-\def\@listi{\leftmargin\leftmargini}
-\def\@listii{\leftmargin\leftmarginii
- \labelwidth\leftmarginii\advance\labelwidth-\labelsep
- \topsep 2pt plus 1pt minus 0.5pt
- \parsep 1pt plus 0.5pt minus 0.5pt
- \itemsep \parsep}
-\def\@listiii{\leftmargin\leftmarginiii
- \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
- \topsep 1pt plus 0.5pt minus 0.5pt
- \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
- \itemsep \topsep}
-\def\@listiv{\leftmargin\leftmarginiv
- \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
-\def\@listv{\leftmargin\leftmarginv
- \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
-\def\@listvi{\leftmargin\leftmarginvi
- \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
-
-\abovedisplayskip 7pt plus2pt minus5pt%
-\belowdisplayskip \abovedisplayskip
-\abovedisplayshortskip 0pt plus3pt%
-\belowdisplayshortskip 4pt plus3pt minus3pt%
-
-% Less leading in most fonts (due to the narrow columns)
-% The choices were between 1-pt and 1.5-pt leading
-\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
-\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
-\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
-\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
-\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
-\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
-\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
-\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
-\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
-\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
-
-% Expanding the titlebox
-\setlength\titlebox{6.5cm}
+% File acl04.sty
+% January 07, 2004
+% Contact: rambow@cs.columbia.edu
+
+% This is the LaTeX style file for ACL 2004. It is identical to the
+% style files for ACL 2003, ACL 2002, ACL 2001, ACL 2000, EACL 95 and EACL
+% 99.
+%
+% -- Roberto Zamparelli, March 26, 2001
+% -- Dekang Lin, October 1, 2001
+
+% This is the LaTeX style file for ACL 2000. It is nearly identical to the
+% style files for EACL 95 and EACL 99. Minor changes include editing the
+% instructions to reflect use of \documentclass rather than \documentstyle
+% and removing the white space before the title on the first page
+% -- John Chen, June 29, 2000
+
+% To convert from submissions prepared using the style file aclsub.sty
+% prepared for the ACL 2000 conference, proceed as follows:
+% 1) Remove submission-specific information: \whichsession, \id,
+% \wordcount, \otherconferences, \area, \keywords
+% 2) \summary should be removed. The summary material should come
+% after \maketitle and should be in the ``abstract'' environment
+% 3) Check all citations. This style should handle citations correctly
+% and also allows multiple citations separated by semicolons.
+% 4) Check figures and examples. Because the final format is double-
+% column, some adjustments may have to be made to fit text in the column
+% or to choose full-width (\figure*) figures.
+% 5) Change the style reference from aclsub to acl2000, and be sure
+% this style file is in your TeX search path
+
+
+% This is the LaTeX style file for EACL-95. It is identical to the
+% style file for ANLP '94 except that the margins are adjusted for A4
+% paper. -- abney 13 Dec 94
+
+% The ANLP '94 style file is a slightly modified
+% version of the style used for AAAI and IJCAI, using some changes
+% prepared by Fernando Pereira and others and some minor changes
+% by Paul Jacobs.
+
+% Papers prepared using the aclsub.sty file and acl.bst bibtex style
+% should be easily converted to final format using this style.
+% (1) Submission information (\wordcount, \subject, and \makeidpage)
+% should be removed.
+% (2) \summary should be removed. The summary material should come
+% after \maketitle and should be in the ``abstract'' environment
+% (between \begin{abstract} and \end{abstract}).
+% (3) Check all citations. This style should handle citations correctly
+% and also allows multiple citations separated by semicolons.
+% (4) Check figures and examples. Because the final format is double-
+% column, some adjustments may have to be made to fit text in the column
+% or to choose full-width (\figure*) figures.
+
+% Place this in a file called aclap.sty in the TeX search path.
+% (Placing it in the same directory as the paper should also work.)
+
+% Prepared by Peter F. Patel-Schneider, liberally using the ideas of
+% other style hackers, including Barbara Beeton.
+% This style is NOT guaranteed to work. It is provided in the hope
+% that it will make the preparation of papers easier.
+%
+% There are undoubtably bugs in this style. If you make bug fixes,
+% improvements, etc. please let me know. My e-mail address is:
+% pfps@research.att.com
+
+% Papers are to be prepared using the ``acl'' bibliography style,
+% as follows:
+% \documentclass[11pt]{article}
+% \usepackage{acl2000}
+% \title{Title}
+% \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
+% Author 3 \\ Address line \\ Address line}
+% \begin{document}
+% ...
+% \bibliography{bibliography-file}
+% \bibliographystyle{acl}
+% \end{document}
+
+% Author information can be set in various styles:
+% For several authors from the same institution:
+% \author{Author 1 \and ... \and Author n \\
+% Address line \\ ... \\ Address line}
+% if the names do not fit well on one line use
+% Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
+% For authors from different institutions:
+% \author{Author 1 \\ Address line \\ ... \\ Address line
+% \And ... \And
+% Author n \\ Address line \\ ... \\ Address line}
+% To start a separate ``row'' of authors use \AND, as in
+% \author{Author 1 \\ Address line \\ ... \\ Address line
+% \AND
+% Author 2 \\ Address line \\ ... \\ Address line \And
+% Author 3 \\ Address line \\ ... \\ Address line}
+
+% If the title and author information does not fit in the area allocated,
+% place \setlength\titlebox{<new height>} right after
+% \usepackage{acl2000}
+% where <new height> can be something larger than 2.25in
+
+% \typeout{Conference Style for ACL 2000 -- released June 20, 2000}
+\typeout{Conference Style for ACL 2002 -- released April 8, 2002}
+
+% NOTE: Some laser printers have a serious problem printing TeX output.
+% These printing devices, commonly known as ``write-white'' laser
+% printers, tend to make characters too light. To get around this
+% problem, a darker set of fonts must be created for these devices.
+%
+
+% % Physical page layout - slightly modified from IJCAI by pj
+% \setlength\topmargin{0.0in} \setlength\oddsidemargin{-0.0in}
+% \setlength\textheight{9.0in} \setlength\textwidth{6.5in}
+% \setlength\columnsep{0.2in}
+% \newlength\titlebox
+% \setlength\titlebox{2.25in}
+% \setlength\headheight{0pt} \setlength\headsep{0pt}
+% %\setlength\footheight{0pt}
+% \setlength\footskip{0pt}
+% \thispagestyle{empty} \pagestyle{empty}
+% \flushbottom \twocolumn \sloppy
+
+% %% A4 version of page layout
+% \setlength\topmargin{-0.45cm} % changed by Rz -1.4
+% \setlength\oddsidemargin{.8mm} % was -0cm, changed by Rz
+% \setlength\textheight{23.5cm}
+% \setlength\textwidth{15.8cm}
+% \setlength\columnsep{0.6cm}
+% \newlength\titlebox
+% \setlength\titlebox{2.00in}
+% \setlength\headheight{5pt}
+% \setlength\headsep{0pt}
+% %\setlength\footheight{0pt}
+% \setlength\footskip{0pt}
+% \thispagestyle{empty}
+% \pagestyle{empty}
+
+
+% EACL 2003 A4 version of page layout
+\setlength\topmargin{3mm}
+\setlength\oddsidemargin{11mm}
+\setlength\evensidemargin{-7mm}
+\setlength\textheight{230mm}
+\setlength\textwidth{160mm}
+\setlength\columnsep{6mm}
+\newlength\titlebox
+\setlength\titlebox{50mm}
+\setlength\headheight{5pt}
+\setlength\headsep{0pt}
+%\setlength\footheight{0pt}
+\setlength\footskip{0pt}
+\thispagestyle{empty}
+\pagestyle{empty}
+
+
+\flushbottom \twocolumn \sloppy
+
+% We're never going to need a table of contents, so just flush it to
+% save space --- suggested by drstrip@sandia-2
+\def\addcontentsline#1#2#3{}
+
+% Title stuff, taken from deproc.
+\def\maketitle{\par
+ \begingroup
+ \def\thefootnote{\fnsymbol{footnote}}
+ \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
+ \twocolumn[\@maketitle] \@thanks
+ \endgroup
+ \setcounter{footnote}{0}
+ \let\maketitle\relax \let\@maketitle\relax
+ \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
+\def\@maketitle{\vbox to \titlebox{\hsize\textwidth
+ \linewidth\hsize \vskip 0.125in minus 0.125in \centering
+ {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
+ {\def\and{\unskip\enspace{\rm and}\enspace}%
+ \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
+ \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
+ \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
+ \vskip 0.25in plus 1fil minus 0.125in
+ \hbox to \linewidth\bgroup\large \hfil\hfil
+ \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
+ \hbox to \linewidth\bgroup\large \hfil\hfil
+ \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author
+ \end{tabular}\hss\egroup
+ \hfil\hfil\egroup}
+ \vskip 0.3in plus 2fil minus 0.1in
+}}
+\renewenvironment{abstract}{\centerline{\large\bf
+ Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
+
+
+% bibliography
+
+\def\thebibliography#1{\section*{References}
+ \global\def\@listi{\leftmargin\leftmargini
+ \labelwidth\leftmargini \advance\labelwidth-\labelsep
+ \topsep 1pt plus 2pt minus 1pt
+ \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt}
+ \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth
+ \advance\leftmargin\labelsep\usecounter{enumi}}
+ \def\newblock{\hskip .11em plus .33em minus -.07em}
+ \sloppy
+ \sfcode`\.=1000\relax}
+
+\def\@up#1{\raise.2ex\hbox{#1}}
+
+% most of cite format is from aclsub.sty by SMS
+
+% don't box citations, separate with ; and a space
+% also, make the penalty between citations negative: a good place to break
+% changed comma back to semicolon pj 2/1/90
+% \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
+% \def\@citea{}\@cite{\@for\@citeb:=#2\do
+% {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined
+% {b@\@citeb}{{\bf ?}\@warning
+% {Citation `\@citeb' on page \thepage \space undefined}}%
+% {\csname b@\@citeb\endcsname}}}{#1}}
+
+% don't box citations, separate with ; and a space
+% Replaced for multiple citations (pj)
+% don't box citations and also add space, semicolon between multiple citations
+\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
+ \def\@citea{}\@cite{\@for\@citeb:=#2\do
+ {\@citea\def\@citea{; }\@ifundefined
+ {b@\@citeb}{{\bf ?}\@warning
+ {Citation `\@citeb' on page \thepage \space undefined}}%
+ {\csname b@\@citeb\endcsname}}}{#1}}
+
+% Allow short (name-less) citations, when used in
+% conjunction with a bibliography style that creates labels like
+% \citename{<names>, }<year>
+%
+\let\@internalcite\cite
+\def\cite{\def\citename##1{##1, }\@internalcite}
+\def\shortcite{\def\citename##1{}\@internalcite}
+\def\newcite{\leavevmode\def\citename##1{{##1} (}\@internalciteb}
+
+% Macros for \newcite, which leaves name in running text, and is
+% otherwise like \shortcite.
+\def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
+ \def\@citea{}\@newcite{\@for\@citeb:=#2\do
+ {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined
+ {b@\@citeb}{{\bf ?}\@warning
+ {Citation `\@citeb' on page \thepage \space undefined}}%
+\hbox{\csname b@\@citeb\endcsname}}}{#1}}
+\def\@internalciteb{\@ifnextchar [{\@tempswatrue\@citexb}{\@tempswafalse\@citexb[]}}
+
+\def\@newcite#1#2{{#1\if@tempswa, #2\fi)}}
+
+\def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
+
+%%% More changes made by SMS (originals in latex.tex)
+% Use parentheses instead of square brackets in the text.
+\def\@cite#1#2{({#1\if@tempswa , #2\fi})}
+
+% Don't put a label in the bibliography at all. Just use the unlabeled format
+% instead.
+\def\thebibliography#1{\vskip\parskip%
+\vskip\baselineskip%
+\def\baselinestretch{1}%
+\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
+\vskip-\parskip%
+\vskip-\baselineskip%
+\section*{References\@mkboth
+ {References}{References}}\list
+ {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
+ \setlength{\itemindent}{-\parindent}}
+ \def\newblock{\hskip .11em plus .33em minus -.07em}
+ \sloppy\clubpenalty4000\widowpenalty4000
+ \sfcode`\.=1000\relax}
+\let\endthebibliography=\endlist
+
+% Allow for a bibliography of sources of attested examples
+\def\thesourcebibliography#1{\vskip\parskip%
+\vskip\baselineskip%
+\def\baselinestretch{1}%
+\ifx\@currsize\normalsize\@normalsize\else\@currsize\fi%
+\vskip-\parskip%
+\vskip-\baselineskip%
+\section*{Sources of Attested Examples\@mkboth
+ {Sources of Attested Examples}{Sources of Attested Examples}}\list
+ {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
+ \setlength{\itemindent}{-\parindent}}
+ \def\newblock{\hskip .11em plus .33em minus -.07em}
+ \sloppy\clubpenalty4000\widowpenalty4000
+ \sfcode`\.=1000\relax}
+\let\endthesourcebibliography=\endlist
+
+\def\@lbibitem[#1]#2{\item[]\if@filesw
+ { \def\protect##1{\string ##1\space}\immediate
+ \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}}
+
+\def\@bibitem#1{\item\if@filesw \immediate\write\@auxout
+ {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces}
+
+% sections with less space
+\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
+ -0.5ex minus -.2ex}{1.5ex plus 0.3ex minus .2ex}{\large\bf\raggedright}}
+\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
+ -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright}}
+\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{1.5ex plus
+ 0.5ex minus .2ex}{0.5ex plus .2ex}{\normalsize\bf\raggedright}}
+\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
+ 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
+\def\subparagraph{\@startsection{subparagraph}{5}{\parindent}{1.5ex plus
+ 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
+
+% Footnotes
+\footnotesep 6.65pt %
+\skip\footins 9pt plus 4pt minus 2pt
+\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt }
+\setcounter{footnote}{0}
+
+% Lists and paragraphs
+\parindent 1em
+\topsep 4pt plus 1pt minus 2pt
+\partopsep 1pt plus 0.5pt minus 0.5pt
+\itemsep 2pt plus 1pt minus 0.5pt
+\parsep 2pt plus 1pt minus 0.5pt
+
+\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
+\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em \leftmarginvi .5em
+\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
+
+\def\@listi{\leftmargin\leftmargini}
+\def\@listii{\leftmargin\leftmarginii
+ \labelwidth\leftmarginii\advance\labelwidth-\labelsep
+ \topsep 2pt plus 1pt minus 0.5pt
+ \parsep 1pt plus 0.5pt minus 0.5pt
+ \itemsep \parsep}
+\def\@listiii{\leftmargin\leftmarginiii
+ \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
+ \topsep 1pt plus 0.5pt minus 0.5pt
+ \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
+ \itemsep \topsep}
+\def\@listiv{\leftmargin\leftmarginiv
+ \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
+\def\@listv{\leftmargin\leftmarginv
+ \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
+\def\@listvi{\leftmargin\leftmarginvi
+ \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
+
+\abovedisplayskip 7pt plus2pt minus5pt%
+\belowdisplayskip \abovedisplayskip
+\abovedisplayshortskip 0pt plus3pt%
+\belowdisplayshortskip 4pt plus3pt minus3pt%
+
+% Less leading in most fonts (due to the narrow columns)
+% The choices were between 1-pt and 1.5-pt leading
+\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
+\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
+\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
+\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
+\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
+\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
+\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
+\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
+\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
+\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
+
+% Expanding the titlebox
+\setlength\titlebox{6.5cm}
View
736 papers/acl-06/colacl06.sty
@@ -1,368 +1,368 @@
-% File colacl06.sty
-% This is the LaTeX style file for COLING/ACL 2006. It is identical to the style file for EACL 2006.
-
-% File eacl2006.sty
-% September 19, 2005
-% Contact: e.agirre@ehu.es or Sergi.Balari@uab.es
-
-% This is the LaTeX style file for EACL 2006. It is nearly identical to the
-% style files for ACL2005, ACL 2002, ACL 2001, ACL 2000, EACL 95 and EACL
-% 99.
-%
-% Changes made include: adapt layout to A4 and centimeters, widen abstract
-
-% This is the LaTeX style file for ACL 2000. It is nearly identical to the
-% style files for EACL 95 and EACL 99. Minor changes include editing the
-% instructions to reflect use of \documentclass rather than \documentstyle
-% and removing the white space before the title on the first page
-% -- John Chen, June 29, 2000
-
-% To convert from submissions prepared using the style file aclsub.sty
-% prepared for the ACL 2000 conference, proceed as follows:
-% 1) Remove submission-specific information: \whichsession, \id,
-% \wordcount, \otherconferences, \area, \keywords
-% 2) \summary should be removed. The summary material should come
-% after \maketitle and should be in the ``abstract'' environment
-% 3) Check all citations. This style should handle citations correctly
-% and also allows multiple citations separated by semicolons.
-% 4) Check figures and examples. Because the final format is double-
-% column, some adjustments may have to be made to fit text in the column
-% or to choose full-width (\figure*) figures.
-% 5) Change the style reference from aclsub to acl2000, and be sure
-% this style file is in your TeX search path
-
-
-% This is the LaTeX style file for EACL-95. It is identical to the
-% style file for ANLP '94 except that the margins are adjusted for A4
-% paper. -- abney 13 Dec 94
-
-% The ANLP '94 style file is a slightly modified
-% version of the style used for AAAI and IJCAI, using some changes
-% prepared by Fernando Pereira and others and some minor changes
-% by Paul Jacobs.
-
-% Papers prepared using the aclsub.sty file and acl.bst bibtex style
-% should be easily converted to final format using this style.
-% (1) Submission information (\wordcount, \subject, and \makeidpage)
-% should be removed.
-% (2) \summary should be removed. The summary material should come
-% after \maketitle and should be in the ``abstract'' environment
-% (between \begin{abstract} and \end{abstract}).
-% (3) Check all citations. This style should handle citations correctly
-% and also allows multiple citations separated by semicolons.
-% (4) Check figures and examples. Because the final format is double-
-% column, some adjustments may have to be made to fit text in the column
-% or to choose full-width (\figure*) figures.
-
-% Place this in a file called aclap.sty in the TeX search path.
-% (Placing it in the same directory as the paper should also work.)
-
-% Prepared by Peter F. Patel-Schneider, liberally using the ideas of
-% other style hackers, including Barbara Beeton.
-% This style is NOT guaranteed to work. It is provided in the hope
-% that it will make the preparation of papers easier.
-%
-% There are undoubtedly bugs in this style. If you make bug fixes,
-% improvements, etc. please let me know. My e-mail address is:
-% pfps@research.att.com
-
-% Papers are to be prepared using the ``acl'' bibliography style,
-% as follows:
-% \documentclass[11pt]{article}
-% \usepackage{acl2000}
-% \title{Title}
-% \author{Author 1 \and Author 2 \\ Address line \\ Address line \And
-% Author 3 \\ Address line \\ Address line}
-% \begin{document}
-% ...
-% \bibliography{bibliography-file}
-% \bibliographystyle{acl}
-% \end{document}
-
-% Author information can be set in various styles:
-% For several authors from the same institution:
-% \author{Author 1 \and ... \and Author n \\
-% Address line \\ ... \\ Address line}
-% if the names do not fit well on one line use
-% Author 1 \\ {\bf Author 2} \\ ... \\ {\bf Author n} \\
-% For authors from different institutions:
-% \author{Author 1 \\ Address line \\ ... \\ Address line
-% \And ... \And
-% Author n \\ Address line \\ ... \\ Address line}
-% To start a separate ``row'' of authors use \AND, as in
-% \author{Author 1 \\ Address line \\ ... \\ Address line
-% \AND
-% Author 2 \\ Address line \\ ... \\ Address line \And
-% Author 3 \\ Address line \\ ... \\ Address line}
-
-% If the title and author information does not fit in the area allocated,
-% place \setlength\titlebox{<new height>} right after
-% \usepackage{acl2000}
-% where <new height> can be something larger than the 2.00in default set below
-
-% \typeout{Conference Style for ACL 2000 -- released June 20, 2000}
-% Banner written to the terminal/log when this style file is loaded.
-\typeout{Conference Style for ACL 2005 -- released October 11, 2004}
-
-% NOTE: Some laser printers have a serious problem printing TeX output.
-% These printing devices, commonly known as ``write-white'' laser
-% printers, tend to make characters too light. To get around this
-% problem, a darker set of fonts must be created for these devices.
-%
-
-%% % Physical page layout - slightly modified from IJCAI by pj
-%% \setlength\topmargin{0.0in} \setlength\oddsidemargin{-0.0in}
-%% \setlength\textheight{9.0in} \setlength\textwidth{6.5in}
-%% \setlength\columnsep{0.2in}
-%% \newlength\titlebox
-%% \setlength\titlebox{2.25in}
-%% \setlength\headheight{0pt} \setlength\headsep{0pt}
-%% %\setlength\footheight{0pt}
-%% \setlength\footskip{0pt}
-%% \thispagestyle{empty} \pagestyle{empty}
-%% \flushbottom \twocolumn \sloppy
-
-%% Original A4 version of page layout
-%% \setlength\topmargin{-0.45cm} % changed by Rz -1.4
-%% \setlength\oddsidemargin{.8mm} % was -0cm, changed by Rz
-%% \setlength\textheight{23.5cm}
-%% \setlength\textwidth{15.8cm}
-%% \setlength\columnsep{0.6cm}
-%% \newlength\titlebox
-%% \setlength\titlebox{2.00in}
-%% \setlength\headheight{5pt}
-%% \setlength\headsep{0pt}
-%% \setlength\footheight{0pt}
-%% \setlength\footskip{0pt}
-%% \thispagestyle{empty}
-%% \pagestyle{empty}
-
-% A4 modified by Eneko
-% Physical paper size.
-\setlength{\paperwidth}{21cm} % A4
-\setlength{\paperheight}{29.7cm}% A4
-% Text block: margins, height, width, and the gap between the two columns.
-\setlength\topmargin{-0.5cm}
-\setlength\oddsidemargin{0cm}
-\setlength\textheight{24.7cm}
-\setlength\textwidth{16.0cm}
-\setlength\columnsep{0.6cm}
-% \titlebox is the height of the \vbox that \@maketitle typesets the title
-% and author block into; documents may override it with \setlength.
-\newlength\titlebox
-\setlength\titlebox{2.00in}
-% Small header area with no separation; page styles are set to empty.
-\setlength\headheight{5pt}
-\setlength\headsep{0pt}
-\thispagestyle{empty}
-\pagestyle{empty}
-
-
-% Flush page bottoms, two-column layout, and \sloppy line breaking.
-\flushbottom \twocolumn \sloppy
-
-% We're never going to need a table of contents, so just flush it to
-% save space --- suggested by drstrip@sandia-2
-% \addcontentsline is redefined to take its three arguments and expand to
-% nothing.
-\def\addcontentsline#1#2#3{}
-
-% Title stuff, taken from deproc.
-% \maketitle: inside a group, switches footnote marks to \fnsymbol and makes
-% \@makefnmark a zero-width box holding the superscripted mark, typesets
-% \@maketitle as the optional argument of \twocolumn, then expands \@thanks.
-% After the group it resets the footnote counter, \let's \maketitle and
-% \@maketitle to \relax, globally empties \@thanks, \@author and \@title, and
-% \let's \thanks to \relax.
-\def\maketitle{\par
- \begingroup
- \def\thefootnote{\fnsymbol{footnote}}
- \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}}
- \twocolumn[\@maketitle] \@thanks
- \endgroup
- \setcounter{footnote}{0}
- \let\maketitle\relax \let\@maketitle\relax
- \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
-% \@maketitle: builds a \vbox of height \titlebox, with \hsize and \linewidth
-% set to \textwidth, containing the centered title in \Large\bf followed by
-% the author block.  Authors are set in \large inside a full-width \hbox, in
-% bold one-column tabulars that are each wrapped in a zero-width \hbox.
-% Within the author block:
-%   \and  puts the word "and" (in \rm) between names;
-%   \And  closes the current tabular and box, adds stretchable horizontal
-%         space (1in plus 2fil), and opens a new zero-width box and tabular;
-%   \AND  closes the current row, adds a vertical skip, and opens a new
-%         full-width row with a fresh tabular.
-\def\@maketitle{\vbox to \titlebox{\hsize\textwidth
- \linewidth\hsize \vskip 0.125in minus 0.125in \centering
- {\Large\bf \@title \par} \vskip 0.2in plus 1fil minus 0.1in
- {\def\and{\unskip\enspace{\rm and}\enspace}%
- \def\And{\end{tabular}\hss \egroup \hskip 1in plus 2fil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}%
- \def\AND{\end{tabular}\hss\egroup \hfil\hfil\egroup
- \vskip 0.25in plus 1fil minus 0.125in
- \hbox to \linewidth\bgroup\large \hfil\hfil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf}
- \hbox to \linewidth\bgroup\large \hfil\hfil
- \hbox to 0pt\bgroup\hss \begin{tabular}[t]{c}\bf\@author
- \end{tabular}\hss\egroup
- \hfil\hfil\egroup}
- \vskip 0.3in plus 2fil minus 0.1in
-}}
-
-% margins for abstract
-% Redefines the abstract environment: a centered "Abstract" heading in
-% \large\bf, then the body set inside a list whose left and right margins
-% are both 0.6cm.  The single \item[] has an empty label; \ignorespaces and
-% \unskip discard spaces at the start and end of the body.
-\renewenvironment{abstract}%
- {\centerline{\large\bf Abstract}%
- \begin{list}{}%
- {\setlength{\rightmargin}{0.6cm}%
- \setlength{\leftmargin}{0.6cm}}%
- \item[]\ignorespaces}%
- {\unskip\end{list}}
-
-%\renewenvironment{abstract}{\centerline{\large\bf
-% Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
-
-
-% bibliography
-
-% \thebibliography{<widest label>}: starts an unnumbered "References"
-% section and opens a list labelled [1], [2], ... using the enumi counter.
-% \labelwidth is set to the width of [#1], and \leftmargin to that width
-% plus \labelsep.
-% \@listi is redefined with \global, so the tighter level-one list spacing
-% set here is not undone when the bibliography's group ends.
-% NOTE(review): \newblock uses "minus -.07em", i.e. a negative shrink
-% component -- confirm that this is intended.
-\def\thebibliography#1{\section*{References}
- \global\def\@listi{\leftmargin\leftmargini
- \labelwidth\leftmargini \advance\labelwidth-\labelsep
- \topsep 1pt plus 2pt minus 1pt
- \parsep 0.25ex plus 1pt \itemsep 0.25ex plus 1pt}
- \list {[\arabic{enumi}]}{\settowidth\labelwidth{[#1]}\leftmargin\labelwidth
- \advance\leftmargin\labelsep\usecounter{enumi}}
- \def\newblock{\hskip .11em plus .33em minus -.07em}
- \sloppy
- \sfcode`\.=1000\relax}
-
-% \@up{<text>}: puts <text> in an \hbox and raises it by .2ex.
-\def\@up#1{\raise.2ex\hbox{#1}}
-
-% most of cite format is from aclsub.sty by SMS
-
-% don't box citations, separate with ; and a space
-% also, make the penalty between citations negative: a good place to break
-% changed comma back to semicolon pj 2/1/90
-% \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
-% \def\@citea{}\@cite{\@for\@citeb:=#2\do
-% {\@citea\def\@citea{;\penalty\@citeseppen\ }\@ifundefined
-% {b@\@citeb}{{\bf ?}\@warning
-% {Citation `\@citeb' on page \thepage \space undefined}}%
-% {\csname b@\@citeb\endcsname}}}{#1}}
-
-% don't box citations, separate with ; and a space
-% Replaced for multiple citations (pj)
-% don't box citations and also add space, semicolon between multiple citations
-% \@citex[<note>]{<keys>}: when \if@filesw is true, writes \citation{<keys>}
-% to the .aux file, then loops over the keys with \@for.  \@citea is empty
-% for the first key and is redefined to "; " afterwards, so it acts as the
-% separator.  A key with no b@<key> definition prints a bold "?" and raises
-% a warning; otherwise \b@<key> is expanded.  The assembled text and the
-% note #1 are passed to \@cite.
-\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
- \def\@citea{}\@cite{\@for\@citeb:=#2\do
- {\@citea\def\@citea{; }\@ifundefined
- {b@\@citeb}{{\bf ?}\@warning
- {Citation `\@citeb' on page \thepage \space undefined}}%
- {\csname b@\@citeb\endcsname}}}{#1}}
-
-% Allow short (name-less) citations, when used in
-% conjunction with a bibliography style that creates labels like
-% \citename{<names>, }<year>
-%
-% The original \cite is saved as \@internalcite.  Each user command below
-% first redefines \citename, which controls how the author-name part of a
-% label is printed, and then hands over to an internal cite macro:
-%   \cite      prints the names followed by ", ";
-%   \shortcite drops the names entirely;
-%   \newcite   prints the names with \frenchspacing followed by " (" and
-%              continues with \@internalciteb, which is not defined in this
-%              part of the file (presumably it is defined further down and
-%              closes the parenthesis -- confirm).
-\let\@internalcite\cite
-\def\cite{\def\citename##1{##1, }\@internalcite}
-\def\shortcite{\def\citename##1{}\@internalcite}
-\def\newcite{\def\citename##1{{\frenchspacing##1} (}\@internalciteb}
-
-% Macros for \newcite, which leaves name in running text, and is
-% otherwise like \shortcite.
-\def\@citexb[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
- \def\@citea{}\@newcite{\@for\@citeb:=#2\do
- {\@citea\def\@citea{;\penalty\@m\ }\@ifundefined