Permalink
Browse files

initial commit

  • Loading branch information...
jt6211 committed Mar 16, 2013
0 parents commit 82dd73c0c01a4548c91a22d7babefd124b59227f
@@ -0,0 +1,2 @@
+*.pyc
+
716 LICENSE

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,9 @@
+pyaccumulo
+==========
+
+A Python client library for Apache Accumulo.
+
+Licensed under the Apache 2.0 License
+
+This is still a work in progress.
+
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyaccumulo import Accumulo, Mutation, Range
+from pyaccumulo.iterators import *
+
+conn = Accumulo()
+
+table = "analytics"
+
+if conn.table_exists(table):
+ conn.delete_table(table)
+conn.create_table(table)
+
+summing = SummingCombiner(priority=10)
+summing.add_column("sum")
+summing.add_column("count")
+summing.attach(conn, table)
+
+sumarray = SummingArrayCombiner(priority=11)
+sumarray.add_column("histo")
+sumarray.attach(conn, table)
+
+mincom = MinCombiner(priority=12)
+mincom.add_column("min")
+mincom.attach(conn, table)
+
+maxcom = MaxCombiner(priority=13)
+maxcom.add_column("max")
+maxcom.attach(conn, table)
+
+wr = conn.create_batch_writer(table)
+
+for num in range(0, 1000):
+ m = Mutation("row")
+ m.put(cf="sum", cq="cq", val="%d"%num)
+ m.put(cf="count", cq="cq", val="%d"%1)
+ m.put(cf="min", cq="cq", val="%d"%num)
+ m.put(cf="max", cq="cq", val="%d"%num)
+ m.put(cf="histo", cq="cq", val=",".join( [str(x) for x in [1,2,3,4,5,6,7,8,9]]))
+
+ wr.add_mutation(m)
+wr.close()
+
+for e in conn.scan(table):
+ print e
+
+conn.close()
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyaccumulo import Accumulo, Mutation, Range
+from pyaccumulo.iterators import *
+
+from proxy.ttypes import IteratorSetting, IteratorScope
+from util import hashcode
+import hashlib, re
+
+conn = Accumulo()
+
+table = "doc_search"
+if conn.table_exists(table):
+ conn.delete_table(table)
+conn.create_table(table)
+
+wr = conn.create_batch_writer(table)
+
+license_file = "LICENSE"
+linenum = 0
+
+with file(license_file) as infile:
+ for line in infile:
+ linenum += 1
+ line = line.strip()
+ uuid = str(linenum)
+
+ m = Mutation("s%02d"% ((hashcode(uuid) & 0x0ffffffff)%4))
+ m.put(cf="e\0license", cq=uuid, val=line)
+ for tok in set(re.split('[^\w.]+', line.lower())):
+ m.put(cf="i", cq="%s\0license\0%s\0info"%(tok, uuid), val="")
+ wr.add_mutation(m)
+wr.close()
+
+for e in conn.batch_scan(table, scanranges=[Range(srow="s0", erow="s1")], iterators=[IndexedDocIterator(priority=21, terms=["derived", "from"])]):
+ print e
+conn.close()
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyaccumulo import Accumulo, Mutation, Range
+from pyaccumulo.iterators import *
+
+from proxy.ttypes import IteratorSetting, IteratorScope
+from util import hashcode
+import hashlib, re
+
+conn = Accumulo()
+
+table = "search"
+if conn.table_exists(table):
+ conn.delete_table(table)
+conn.create_table(table)
+
+wr = conn.create_batch_writer(table)
+
+license_file = "LICENSE"
+linenum = 0
+
+with file(license_file) as infile:
+ for line in infile:
+ linenum += 1
+ line = line.strip()
+ uuid = str(linenum)
+
+ m = Mutation(uuid)
+ m.put(cf="e", cq="", val=line)
+ wr.add_mutation(m)
+
+ m = Mutation("s%02d"% ((hashcode(uuid) & 0x0ffffffff)%4))
+ for tok in set(re.split('[\W]+', line.lower())):
+ m.put(tok, cq=uuid, val="")
+ wr.add_mutation(m)
+wr.close()
+
+uuids = []
+for e in conn.batch_scan(table, scanranges=[Range(srow="s0", erow="s1")], iterators=[IntersectingIterator(priority=21, terms=["software", "source", "code"])]):
+ uuids.append(e.cq)
+
+for doc in conn.batch_scan(table, scanranges=[Range(srow=uuid, erow=uuid) for uuid in uuids]):
+ print doc
+
+conn.close()
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyaccumulo import Accumulo, Mutation, Range
+from pyaccumulo.iterators import *
+
+from proxy.ttypes import IteratorSetting, IteratorScope
+from util import hashcode
+import hashlib, re
+
+conn = Accumulo()
+
+table = "regexes"
+if conn.table_exists(table):
+ conn.delete_table(table)
+conn.create_table(table)
+
+wr = conn.create_batch_writer(table)
+
+license_file = "LICENSE"
+linenum = 0
+
+with file(license_file) as infile:
+ for line in infile:
+ linenum += 1
+
+ m = Mutation(str(linenum))
+ m.put(cf="e", cq="", val=line.strip())
+ wr.add_mutation(m)
+wr.close()
+
+regex1 = RegExFilter(priority=21, val_regex=".*stated.*", match_substring=True, name="RegExFilter1")
+regex2 = RegExFilter(priority=22, val_regex='.*patent', match_substring=True, name="RegExFilter2")
+regex3 = RegExFilter(priority=23, val_regex='have made', match_substring=True, name="RegExFilter3")
+
+for e in conn.batch_scan(table, cols=[["e"]], iterators=[regex1, regex2, regex3]):
+ print e
+
+conn.close()
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyaccumulo import Accumulo, Mutation, Range
+
+table = "pythontest"
+
+conn = Accumulo()
+
+if conn.table_exists(table):
+ conn.delete_table(table)
+
+conn.create_table(table)
+wr = conn.create_batch_writer(table)
+
+print "Ingesting some data ..."
+for num in range(1, 100):
+ label = '%03d'%num
+ mut = Mutation('r_%s'%label)
+ mut.put(cf='cf_%s'%label, cq='cq1', val='value_%s'%label)
+ mut.put(cf='cf_%s'%label, cq='cq2', val='value_%s'%label)
+ wr.add_mutation(mut)
+wr.close()
+
+
+print "Rows 001 through 003 ..."
+for entry in conn.scan(table, scanrange=Range(srow='r_001', erow='r_003'), cols=[]):
+ print entry
+
+print "Rows 001 and 011 ..."
+for entry in conn.batch_scan(table, scanranges=[Range(srow='r_001', erow='r_001'), Range(srow='r_011', erow='r_011')]):
+ print entry
+
+conn.close()
Oops, something went wrong.

0 comments on commit 82dd73c

Please sign in to comment.