add docs to main

phrocker · Feb 24, 2021 · e7959b0 · e7959b0
1 parent 5b59525
commit e7959b0
Show file tree

Hide file tree

Showing 7 changed files with 2,208 additions and 54 deletions.
diff --git a/docs/asynciterator.rst b/docs/asynciterator.rst
@@ -0,0 +1,163 @@
+.. image:: https://camo.githubusercontent.com/dbf39cef1a973d8741437693e96b59e31d9e3754/68747470733a2f2f7777772e736861726b626974652e696f2f77702d636f6e74656e742f75706c6f6164732f323031372f30322f736861726b626974652e6a7067
+
+Async Example
+==================
+
+Below is an example client based on pysharkbite. We'll step through important aspects followed by the entirety of the code 
+at the end.
+
+The first interesting piece of code that we come across is printasync, which is intended to asynchronously
+print the rows from all keys. This coroutine will be used later to loop through the asynchronous iterator.
+
+.. code-block:: python
+
+    async def printasync(iter):
+        async for keyvalue in iter:
+            key = keyvalue.getKey()
+            print(key.getRow())
+
+After writing data, the example creates a scanner and adds a range from 'row' to 'row3' to it. It then
+obtains an asyncio event loop and runs the printasync coroutine on the scanner's result set. As stated above,
+this lets us print the rows asynchronously.
+
+.. code-block:: python
+ 
+    scanner = table_operations.createScanner(auths, 2)
+    
+    range = pysharkbite.Range("row",True,"row3",False)
+    
+    scanner.addRange( range )
+    
+    resultset = scanner.getResultSet()
+    
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(printasync(resultset))
+
+
+.. code-block:: python
+
+    #!/usr/bin/python
+    # Licensed to the Apache Software Foundation (ASF) under one
+    # or more contributor license agreements.  See the NOTICE file
+    # distributed with this work for additional information
+    # regarding copyright ownership.  The ASF licenses this file
+    # to you under the Apache License, Version 2.0 (the
+    # "License"); you may not use this file except in compliance
+    # with the License.  You may obtain a copy of the License at
+    #
+    #   http://www.apache.org/licenses/LICENSE-2.0
+    #
+    # Unless required by applicable law or agreed to in writing,
+    # software distributed under the License is distributed on an
+    # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    # KIND, either express or implied.  See the License for the
+    # specific language governing permissions and limitations
+    # under the License.
+    from ctypes import cdll
+    from argparse import ArgumentParser
+    from ctypes import cdll
+    import ctypes
+    import traceback
+    import time
+    import asyncio
+
+
+    """
+
+    This is an Example of using the Python connectors. The example will accept user input
+    create a table writing arbitrary information to it via the BatchWriter and scanner will put the written data      
+        
+                
+    """
+
+    parser = ArgumentParser(description="This is an Apache Accummulo Python connector")
+
+    parser.add_argument("-i", "--instance", dest="instance",
+                        help="Apache Accumulo Instance Name", required=True)
+    parser.add_argument("-z", "--zookeepers", dest="zookeepers",
+                        help="Comma Separated Zookeeper List", required=True)
+    parser.add_argument("-u", "--username", dest="username",
+                        help="User to access Apache Accumulo", required=True)
+    parser.add_argument("-p", "--password", dest="password",
+                        help="Password to access Apache Accumulo. May also be supplied at the command line")
+    parser.add_argument("-t", "--table", dest="table",
+                        help="Table to create/update")
+    args = parser.parse_args()
+
+    password = args.password
+    table = args.table
+
+    async def printasync(iter):
+        async for keyvalue in iter:
+            key = keyvalue.getKey()
+            print(key.getRow())
+
+    if not password:
+        print("Please enter your password")
+        password = input()
+        
+    if not table:
+        table = "blahblahd"
+
+    import pysharkbite
+
+    configuration = pysharkbite.Configuration()
+
+    zk = pysharkbite.ZookeeperInstance(args.instance, args.zookeepers, 1000, configuration)
+
+    user = pysharkbite.AuthInfo(args.username, password, zk.getInstanceId()) 
+
+    try:
+        connector = pysharkbite.AccumuloConnector(user, zk)
+
+
+        table_operations = connector.tableOps(table)
+
+        if not table_operations.exists(False):
+            print ("Creating table " + table)
+            table_operations.create(False)  
+        else:
+            print (table + " already exists, so not creating it")  
+        
+        
+        auths = pysharkbite.Authorizations()
+        
+        """ Add authorizations """ 
+        """ mutation.put("cf","cq","cv",1569786960) """
+        
+        writer = table_operations.createWriter(auths, 10)
+        
+        mutation = pysharkbite.Mutation("row2");    
+        
+        mutation.put("cf","cq","",1569786960, "value")
+        mutation.put("cf2","cq2","",1569786960, "value2")
+        """ no value """
+        mutation.put("cf3","cq3","",1569786960, "") 
+        
+        writer.addMutation( mutation )
+        
+        writer.close()
+        
+        time.sleep(2)
+        
+        """ auths.addAuthorization("cv") """
+        
+        scanner = table_operations.createScanner(auths, 2)
+        
+        range = pysharkbite.Range("row",True,"row3",False)
+        
+        scanner.addRange( range )
+        
+        resultset = scanner.getResultSet()
+        
+        loop = asyncio.get_event_loop()
+        loop.run_until_complete(printasync(resultset))
+            
+        
+        """ delete your table if user did not create temp """
+        if not args.table:
+            table_operations.remove()
+        
+    except RuntimeError as e:
+        traceback.print_exc()
+        print("Oops, error caused: " + str(e))
diff --git a/docs/conf.py b/docs/conf.py
@@ -24,9 +24,9 @@
 author = 'Marc Parisi'
 
 # The short X.Y version
-version = ''
+version = '0.7.2'
 # The full version, including alpha/beta/rc tags
-release = ''
+release = '0.7.2'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/docs/hdfsclient.rst b/docs/hdfsclient.rst
@@ -0,0 +1,20 @@
+.. image:: https://camo.githubusercontent.com/dbf39cef1a973d8741437693e96b59e31d9e3754/68747470733a2f2f7777772e736861726b626974652e696f2f77702d636f6e74656e742f75706c6f6164732f323031372f30322f736861726b626974652e6a7067
+
+HDFS Client
+==================
+
+The `hdfs client <https://docs.sharkbite.io/en/latest/sharkbitedocs.html#pysharkbite.Hdfs>`_ is a nearly complete client. It lacks some
+features found in the C++ client; these will be added over time. Please visit the link above for the API that is currently supported.
+
+An example usage of these functions is below. Note that when opening RFiles in pysharkbite, you must specify the full path, including
+the hdfs protocol, if the file is located on HDFS. This will open a full HDFS client to access these files.
+
+.. code-block:: python
+
+    import pysharkbite
+
+    hdfs = pysharkbite.Hdfs("hdfs://namenode:8020",8020);
+
+    hdfs.mkdir("/directoryA/directoryB");
+
+    hdfs.list("/");
diff --git a/docs/index.rst b/docs/index.rst
@@ -6,11 +6,21 @@
 Welcome to Sharkbite's documentation!
 =====================================
 
+.. image:: https://camo.githubusercontent.com/dbf39cef1a973d8741437693e96b59e31d9e3754/68747470733a2f2f7777772e736861726b626974652e696f2f77702d636f6e74656e742f75706c6f6164732f323031372f30322f736861726b626974652e6a7067
+
 .. toctree::
    :maxdepth: 2
 
    intro
-
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Examples
+
+   asynciterator
+   hdfsclient
+   stats
+
 .. toctree::
    :maxdepth: 2
    :caption: API Documentation

diff --git a/docs/intro.rst b/docs/intro.rst
@@ -3,21 +3,17 @@
 About Sharkbite
 ==================
 **Sharkbite** is an HDFS and native client for key/value stores. With 
-initial support for [Apache Accumulo][accumulo], the design can and has been used to support other key/value
-stores. Development began in 2014 and has  slowly evolved. there is no structural specificity to Accumulo
+initial support for Apache Accumulo, the design can and has been used to support other key/value
+stores. Development began in 2014 and has slowly evolved. There is no structural specificity to Accumulo
 despite being the base implementation. Despite this the examples below will look very much like Accumulo due to aliasing. This is intentional.
 
-Capabilities That will be supported in V0.6 : 
+Capabilities that will be supported in V1.0:
 
  * Works with Accumulo 1.6.x, 1.7.x, 1.8.x, 1.9.x and 2.x
  * **Read/Write** : Reading and writing data to Accumulo is currently supported.
  * **Table Operations** : Most table operations are currently supported. This includes the fate operations that the normal Accumulo client performs.
- * **Security Operations** : Security operations aren't all implemented, but you should be able to add users, change authorizations, passwords, and remove users.
+ * **Security Operations** : Most security operations are implemented. Please let us know of any gaps.
 
-Current Master progress 
-  * Development is focusing on abstracting out 2.x changes for Apache Accumulo.
-  * Help is desired in abstracting these thrift changes.  
-
 About the name
 **************
 **Sharkbite's** name originated from design as a connector that abstracted components in which we tightly
@@ -30,36 +26,59 @@ Installing
 **********
 This python client can be installed via `pip install sharkbite`
 
-[A Python example](https://github.com/phrocker/sharkbite/blob/master/examples/pythonexample.py) is included. This is your primary example of the Python bound sharkbite
+`A Python example <https://github.com/phrocker/sharkbite/blob/master/examples/pythonexample.py>`_ is included. This is your primary example of the Python bound sharkbite
 library.
 
-Sharkbite now supports async iteration [A simple example](https://github.com/phrocker/sharkbite/blob/master/examples/asyncexample.py) is provided. 
+**Sharkbite** supports async iteration. `A simple example <https://github.com/phrocker/sharkbite/blob/master/examples/asyncexample.py>`_ is provided.
+
 Features
 ********
 
+HDFS Client
+************
+
+**Sharkbite** supports a limited HDFS client. As this functionality grows so will the capabilities. Version 0.7 will support a complete
+HDFS client. Since Sharkbite is built as Python bindings around a C++ client, the Python client will mature slightly behind the C++ client,
+hence the delay in building this into V0.7.
+
+Version Detection
+*****************
+
+**Sharkbite** detects the version of Apache Accumulo. Therefore you will be able to simply create a connector to the zookeeper instance.
+
 
 Hedged Reads
 ************
 
-Sharkbite supports hedged reads ( executing scans against RFiles when they can be accessed ) concurrently with 
+**Sharkbite** supports hedged reads ( executing scans against RFiles when they can be accessed ) concurrently with 
 Accumulo RPC scans. The first executor to complete will return your results. This feature is in beta and not suggested
 for production environments.
 
 Enable it with the following option:
 
 .. code-block:: python
-	import pysharkbite as sharkbite	
+
+	import pysharkbite as sharkbite
+
 	connector = sharkbite.AccumuloConnector(user, zk)
-	table_operations = connector.tableOps(table)  
-	scanner = table_operations.createScanner(auths, 2)
-	range = sharkbite.Range("myrow")
-	scanner.addRange( range )
-	### enable the beta option of hedged reads
-	scanner.setOption( sharkbite.ScannerOptions.HedgedReads )
-	resultset = scanner.getResultSet()
-	for keyvalue in resultset:
-	    key = keyvalue.getKey()
-	    value = keyvalue.getValue()
+
+	table_operations = connector.tableOps(table)
+
+	scanner = table_operations.createScanner(auths, 2)
+
+	range = sharkbite.Range("myrow")
+
+	scanner.addRange( range )
+
+	### enable the beta option of hedged reads
+
+	scanner.setOption( sharkbite.ScannerOptions.HedgedReads )
+
+	resultset = scanner.getResultSet()
+
+	for keyvalue in resultset:
+	    key = keyvalue.getKey()
+	    value = keyvalue.getValue()
 	
 
 
@@ -73,43 +92,49 @@ Iterators can be defined as single function lambdas or by implementing the seek
 
 The first example implements the seek and onNext methods. seek is optional if you don't wish to adjust the range. Once keys are being iterated you may get the top key. You may call 
 iterator.next() after or the infrastructure will do that for you. 
+
 .. code-block:: python
-  class myIterator: 
-    def seek(iterator,soughtRange):
-      range = Range("a")
-      iterator.seek(range)
-
-
-    def onNext(iterator):
-      if (iterator.hasTop()):
-      	kv = KeyValue()
-    	  key = iterator.getTopKey()
-  	    cf = key.getColumnFamily()
-  	    value = iterator.getTopValue()
-  	    key.setColumnFamily("oh changed " + cf)
-  	    iterator.next()
-  	    return KeyValue(key,value)
-      else: 
-        return None
+
+	class myIterator: 
+	  def seek(iterator,soughtRange):
+	    range = Range("a")
+	    iterator.seek(range)
+	
+	
+	  def onNext(iterator):
+	    if (iterator.hasTop()):
+	      kv = KeyValue()
+	      key = iterator.getTopKey()
+	      cf = key.getColumnFamily()
+	      value = iterator.getTopValue()
+	      key.setColumnFamily("oh changed " + cf)
+	      iterator.next()
+	      return KeyValue(key,value)
+	    else: 
+	      return None
+
 
 If this is defined in a separate file, you may use it with the following code snippet
 
 .. code-block:: python
-  with open('test.iter', 'r') as file:
-  iterator = file.read()
-  ## name, iterator text, priority
-  iterator = pysharkbite.PythonIterator("PythonIterator",iteratortext,100)
-  scanner.addIterator(iterator)    
+
+	with open('test.iter', 'r') as file:
+	 iteratortext = file.read()
+	## name, iterator text, priority
+	 iterator = pysharkbite.PythonIterator("PythonIterator",iteratortext,100)
+	 scanner.addIterator(iterator)    
 
 Alternatively, you may use lambdas. The lambda you provide will be passed the KeyValue ( getKey() and getValue() return the constituent parts). A partial code example of setting it up is below.
 You may return a Key or KeyValue object. If you return the former an empty value will be returned.
 
 .. code-block:: python
-## define only the name and priority 
-  iterator = pysharkbite.PythonIterator("PythonIterator",100)
-  ## define a lambda to ajust the column family.
-  iterator = iterator.onNext("lambda x : Key( x.getKey().getRow(), 'new cf', x.getKey().getColumnQualifier()) ")
-  scanner.addIterator(iterator)
+
+	## define only the name and priority 
+	iterator = pysharkbite.PythonIterator("PythonIterator",100)
+	## define a lambda to adjust the column family.
+	iterator = iterator.onNext("lambda x : Key( x.getKey().getRow(), 'new cf', x.getKey().getColumnQualifier()) ")
+	
+	scanner.addIterator(iterator)
 	
 You may either define a python iterator as a text implementation or a lambda. Both cannot be used simultaneously.