# 用python操作HBase


使用happybase包来实现用python操作hbase

happybase主页：http://happybase.readthedocs.io/en/latest/user.html

安装:
pip install thrift happybase

验证： python -c "import happybase"

如果没有报错，则说明安装成功。如果遇到 "thriftPy does not support generating module with path in protocol 'c'" 的错误，

请参考： http://blog.csdn.net/sinolover/article/details/77714648

hbase表结构参考：http://blog.csdn.net/cnbird2008/article/details/9151585

从外网访问时，请使用 host=112.17.80.185、port=8000，
即用如下代码建立连接：conn = happybase.Connection('112.17.80.185', port=8000)

In [89]:
import happybase

In [90]:
# Address handed to happybase.Connection / ConnectionPool in the cells below.
# This is the private-network address; the note above gives the host and port
# to use when connecting from outside.
host = '192.168.1.250'

# 建立连接，1分钟之内无交互服务端会自动断开连接

In [91]:
# Open the connection used by the table-management cells below. As the
# heading above notes, the server drops it after one minute without traffic.
conn = happybase.Connection(host=host)

# 创建表格

In [95]:
# HBase requires a table to be disabled before it can be deleted. Only issue
# the disable when 'mytable' exists and is still enabled, so that re-running
# this cell (or running it against a fresh cluster) does not raise.
if b'mytable' in conn.tables() and conn.is_table_enabled('mytable'):
    conn.disable_table('mytable')

In [96]:
# Drop a leftover 'mytable' so the create_table call below starts clean.
# The existence check keeps the cell re-runnable; disable=True makes happybase
# disable the table first if it is still enabled.
if b'mytable' in conn.tables():
    conn.delete_table('mytable', disable=True)

In [97]:
# Column families for 'mytable', each mapped to its own option dict:
#   cf1 - max_versions=10
#   cf2 - max_versions=1, block cache disabled
#   cf3 - no options given (server defaults)
families = {
    'cf1': {'max_versions': 10},
    'cf2': {'max_versions': 1, 'block_cache_enabled': False},
    'cf3': {},
}
conn.create_table('mytable', families)

In [98]:
# Handle for 'mytable' taken from the current (un-prefixed) connection.
# The put/scan/row/delete cells further down all go through this name, so they
# operate on this table even after `conn` is rebound with a table prefix.
table = conn.table('mytable')

In [99]:
print(conn.tables())

[b'emp', b'myproject_mytable', b'mytable']


## 为了避免同名的表造成命名冲突，可以在建立连接时指定 table_prefix，实际生成的表名为 prefix_tablename

In [100]:
print(conn.tables())

[b'emp', b'myproject_mytable', b'mytable']


In [101]:
# Reconnect with a table prefix: names used through this connection map to
# 'myproject_<name>' on the server, and tables() lists only the tables under
# that prefix, with the prefix removed.
conn = happybase.Connection(host=host, table_prefix='myproject')

In [102]:
print(conn.tables())

[b'mytable']


In [107]:
#conn.disable_table('mytable')

In [108]:
#conn.delete_table('mytable')#conn.is_table_enabled('mytable') 

In [105]:
# Create 'mytable' under the current connection's prefix with a single column
# family 'cf1' (default options). Skip the call when the table is already
# listed, so re-running this cell does not fail on an existing table.
if b'mytable' not in conn.tables():
    conn.create_table('mytable', {'cf1': dict()})

In [106]:
print(conn.tables())

[b'mytable']


# 向表格添加数据

In [109]:
# Sample cell data, one dict per row. Keys are 'family:qualifier' column
# names: the three product rows use family 'cf1', the author row uses 'cf2'.
cloth_data = {
    'cf1:content': u'牛仔裤',
    'cf1:price': '299',
    'cf1:rating': '98%',
}
hat_data = {
    'cf1:content': u'鸭舌帽',
    'cf1:price': '88',
    'cf1:rating': '99%',
}
shoe_data = {
    'cf1:content': u'耐克',
    'cf1:price': '988',
    'cf1:rating': '100%',
}
author_data = {
    'cf2:name': u'xiebing',
    'cf2:date': '2017-03-09',
}

# Store each dict under its row key, one put() call per row.
for row_key, row_data in (
    ('www.test1.com', cloth_data),
    ('www.test2.com', hat_data),
    ('www.test3.com', shoe_data),
    ('www.test4.com', author_data),
):
    table.put(row=row_key, data=row_data)

## 以批处理的方式向表格添加数据：数据会先缓存在内存中，达到 batch_size 的阈值后再写入 hbase，效率更高，而且可以夹杂删除操作


In [110]:
# Queue three puts and one delete in a single batch. Only four mutations are
# queued, fewer than batch_size=10; the scan output below shows they have all
# been applied once the with-block has finished.
with table.batch(batch_size=10) as batch:
    batch.put(
        'www.test5.com',
        {
            'cf1:price': '999',
            'cf2:title': 'Hello Python',
            'cf2:length': '34',
            'cf3:code': 'A43',
        },
    )
    batch.put(
        'www.test6.com',
        {
            'cf1:content': u'剃须刀',
            'cf1:price': '168',
            'cf1:rating': '97%',
        },
    )
    batch.put('www.test7.com', {'cf3:function': 'print'})
    # Deletes can be mixed with puts in the same batch.
    batch.delete('www.test1.com')

# 扫描表格

In [111]:
# Full scan: each item is a (row key, {column: value}) pair; keys and values
# come back as bytes.
for row_key, columns in table.scan():
    print(row_key, columns)

b'www.test2.com' {b'cf1:content': b'\xe9\xb8\xad\xe8\x88\x8c\xe5\xb8\xbd', b'cf1:price': b'88', b'cf1:rating': b'99%'}
b'www.test3.com' {b'cf1:content': b'\xe8\x80\x90\xe5\x85\x8b', b'cf1:price': b'988', b'cf1:rating': b'100%'}
b'www.test4.com' {b'cf2:date': b'2017-03-09', b'cf2:name': b'xiebing'}
b'www.test5.com' {b'cf1:price': b'999', b'cf2:length': b'34', b'cf2:title': b'Hello Python', b'cf3:code': b'A43'}
b'www.test6.com' {b'cf1:content': b'\xe5\x89\x83\xe9\xa1\xbb\xe5\x88\x80', b'cf1:price': b'168', b'cf1:rating': b'97%'}
b'www.test7.com' {b'cf3:function': b'print'}


# 定义起止位置扫描表格

In [112]:
# Scan from 'www.test3.com' to the end of the table; the start row itself is
# included in the result.
for row_key, columns in table.scan(row_start='www.test3.com'):
    print(row_key, columns)

b'www.test3.com' {b'cf1:content': b'\xe8\x80\x90\xe5\x85\x8b', b'cf1:price': b'988', b'cf1:rating': b'100%'}
b'www.test4.com' {b'cf2:date': b'2017-03-09', b'cf2:name': b'xiebing'}
b'www.test5.com' {b'cf1:price': b'999', b'cf2:length': b'34', b'cf2:title': b'Hello Python', b'cf3:code': b'A43'}
b'www.test6.com' {b'cf1:content': b'\xe5\x89\x83\xe9\xa1\xbb\xe5\x88\x80', b'cf1:price': b'168', b'cf1:rating': b'97%'}
b'www.test7.com' {b'cf3:function': b'print'}


In [113]:
# Scan from the first row up to 'www.test6.com'; the stop row is exclusive,
# so the last row printed is 'www.test5.com'.
for row_key, columns in table.scan(row_stop='www.test6.com'):
    print(row_key, columns)

b'www.test2.com' {b'cf1:content': b'\xe9\xb8\xad\xe8\x88\x8c\xe5\xb8\xbd', b'cf1:price': b'88', b'cf1:rating': b'99%'}
b'www.test3.com' {b'cf1:content': b'\xe8\x80\x90\xe5\x85\x8b', b'cf1:price': b'988', b'cf1:rating': b'100%'}
b'www.test4.com' {b'cf2:date': b'2017-03-09', b'cf2:name': b'xiebing'}
b'www.test5.com' {b'cf1:price': b'999', b'cf2:length': b'34', b'cf2:title': b'Hello Python', b'cf3:code': b'A43'}


In [114]:
# Scan a bounded range: start row included, stop row excluded. The stop value
# does not have to be an existing row key.
scan_range = table.scan(row_start='www.test3.com', row_stop='www.test6')
for row_key, columns in scan_range:
    print(row_key, columns)

b'www.test3.com' {b'cf1:content': b'\xe8\x80\x90\xe5\x85\x8b', b'cf1:price': b'988', b'cf1:rating': b'100%'}
b'www.test4.com' {b'cf2:date': b'2017-03-09', b'cf2:name': b'xiebing'}
b'www.test5.com' {b'cf1:price': b'999', b'cf2:length': b'34', b'cf2:title': b'Hello Python', b'cf3:code': b'A43'}


# 查询表格数据

In [115]:
# Fetch every stored column of one row as a {column: value} dict.
row = table.row(row='www.test4.com')
print(row)

{b'cf2:date': b'2017-03-09', b'cf2:name': b'xiebing'}


In [116]:
# Fetch only the listed columns of one row. The returned dict is keyed by
# bytes, so individual values are looked up with a bytes column name.
wanted_columns = ['cf1:price', 'cf1:rating']
row = table.row('www.test2.com', columns=wanted_columns)
print(row)
print(row[b'cf1:price'])

{b'cf1:price': b'88', b'cf1:rating': b'99%'}
b'88'


In [117]:
# Same query with include_timestamp=True: each value becomes a
# (value, timestamp) tuple instead of a bare value.
row = table.row(
    'www.test2.com',
    columns=['cf1:price', 'cf1:rating'],
    include_timestamp=True,
)
print(row)

{b'cf1:price': (b'88', 1511267787213), b'cf1:rating': (b'99%', 1511267787213)}


In [118]:
# List the stored values of a single column of one row; the result is a list
# of cell values.
cells = table.cells(row=b'www.test2.com', column='cf1:price')
print(cells)

[b'88']


In [119]:
# Ask for up to three versions of the cell. Only one value has been written to
# this column, so the list still holds a single element.
cells = table.cells(row=b'www.test2.com', column='cf1:price', versions=3)
print(cells)

[b'88']


# 删除表格数据

In [120]:
# Delete an entire row (no columns given).
table.delete(row='www.test4.com')

In [121]:
# Delete by column family: a bare family name (no ':qualifier') in `columns`
# targets every column of that family in the row.
table.delete(row='www.test2.com', columns=['cf1'])

In [122]:
# Delete individual columns of a row, each named as 'family:qualifier'.
columns_to_drop = ['cf1:name', 'cf1:price']
table.delete(row='www.test2.com', columns=columns_to_drop)

# 连接池

In [123]:
# Connection pool holding up to three connections; `host` and `table_prefix`
# are the settings applied to the connections it hands out.
pool = happybase.ConnectionPool(size=3, host=host, table_prefix='myproject')

In [124]:
# Borrow a connection from the pool for the duration of the with-block; it is
# handed back to the pool on exit and must not be used afterwards. A dedicated
# name is used so the notebook-level `conn` is not rebound to a pooled
# connection that has already been returned.
with pool.connection() as pooled_conn:
    print(pooled_conn.tables())

[b'mytable']
