In [4]:
#HappyBase is a developer-friendly Python library to interact with Apache HBas

!pip install happybase

Collecting happybase
  Downloading happybase-1.2.0.tar.gz (40 kB)
[K     |████████████████████████████████| 40 kB 1.4 MB/s eta 0:00:01
Collecting thriftpy2>=0.4
  Downloading thriftpy2-0.4.14.tar.gz (361 kB)
[K     |████████████████████████████████| 361 kB 1.3 MB/s eta 0:00:01
[?25hCollecting ply<4.0,>=3.4
  Downloading ply-3.11-py2.py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 3.2 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: happybase, thriftpy2
  Building wheel for happybase (setup.py) ... [?25ldone
[?25h  Created wheel for happybase: filename=happybase-1.2.0-py2.py3-none-any.whl size=26606 sha256=dbf9270da69aff825c30582378cafb3ff6d287edbf5e82568a61614e3ed75007
  Stored in directory: /home/jovyan/.cache/pip/wheels/9e/00/b2/3a44a68da0a23b4f274c24f3bda10e84283d63fb8b8dfb3709
  Building wheel for thriftpy2 (setup.py) ... [?25ldone
[?25h  Created wheel for thriftpy2: filename=thriftpy2-0.4.14-cp39-cp39-linux_x86_64.whl size=507857 sha2

In [5]:
import happybase

#與hbase-master容器連線
connection = happybase.Connection('hbase-master')
print(connection.tables())

[b'my_table', b'people']


In [6]:
#建立一個table
connection.create_table(
    'my_table',
    {
        'cf1': dict(max_versions=10), #版本保留10個
        'cf2': dict(max_versions=1, block_cache_enabled=False), #不進行緩存
        'cf3': dict()  #使用預設值
    }
)

AlreadyExists: AlreadyExists(message=b'table name already in use')

In [8]:
print(connection.tables())

[b'my_table', b'people']


In [9]:
#建立連線
table = connection.table('my_table')

In [10]:
#若RowKey已經存在，就會變成更新資料

sensor1 = {'cf1:temperature': '36.5', 'cf1:humidity': '56', 'cf1:vibration': '265'}
sensor2 = {'cf1:temperature': '36.7', 'cf1:humidity': '59', 'cf1:vibration': '287'}
sensor3 = {'cf1:temperature': '36.8', 'cf1:humidity': '57', 'cf1:vibration': '289'}
sensor4 = {'cf2:voltage': '220', 'cf2:current': '6'}

#這樣會put四次
table.put(row='machine1', data=sensor1)
table.put(row='machine2', data=sensor2)
table.put(row='machine3', data=sensor3)
table.put(row='machine4', data=sensor4)

In [11]:
#查看
for key, value in table.scan():
    print(key, value)

b'machine1' {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}
b'machine2' {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}
b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}
b'machine6' {b'cf1:temperature': b'37', b'cf2:current': b'8', b'cf2:humidity': b'55'}
b'machine7' {b'cf2:humidity': b'45', b'cf2:voltage': b'110'}


In [12]:
#批次寫入
bat = table.batch()
bat.put('machine5', {'cf1:temperature': '37', 'cf2:humidity': '55', 'cf2:voltage': '220'})
bat.put('machine6', {'cf1:temperature': '37', 'cf2:humidity': '55', 'cf2:current': '8'})
bat.send()

In [13]:
#查看
for key, value in table.scan():
    print(key, value)

b'machine1' {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}
b'machine2' {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}
b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}
b'machine5' {b'cf1:temperature': b'37', b'cf2:humidity': b'55', b'cf2:voltage': b'220'}
b'machine6' {b'cf1:temperature': b'37', b'cf2:current': b'8', b'cf2:humidity': b'55'}
b'machine7' {b'cf2:humidity': b'45', b'cf2:voltage': b'110'}


In [14]:
#刪除
with table.batch() as bat:
    bat.put('machine7', {'cf1:temperature': '39', 'cf2:humidity': '45', 'cf2:voltage': '110'})
    bat.delete('machine5')

In [15]:
#查看
for key, value in table.scan():
    print(key, value)

b'machine1' {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}
b'machine2' {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}
b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}
b'machine6' {b'cf1:temperature': b'37', b'cf2:current': b'8', b'cf2:humidity': b'55'}
b'machine7' {b'cf1:temperature': b'39', b'cf2:humidity': b'45', b'cf2:voltage': b'110'}


In [16]:
#通過row_start引數來設定開始掃描的RowKey
for key, value in table.scan(row_start='machine3'):
    print(key, value)

b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}
b'machine6' {b'cf1:temperature': b'37', b'cf2:current': b'8', b'cf2:humidity': b'55'}
b'machine7' {b'cf1:temperature': b'39', b'cf2:humidity': b'45', b'cf2:voltage': b'110'}


In [17]:
#通過row_stop引數來設定結束掃描的RowKey
for key, value in table.scan(row_stop='machine3'):
    print(key, value)

b'machine1' {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}
b'machine2' {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}


In [18]:
#通過row_start和row_stop引數來設定開始和結束掃描的RowKey
for key, value in table.scan(row_start='machine1', row_stop='machine5'):
    print(key, value)

b'machine1' {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}
b'machine2' {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}
b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}


In [19]:
#查看資料
row = table.row('machine1')
res=row
res

{b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}

In [20]:
#抓出資料
byte_humidity=res[b'cf1:humidity']

#將byte轉為string
encoding = 'utf-8'
humidity=str(byte_humidity, encoding)
print(humidity)

56


In [21]:
#查看多筆資料
rows = dict(table.rows(['machine1', 'machine2']))
print(rows)

{b'machine1': {b'cf1:humidity': b'56', b'cf1:temperature': b'36.5', b'cf1:vibration': b'265'}, b'machine2': {b'cf1:humidity': b'59', b'cf1:temperature': b'36.7', b'cf1:vibration': b'287'}}


In [22]:
#讀取
for key, value in rows.items():
    print(str(value[b'cf1:humidity'], 'utf-8'))

56
59


In [23]:
#通過指定條件查找資料
row = table.row('machine6', columns=['cf1'])
print(row)

{b'cf1:temperature': b'37'}


In [24]:
#通過指定條件查找資料
row = table.row('machine6', columns=[b'cf1:temperature', b'cf2:current'])
print(row)

{b'cf1:temperature': b'37', b'cf2:current': b'8'}


In [25]:
#時間戳記
row = table.row(row='machine1', include_timestamp=True)
print(row)

#透過取得時間轉換
#https://www.cadch.com/article/timestamp/index.php   

{b'cf1:humidity': (b'56', 1627257686060), b'cf1:temperature': (b'36.5', 1627257686060), b'cf1:vibration': (b'265', 1627257686060)}


In [26]:
#多put幾次，讓 machine1 有多個版本
sensor1 = {'cf1:temperature': '38'}
table.put(row='machine1', data=sensor1)

sensor1 = {'cf1:temperature': '39'}
table.put(row='machine1', data=sensor1)

sensor1 = {'cf1:temperature': '40'}
table.put(row='machine1', data=sensor1)

sensor1 = {'cf1:temperature': '41'}
table.put(row='machine1', data=sensor1)

In [27]:
#檢視某一個cell所有版本
cells = table.cells(b'machine1', column='cf1:temperature', include_timestamp=True)
print("找出每個版本的值，但該值並未按照時間排序:")
print(cells)

print("經過時間排序後的值:")
print(sorted(cells, key=lambda tup: tup[1]))

找出每個版本的值，但該值並未按照時間排序:
[(b'41', 1627257694752), (b'40', 1627257694747), (b'39', 1627257694741), (b'38', 1627257694730), (b'36.5', 1627257686060)]
經過時間排序後的值:
[(b'36.5', 1627257686060), (b'38', 1627257694730), (b'39', 1627257694741), (b'40', 1627257694747), (b'41', 1627257694752)]


In [28]:
#通過引數來檢索前n個版本
cells = table.cells(b'machine1', column=b'cf1:temperature', versions=3)
print(cells)

[b'41', b'40', b'39']


In [29]:
#刪除RowKey為machine1的資料
table.delete('machine1')

In [30]:
#刪除一個cf資料
table.delete('machine2', columns=[b'cf1'])

In [31]:
#刪除一個cf多個資料
table.delete('machine7', columns=[b'cf1:temperature',b'cf2:current'])

In [32]:
#查看
for key, value in table.scan():
    print(key, value)

b'machine3' {b'cf1:humidity': b'57', b'cf1:temperature': b'36.8', b'cf1:vibration': b'289'}
b'machine4' {b'cf2:current': b'6', b'cf2:voltage': b'220'}
b'machine6' {b'cf1:temperature': b'37', b'cf2:current': b'8', b'cf2:humidity': b'55'}
b'machine7' {b'cf2:humidity': b'45', b'cf2:voltage': b'110'}


In [33]:
connection.close()