In [1]:
import time
from datetime import datetime as dt

from google.cloud import bigtable
from google.cloud.bigtable import column_family, row_filters
from google.cloud.bigtable.row_set import RowSet
import google.cloud.bigtable.row_filters as row_filters
import pandas as pd



In [4]:
INSTANCE_ID = 'bigtable-1'
TABLE_ID = 'plant1generation_{}'.format(time.time())

In [5]:
TABLE_ID

'plant1generation_1649629196.0204175'

### Utils functions

In [30]:
def print_row(row):
    print("Reading data for {}:".format(row.row_key.decode('utf-8')))
    for cf, cols in sorted(row.cells.items()):
        print("Column Family {}".format(cf))
        for col, cells in sorted(cols.items()):
            for cell in cells:
                labels = " [{}]".format(",".join(cell.labels)) \
                    if len(cell.labels) else ""
                print(
                    "\t{}: {} @{}{}".format(col.decode('utf-8'),
                                            cell.value.decode('utf-8'),
                                            cell.timestamp, labels))
    print("")

### Connecting to BigTable

In [7]:
client = bigtable.Client.from_service_account_json('./json/unbosque-service-account.json', admin = True)

In [8]:
print('Connecting to database instance')
instance = client.instance(INSTANCE_ID)

Connecting to database instance


### Creating a table

In [9]:
print('Creating the {} table'.format(TABLE_ID))
table = instance.table(TABLE_ID)

if not table.exists():
    table.create()
else:
    print("ERROR: Table {} already exists".format(TABLE_ID))

Creating the plant1generation_1649629196.0204175 table


In [10]:
print('Creating all family columns')

plant1ID = 'Power_Generation'
plant1_cf = table.column_family(plant1ID)
plant1_cf.create()


Creating all family columns


### Inserting data

In [26]:

p1_generation = pd.read_csv("../data/Plant_1_Generation_Data.csv")
p1_json = p1_generation.to_dict(orient='records')

In [40]:
dt = dt.utcnow()
rows = []

print('Writing orders to the table')
for power in p1_json:
    
    row_key = 'plant1#{}#{}'.format(power['DATE_TIME'], power['SOURCE_KEY']).encode()
    row = table.direct_row(row_key)
    
    row.set_cell(plant1ID, 'yield'.encode(), str(power['TOTAL_YIELD']), timestamp = dt)
  
      
    rows.append(row)

table.mutate_rows(rows)

Writing orders to the table


[,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 ,
 

### Reading data

In [63]:
def read_row_range():

    row_key = "plant1#15-05-2020 14:15#adLQvlD726eNBSB"
    
    rows = table.read_rows(row_key, filter_=row_filters.RowKeyRegexFilter(".*#adLQvlD726eNBSB$".encode()), limit=1)
    for row in rows:
        print_row(row)

read_row_range()


Reading data for plant1#15-05-2020 14:15#adLQvlD726eNBSB:
Column Family Power_Generation
	yield: 6275988.143 @2022-04-10 23:43:05.092000+00:00

