# This notebook demonstrates how to ingest images into Kinetica

## Import libs which will be used for this demo

In [19]:
import os
import fnmatch
import sys
sys.path.insert(0, '/opt/gpudb/api/python/gpudb/')
sys.path.insert(0, '/opt/gpudb/api/python/gpudb/packages')

import cStringIO
import collections
from avro import schema, io
import base64
import gpudb
import random
import math
from faker import Factory
fake = Factory.create()
import time

# Connection settings shared by every cell below.
gpudb_host = "127.0.0.1"      # address of the Kinetica server
my_collection = "MASTER"      # collection the image table is created in
my_table = "caltech256"       # name of the table that receives the images

# Database handle; requests and responses use the binary encoding.
h_db = gpudb.GPUdb(
    host=gpudb_host,
    port='9191',
    encoding='BINARY',
)

## Drop table

In [20]:
# Drop any existing copy of the target table so the notebook starts from a
# clean slate; the response is left as the cell's last expression so its
# status is displayed.
h_db.clear_table(
    table_name=my_table,
    authorization='',
    options={},
)

{'status_info': {u'data_type': u'clear_table_response_avro',
  u'message': u'',
  'response_time': 1.52914,
  u'status': u'OK'},
 u'table_name': u'caltech256'}

## Fetch enrichment data

In [13]:
# Pull coordinates and place names from the 'netflow' table, page by page,
# into four parallel lists (same index == same source record).
x = []
y = []
cities = []
countries = []
toFetch = 40000      # total number of enrichment records wanted
fetched = 0          # records received so far; also used as the paging offset
batch_size = 20000   # maximum number of records requested per call

while fetched < toFetch:
    response = h_db.get_records(
        table_name='netflow',
        offset=fetched,
        # Never ask for more than is still needed.
        limit=min(batch_size, toFetch - fetched),
    )
    res_decoded = gpudb.GPUdbRecord.decode_binary_data(response["type_schema"], response["records_binary"])

    # An empty page means the table is exhausted. Without this guard
    # `fetched` would never advance and the loop would never terminate.
    if not res_decoded:
        break

    fetched += len(res_decoded)

    # Walk every record actually returned (not a hard-coded count), so the
    # last record of a page is kept and a short page cannot raise IndexError.
    for record in res_decoded:
        x.append(float(record['x']))
        y.append(float(record['y']))
        cities.append(record['CITY'])
        countries.append(record['COUNTRY'])
   

## Method which returns enrichment data

In [14]:
eCOUNT = 0  # index of the next enrichment record to hand out
def getEnrichmentData():
    """Return the next enrichment record and advance the global cursor.

    Reads position ``eCOUNT`` of the module-level lists ``x``, ``y``,
    ``cities`` and ``countries``, then increments ``eCOUNT`` so the next
    call returns the following record. The first call after ``eCOUNT`` is
    set to 0 returns the record at index 0.

    Returns:
        tuple: ``(x, y, city, country)`` for the current position.

    Raises:
        IndexError: if ``eCOUNT`` is past the end of any of the lists.
    """
    global eCOUNT
    # Read first, increment afterwards: incrementing first would skip
    # index 0 and run off the end of the lists one call too early.
    record = (x[eCOUNT], y[eCOUNT], cities[eCOUNT], countries[eCOUNT])
    eCOUNT += 1
    return record


## Create table which will store images and other related fields

In [22]:
# Column layout of the image table: a file-name id (shard key), place
# names, coordinates, a date, a timestamp and the raw image bytes.
columns = [
    gpudb.GPUdbRecordColumn(
        "id",
        gpudb.GPUdbRecordColumn._ColumnType.STRING,
        [gpudb.GPUdbColumnProperty.CHAR16, gpudb.GPUdbColumnProperty.SHARD_KEY],
    ),
    gpudb.GPUdbRecordColumn(
        "city",
        gpudb.GPUdbRecordColumn._ColumnType.STRING,
        [gpudb.GPUdbColumnProperty.CHAR64],
    ),
    gpudb.GPUdbRecordColumn(
        "country",
        gpudb.GPUdbRecordColumn._ColumnType.STRING,
        [gpudb.GPUdbColumnProperty.CHAR64],
    ),
    gpudb.GPUdbRecordColumn("x", gpudb.GPUdbRecordColumn._ColumnType.FLOAT),
    gpudb.GPUdbRecordColumn("y", gpudb.GPUdbRecordColumn._ColumnType.FLOAT),
    gpudb.GPUdbRecordColumn(
        "date",
        gpudb.GPUdbRecordColumn._ColumnType.STRING,
        [gpudb.GPUdbColumnProperty.DATE],
    ),
    gpudb.GPUdbRecordColumn(
        "timestamp",
        gpudb.GPUdbRecordColumn._ColumnType.LONG,
        [gpudb.GPUdbColumnProperty.TIMESTAMP],
    ),
    gpudb.GPUdbRecordColumn("image", gpudb.GPUdbRecordColumn._ColumnType.BYTES),
]

# Build the record type object from the column list.
image_record_type = gpudb.GPUdbRecordType(columns, label="image_record_type")
print(image_record_type)

# Register the type with the database and keep its ID; the ID is what
# create_table needs in the next step.
image_record_type.create_type(h_db)
image_type_id = image_record_type.type_id



<gpudb.GPUdbRecordType object at 0x7f405b952b50>


In [23]:
# Create the image table from the registered type, placing it inside the
# configured collection, then report the status returned by the server.
response = h_db.create_table(
    table_name=my_table,
    type_id=image_type_id,
    options={"collection_name": my_collection},
)
print("Table created:  {}".format(response['status_info']['status']))


Table created:  OK


## Grab images from local file system and ingest into Kinetica

In [24]:
# Walk the image directory tree and turn every .jpg file into one
# binary-encoded record, enriched with a location from getEnrichmentData().
encoded_obj_list = []

eCOUNT = 0  # restart the enrichment cursor so re-running this cell is repeatable
image_root = "/home/ec2-user/images/256_ObjectCategories"
for root, dirs, files in os.walk(image_root):
    for filename in files:
        if not filename.endswith(".jpg"):
            continue

        # Read the raw bytes; the file is closed as soon as they are in memory.
        # Descriptive names are used so the builtins `str` and `file` are not
        # shadowed in the notebook namespace.
        with open(os.path.join(root, filename), "rb") as image_file:
            image_bytes = image_file.read()

        lon, lat, city, country = getEnrichmentData()

        # The "timestamp" column carries the TIMESTAMP property, which is
        # expressed in milliseconds since the Unix epoch, whereas
        # time.time() returns seconds.
        timestamp_ms = int(time.time() * 1000)

        # Field order must match the column order of image_record_type:
        # id, city, country, x, y, date, timestamp, image.
        record = gpudb.GPUdbRecord(
            image_record_type,
            [filename, city, country, lon, lat, fake.date(), timestamp_ms, image_bytes],
        )
        encoded_obj_list.append(record.binary_data)



In [25]:
# Send every encoded image record to the table in a single request and show
# the server's response (which includes the inserted-record count).
response = h_db.insert_records(
    table_name=my_table,
    data=encoded_obj_list,
    list_encoding="binary",
    options={},
)
print(response)

{u'count_updated': 0, u'record_ids': [], 'status_info': {u'status': u'OK', u'data_type': u'insert_records_response_avro', u'message': u'', 'response_time': 6.34109}, u'count_inserted': 30607}
