# Load images into table

This demonstrates different ways to load images into a database table.

We use the script called <em>madlib_image_loader.py</em> located at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts/Deep-learning which uses the Python Imaging Library so supports multiple formats http://www.pythonware.com/products/pil/

## Table of contents

<a href="#setup">1. Setup image loader</a>

<a href="#fetch_numpy">2. Fetch images then load NumPy array into table</a>

<a href="#file_system">3. Load from file system into table</a>

In [1]:
import sys
import h5py
import numpy as np
import psycopg2
import glob

In [2]:
sys.path.append('/home/gpadmin/.local/lib/python3.5/site-packages/')

In [4]:
%load_ext sql

In [5]:
# Greenplum Database 5.x on GCP for deep learning (PM demo machine)
#%sql postgresql://gpadmin@35.239.240.26:5432/madlib
        
# PostgreSQL local
%sql postgresql://gpadmin@localhost:5432/cerebro

'Connected: gpadmin@cerebro'

In [6]:
%sql select madlib.version();
#%sql select version();

 * postgresql://gpadmin@localhost:5432/cerebro
1 rows affected.


version
"MADlib version: 1.17.0, git revision: rel/v1.17.0, cmake configuration time: Fri Apr 17 06:35:28 UTC 2020, build type: RelWithDebInfo, build system: Linux-4.4.0-174-generic, C compiler: gcc 5.4.0, C++ compiler: g++ 5.4.0"


<a id="setup"></a>
# 1. Set up image loader

We use the script called <em>madlib_image_loader.py</em> located at https://github.com/apache/madlib-site/tree/asf-site/community-artifacts/Deep-learning

In [3]:
import sys
import os
from keras.datasets import cifar10

# madlib_site_dir = '/Users/fmcquillan/Documents/Product/MADlib/Demos/data'
# sys.path.append(madlib_site_dir)

# Import image loader module
from madlib_image_loader import ImageLoader, DbCredentials

# Specify database credentials, for connecting to db
#db_creds = DbCredentials(user='gpadmin',
#                         host='35.239.240.26',
#                         port='5432',
#                         password='')

# Specify database credentials, for connecting to db
db_creds = DbCredentials(db_name='cerebro',
    user='gpadmin',
                          host='localhost',
                          port='5432',
                          password='')
# connection = psycopg2.connect(user = db_creds.user,
#                                   password = db_creds.password,
#                                   host = db_creds.host,
#                                   port = db_creds.port,
#                                   database = db_creds.db_name)
# connection.autocommit = True
# cursor = connection.cursor()
# Initialize ImageLoader (increase num_workers to run faster)


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<a id="fetch_numpy"></a>
# 2. Fetch images then load NumPy array into table

<em>iloader.load_dataset_from_np(data_x, data_y, table_name, append=False)</em>

- <em>data_x</em> contains image data in np.array format


- <em>data_y</em> is a 1D np.array of the image categories (labels).


- If the user passes a <em>table_name</em> while creating ImageLoader object, it will be used for all further calls to load_dataset_from_np.  It can be changed by passing it as a parameter during the actual call to load_dataset_from_np, and if so future calls will load to that table name instead.  This avoids needing to pass the table_name again every time, but also allows it to be changed at any time.

In [11]:
class ImageNetLoader(object):
    def __init__(self, db_creds, num_workers=1):
        self.connection = psycopg2.connect(user=db_creds.user,
                                           password=db_creds.password,
                                           host=db_creds.host,
                                           port=db_creds.port,
                                           database=db_creds.db_name)
        self.connection.autocommit = True
        self.cursor = self.connection.cursor()
        self.iloader = ImageLoader(num_workers=num_workers, db_creds=db_creds)
    def drop_table(self, name):
        self.cursor.execute("DROP TABLE IF EXISTS {}".format(name)) 
    def load_one(self, file_path, name, force=False):
        print ("Loading {}".format(file_path))
        exists = self.if_exists_table(name)
        if exists and not force:
            raise Exception("Table {} already exists!".format(name))
        h5f = h5py.File(file_path, 'r')
        np_images = np.asarray(h5f.get("images"))
        np_labels = np.eye(1000)[np.asarray(h5f.get("labels")).astype(int)]
        
        self.iloader.load_dataset_from_np(np_images, np_labels, name, append=exists)
    def load_many(self, file_list, name, force=False):
        exists = self.if_exists_table(name)
        if exists and not force:
            raise Exception("Table {} already exists!".format(name))
        for file_path in file_list:
            self.load_one(file_path, name, True)
    def if_exists_table(self, name):
        res = None
        try:
            self.cursor.execute("SELECT '{}'::regclass".format(name))
            res = self.cursor.fetchone()
        except Exception:
            pass
        return res is not None

In [12]:
imagenet_loader = ImageNetLoader(db_creds, 16)

In [7]:
name = 'imagenet_train_data'
file_path = '/mnt/imagenet/train/train_0.h5'

imagenet_loader.load_one(file_path, name, True)

In [None]:
train_root = '/mnt/imagenet/train'
valid_root = '/mnt/imagenet/valid'
name_list = ['imagenet_train_data', 'imagenet_valid_data']
file_list_list = [glob.glob(os.path.join(train_root, '*.h5')), glob.glob(os.path.join(valid_root, '*.h5'))]
for name, file_list in zip(name_list, file_list_list):
    imagenet_loader.drop_table(name)
    imagenet_loader.load_many(file_list, name)

Loading /mnt/imagenet/train/train_4.h5
MainProcess: Connected to cerebro db.
Executing: CREATE TABLE imagenet_train_data (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table imagenet_train_data in cerebro db
Spawning 16 workers...
Initializing ForkPoolWorker-17 [pid 178451]
ForkPoolWorker-17: Created temporary directory /mnt/madlib_PUkD3nYj62
Initializing ForkPoolWorker-18 [pid 178454]
ForkPoolWorker-17: Connected to cerebro db.
ForkPoolWorker-18: Created temporary directory /mnt/madlib_pE7sdzFdve
Initializing ForkPoolWorker-19 [pid 178463]
ForkPoolWorker-18: Connected to cerebro db.
ForkPoolWorker-19: Created temporary directory /mnt/madlib_lQSCFivXoo
Initializing ForkPoolWorker-20 [pid 178472]
ForkPoolWorker-19: Connected to cerebro db.
ForkPoolWorker-20: Created temporary directory /mnt/madlib_W3QjN9qhnR
Initializing ForkPoolWorker-21 [pid 178481]
ForkPoolWorker-20: Connected to cerebro db.
ForkPoolWorker-21: Created temporary directory /mnt/madlib_OdbRDSQWMq
Initializing ForkPo

ForkPoolWorker-17: Wrote 1000 images to /mnt/madlib_PUkD3nYj62/imagenet_train_data0002.tmp
ForkPoolWorker-25: Wrote 1000 images to /mnt/madlib_zrwmZZR1vQ/imagenet_train_data0002.tmp
ForkPoolWorker-23: Wrote 1000 images to /mnt/madlib_jfx4zLWs5q/imagenet_train_data0002.tmp
ForkPoolWorker-26: Wrote 1000 images to /mnt/madlib_C6kBFIjwi7/imagenet_train_data0002.tmp
ForkPoolWorker-21: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-22: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-19: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-20: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-18: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-25: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-17: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-27: Wrote 1000 images to /mnt/madlib_ytZ6ISfoef/imagenet_train_data0002.tmp
ForkPoolWorker-24: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-23: Loaded 1000 images int

ForkPoolWorker-28: Wrote 1000 images to /mnt/madlib_U6YouJBOB8/imagenet_train_data0005.tmp
ForkPoolWorker-23: Wrote 1000 images to /mnt/madlib_jfx4zLWs5q/imagenet_train_data0005.tmp
ForkPoolWorker-31: Wrote 1000 images to /mnt/madlib_8XTYJx5TOR/imagenet_train_data0005.tmp
ForkPoolWorker-32: Wrote 1000 images to /mnt/madlib_iTVeXpwZP4/imagenet_train_data0005.tmp
ForkPoolWorker-29: Wrote 1000 images to /mnt/madlib_GSMgtVNZMS/imagenet_train_data0005.tmp
ForkPoolWorker-24: Wrote 1000 images to /mnt/madlib_zLVVaBtU9P/imagenet_train_data0005.tmp
ForkPoolWorker-30: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-28: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-26: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-23: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-31: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-27: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-32: Loaded 1000 images into imagenet_train_data
ForkPoolWorke

ForkPoolWorker-31: Wrote 1000 images to /mnt/madlib_8XTYJx5TOR/imagenet_train_data0008.tmp
ForkPoolWorker-21: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-32: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-29: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-31: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-24: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-19: Wrote 1000 images to /mnt/madlib_lQSCFivXoo/imagenet_train_data0009.tmp
ForkPoolWorker-19: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-22: Wrote 1000 images to /mnt/madlib_eNQw3YZqGw/imagenet_train_data0009.tmp
ForkPoolWorker-22: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-25: Wrote 1000 images to /mnt/madlib_zrwmZZR1vQ/imagenet_train_data0009.tmp
ForkPoolWorker-23: Wrote 1000 images to /mnt/madlib_jfx4zLWs5q/imagenet_train_data0009.tmp
ForkPoolWorker-18: Wrote 1000 images to /mnt/madlib_pE7sdzFdve/imagenet_train_data0009.tmp
ForkPoolWorke

ForkPoolWorker-44: Wrote 1000 images to /mnt/madlib_5PKRtSYerw/imagenet_train_data0000.tmp
ForkPoolWorker-45: Wrote 1000 images to /mnt/madlib_H1frlbO6IP/imagenet_train_data0000.tmp
ForkPoolWorker-41: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-46: Wrote 1000 images to /mnt/madlib_UYfmX24MIi/imagenet_train_data0000.tmp
ForkPoolWorker-42: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-47: Wrote 1000 images to /mnt/madlib_ZqNnQJDwgZ/imagenet_train_data0000.tmp
ForkPoolWorker-48: Wrote 1000 images to /mnt/madlib_VGM3iKJHvv/imagenet_train_data0000.tmp
ForkPoolWorker-43: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-46: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-47: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-48: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-44: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-45: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-36: Wrote 1000 images to /

ForkPoolWorker-45: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-47: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-33: Wrote 1000 images to /mnt/madlib_FdsYA7P89o/imagenet_train_data0004.tmp
ForkPoolWorker-35: Wrote 1000 images to /mnt/madlib_d8pFzYa68M/imagenet_train_data0004.tmp
ForkPoolWorker-33: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-35: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-34: Wrote 1000 images to /mnt/madlib_iaArod6Yzm/imagenet_train_data0004.tmp
ForkPoolWorker-42: Wrote 1000 images to /mnt/madlib_Doayl9I8uB/imagenet_train_data0004.tmp
ForkPoolWorker-36: Wrote 1000 images to /mnt/madlib_1kLxKcyHys/imagenet_train_data0004.tmp
ForkPoolWorker-41: Wrote 1000 images to /mnt/madlib_j0lQ983IWV/imagenet_train_data0004.tmp
ForkPoolWorker-39: Wrote 1000 images to /mnt/madlib_0oprVpHeCr/imagenet_train_data0004.tmp
ForkPoolWorker-38: Wrote 1000 images to /mnt/madlib_v8TyXpGgfX/imagenet_train_data0004.tmp
ForkPoolWorker-37: W

ForkPoolWorker-38: Wrote 1000 images to /mnt/madlib_v8TyXpGgfX/imagenet_train_data0007.tmp
ForkPoolWorker-37: Wrote 1000 images to /mnt/madlib_IYUrowi3D8/imagenet_train_data0007.tmp
ForkPoolWorker-40: Wrote 1000 images to /mnt/madlib_tfmLlrFPz9/imagenet_train_data0007.tmp
ForkPoolWorker-43: Wrote 1000 images to /mnt/madlib_hulZmuYZrg/imagenet_train_data0007.tmp
ForkPoolWorker-48: Wrote 1000 images to /mnt/madlib_VGM3iKJHvv/imagenet_train_data0007.tmp
ForkPoolWorker-44: Wrote 1000 images to /mnt/madlib_5PKRtSYerw/imagenet_train_data0007.tmp
ForkPoolWorker-46: Wrote 1000 images to /mnt/madlib_UYfmX24MIi/imagenet_train_data0007.tmp
ForkPoolWorker-47: Wrote 1000 images to /mnt/madlib_ZqNnQJDwgZ/imagenet_train_data0007.tmp
ForkPoolWorker-45: Wrote 1000 images to /mnt/madlib_H1frlbO6IP/imagenet_train_data0007.tmp
ForkPoolWorker-40: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-33: Wrote 1000 images to /mnt/madlib_FdsYA7P89o/imagenet_train_data0008.tmp
ForkPoolWorker-38: Loaded 1

ForkPoolWorker-49: Connected to cerebro db.
Initializing ForkPoolWorker-50 [pid 180177]
ForkPoolWorker-50: Created temporary directory /mnt/madlib_w6qQxEoyUw
Initializing ForkPoolWorker-51 [pid 180186]
ForkPoolWorker-50: Connected to cerebro db.
ForkPoolWorker-51: Created temporary directory /mnt/madlib_0TbkBf4zCe
Initializing ForkPoolWorker-52 [pid 180195]
ForkPoolWorker-51: Connected to cerebro db.
ForkPoolWorker-52: Created temporary directory /mnt/madlib_ZcsSwRiwP5
Initializing ForkPoolWorker-53 [pid 180204]
ForkPoolWorker-52: Connected to cerebro db.
ForkPoolWorker-53: Created temporary directory /mnt/madlib_qm2BFhWdQr
Initializing ForkPoolWorker-54 [pid 180213]
ForkPoolWorker-53: Connected to cerebro db.
ForkPoolWorker-54: Created temporary directory /mnt/madlib_5J9e378cMy
Initializing ForkPoolWorker-55 [pid 180222]
ForkPoolWorker-54: Connected to cerebro db.
ForkPoolWorker-55: Created temporary directory /mnt/madlib_s6qiF2Ndqo
Initializing ForkPoolWorker-56 [pid 180231]
ForkPool

ForkPoolWorker-49: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-52: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-55: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-56: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-54: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-57: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-53: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-50: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-61: Wrote 1000 images to /mnt/madlib_CEQXTT87uL/imagenet_train_data0002.tmp
ForkPoolWorker-59: Wrote 1000 images to /mnt/madlib_eZnS7SwMOD/imagenet_train_data0002.tmp
ForkPoolWorker-62: Wrote 1000 images to /mnt/madlib_2ILYRw8Gp4/imagenet_train_data0002.tmp
ForkPoolWorker-63: Wrote 1000 images to /mnt/madlib_vOib9yz0ro/imagenet_train_data0002.tmp
ForkPoolWorker-58: Wrote 1000 images to /mnt/madlib_xdlRd1jS7K/imagenet_train_data0002.tmp
ForkPoolWorker-60: Wrote 1000 images to /

ForkPoolWorker-59: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-53: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-63: Wrote 1000 images to /mnt/madlib_vOib9yz0ro/imagenet_train_data0005.tmp
ForkPoolWorker-61: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-62: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-60: Wrote 1000 images to /mnt/madlib_i8DYF4dxv4/imagenet_train_data0005.tmp
ForkPoolWorker-50: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-64: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-60: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-58: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-63: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-51: Wrote 1000 images to /mnt/madlib_0TbkBf4zCe/imagenet_train_data0006.tmp
ForkPoolWorker-56: Wrote 1000 images to /mnt/madlib_LD0CORPwJ2/imagenet_train_data0006.tmp
ForkPoolWorker-51: Loaded 1000 images into imagenet_train_data
ForkPo

ForkPoolWorker-51: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-56: Wrote 1000 images to /mnt/madlib_LD0CORPwJ2/imagenet_train_data0009.tmp
ForkPoolWorker-56: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-62: Wrote 1000 images to /mnt/madlib_2ILYRw8Gp4/imagenet_train_data0009.tmp
ForkPoolWorker-55: Wrote 1000 images to /mnt/madlib_s6qiF2Ndqo/imagenet_train_data0009.tmp
ForkPoolWorker-61: Wrote 1000 images to /mnt/madlib_CEQXTT87uL/imagenet_train_data0009.tmp
ForkPoolWorker-59: Wrote 1000 images to /mnt/madlib_eZnS7SwMOD/imagenet_train_data0009.tmp
ForkPoolWorker-62: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-55: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-61: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-59: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-59: Wrote 146 images to /mnt/madlib_eZnS7SwMOD/imagenet_train_data0010.tmp
ForkPoolWorker-59: Loaded 146 images into imagenet_train_data
ForkPoolWorker-

ForkPoolWorker-78: Wrote 1000 images to /mnt/madlib_wzey9pHnTr/imagenet_train_data0000.tmp
ForkPoolWorker-79: Wrote 1000 images to /mnt/madlib_976VgTiChB/imagenet_train_data0000.tmp
ForkPoolWorker-80: Wrote 1000 images to /mnt/madlib_CHrTYWtNqf/imagenet_train_data0000.tmp
ForkPoolWorker-75: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-80: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-78: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-79: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-77: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-76: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-70: Wrote 1000 images to /mnt/madlib_NSXRMLYd7L/imagenet_train_data0001.tmp
ForkPoolWorker-68: Wrote 1000 images to /mnt/madlib_3VrGcz8DBM/imagenet_train_data0001.tmp
ForkPoolWorker-72: Wrote 1000 images to /mnt/madlib_Ht2X9s3qff/imagenet_train_data0001.tmp
ForkPoolWorker-65: Wrote 1000 images to /mnt/madlib_c30z5t2mEF/imagenet_trai

ForkPoolWorker-71: Wrote 1000 images to /mnt/madlib_fhd5i1giED/imagenet_train_data0004.tmp
ForkPoolWorker-74: Wrote 1000 images to /mnt/madlib_Lxgbpmq24N/imagenet_train_data0004.tmp
ForkPoolWorker-80: Wrote 1000 images to /mnt/madlib_CHrTYWtNqf/imagenet_train_data0004.tmp
ForkPoolWorker-66: Wrote 1000 images to /mnt/madlib_OT1BDrYmnc/imagenet_train_data0004.tmp
ForkPoolWorker-71: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-74: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-65: Wrote 1000 images to /mnt/madlib_c30z5t2mEF/imagenet_train_data0004.tmp
ForkPoolWorker-80: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-72: Wrote 1000 images to /mnt/madlib_Ht2X9s3qff/imagenet_train_data0004.tmp
ForkPoolWorker-73: Wrote 1000 images to /mnt/madlib_urvbUahyG8/imagenet_train_data0004.tmp
ForkPoolWorker-66: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-67: Wrote 1000 images to /mnt/madlib_kKI2dQWzCb/imagenet_train_data0004.tmp
ForkPoolWorker-65: L

ForkPoolWorker-73: Wrote 1000 images to /mnt/madlib_urvbUahyG8/imagenet_train_data0007.tmp
ForkPoolWorker-77: Wrote 1000 images to /mnt/madlib_wBaRI0xhTD/imagenet_train_data0007.tmp
ForkPoolWorker-73: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-77: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-75: Wrote 1000 images to /mnt/madlib_Ev8YKAJmSS/imagenet_train_data0007.tmp
ForkPoolWorker-69: Wrote 1000 images to /mnt/madlib_9IRALsYgNI/imagenet_train_data0007.tmp
ForkPoolWorker-78: Wrote 1000 images to /mnt/madlib_wzey9pHnTr/imagenet_train_data0007.tmp
ForkPoolWorker-76: Wrote 1000 images to /mnt/madlib_A0zKXRUTiW/imagenet_train_data0007.tmp
ForkPoolWorker-67: Wrote 1000 images to /mnt/madlib_kKI2dQWzCb/imagenet_train_data0007.tmp
ForkPoolWorker-79: Wrote 1000 images to /mnt/madlib_976VgTiChB/imagenet_train_data0007.tmp
ForkPoolWorker-68: Wrote 1000 images to /mnt/madlib_3VrGcz8DBM/imagenet_train_data0007.tmp
ForkPoolWorker-70: Wrote 1000 images to /mnt/madlib_NSX

ForkPoolWorker-83: Created temporary directory /mnt/madlib_nupaUvEEMY
Initializing ForkPoolWorker-84 [pid 181907]
ForkPoolWorker-83: Connected to cerebro db.
ForkPoolWorker-84: Created temporary directory /mnt/madlib_R2ZCxkGYiM
Initializing ForkPoolWorker-85 [pid 181916]
ForkPoolWorker-84: Connected to cerebro db.
ForkPoolWorker-85: Created temporary directory /mnt/madlib_hKMANrCUF5
Initializing ForkPoolWorker-86 [pid 181925]
ForkPoolWorker-85: Connected to cerebro db.
ForkPoolWorker-86: Created temporary directory /mnt/madlib_kN0H9lZzfc
Initializing ForkPoolWorker-87 [pid 181934]
ForkPoolWorker-86: Connected to cerebro db.
ForkPoolWorker-87: Created temporary directory /mnt/madlib_p6lwLDxx90
Initializing ForkPoolWorker-88 [pid 181943]
ForkPoolWorker-87: Connected to cerebro db.
ForkPoolWorker-88: Created temporary directory /mnt/madlib_NivCLxpP4W
Initializing ForkPoolWorker-89 [pid 181952]
ForkPoolWorker-89: Created temporary directory /mnt/madlib_Gm676gArcC
Initializing ForkPoolWorke

In [34]:
train_list = 

('imagenet_train_data',)

In [None]:
SELECT 'schema_name.table_name'::regclass


In [52]:
if_exists_table(cursor, 'tal')

False

In [None]:
def init_table()

In [30]:
load_one(file_path, name)

Executing: CREATE TABLE imagenet_train_data (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table imagenet_train_data in cerebro db
Spawning 16 workers...
Initializing ForkPoolWorker-1 [pid 175721]
ForkPoolWorker-1: Created temporary directory /mnt/madlib_1N7IcM8OpO
ForkPoolWorker-1: Connected to cerebro db.
Initializing ForkPoolWorker-2 [pid 175724]
ForkPoolWorker-2: Created temporary directory /mnt/madlib_9IyBGdIay4
Initializing ForkPoolWorker-3 [pid 175733]
ForkPoolWorker-2: Connected to cerebro db.
ForkPoolWorker-3: Created temporary directory /mnt/madlib_OlVzBf3iW6
Initializing ForkPoolWorker-4 [pid 175742]
ForkPoolWorker-3: Connected to cerebro db.
ForkPoolWorker-4: Created temporary directory /mnt/madlib_PMTaXqdso3
Initializing ForkPoolWorker-5 [pid 175751]
ForkPoolWorker-4: Connected to cerebro db.
ForkPoolWorker-5: Created temporary directory /mnt/madlib_Ylkz0bYo4K
Initializing ForkPoolWorker-6 [pid 175760]
ForkPoolWorker-5: Connected to cerebro db.
ForkPoolWorker-6: Create

ForkPoolWorker-2: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-1: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-3: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-5: Wrote 1000 images to /mnt/madlib_Ylkz0bYo4K/imagenet_train_data0002.tmp
ForkPoolWorker-6: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-8: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-7: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-9: Wrote 1000 images to /mnt/madlib_LQhu8diW0x/imagenet_train_data0002.tmp
ForkPoolWorker-5: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-10: Wrote 1000 images to /mnt/madlib_ECsPZL00HW/imagenet_train_data0002.tmp
ForkPoolWorker-9: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-13: Wrote 1000 images to /mnt/madlib_UNywEhg8MV/imagenet_train_data0002.tmp
ForkPoolWorker-12: Wrote 1000 images to /mnt/madlib_I5ev9PSdBP/imagenet_train_data0002.tmp
ForkPoolWorker-11: Wrote 1000 images to /mnt/madlib

ForkPoolWorker-9: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-14: Wrote 1000 images to /mnt/madlib_EKSqYlPo9z/imagenet_train_data0005.tmp
ForkPoolWorker-12: Wrote 1000 images to /mnt/madlib_I5ev9PSdBP/imagenet_train_data0005.tmp
ForkPoolWorker-16: Wrote 1000 images to /mnt/madlib_Bsku7rwohG/imagenet_train_data0005.tmp
ForkPoolWorker-11: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-13: Wrote 1000 images to /mnt/madlib_UNywEhg8MV/imagenet_train_data0005.tmp
ForkPoolWorker-14: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-12: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-16: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-13: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-15: Wrote 1000 images to /mnt/madlib_8wZRr1UYpf/imagenet_train_data0005.tmp
ForkPoolWorker-15: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-4: Wrote 1000 images to /mnt/madlib_PMTaXqdso3/imagenet_train_data0006.tmp
ForkPoolWorker-

ForkPoolWorker-4: Wrote 1000 images to /mnt/madlib_PMTaXqdso3/imagenet_train_data0009.tmp
ForkPoolWorker-1: Wrote 1000 images to /mnt/madlib_1N7IcM8OpO/imagenet_train_data0009.tmp
ForkPoolWorker-2: Wrote 1000 images to /mnt/madlib_9IyBGdIay4/imagenet_train_data0009.tmp
ForkPoolWorker-4: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-1: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-8: Wrote 1000 images to /mnt/madlib_X4jqceiKoH/imagenet_train_data0009.tmp
ForkPoolWorker-2: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-3: Wrote 1000 images to /mnt/madlib_OlVzBf3iW6/imagenet_train_data0009.tmp
ForkPoolWorker-7: Wrote 1000 images to /mnt/madlib_hMMK4xxgWY/imagenet_train_data0009.tmp
ForkPoolWorker-8: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-3: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-7: Loaded 1000 images into imagenet_train_data
ForkPoolWorker-7: Wrote 146 images to /mnt/madlib_hMMK4xxgWY/imagenet_train_data0010.tmp

In [19]:
h5f = h5py.File(file_path, 'r')

np_images = np.asarray(h5f.get("images"))

np_labels = np.eye(1000)[np.asarray(h5f.get("labels")).astype(int)]

%sql DROP TABLE IF EXISTS imagenet_train_data;

# Save images to temporary directories and load into database
iloader.load_dataset_from_np(np_images, np_labels, 'imagenet_train_data', append=False)


In [8]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
%sql DROP TABLE IF EXISTS cifar_10_train_data, cifar_10_test_data;

# Save images to temporary directories and load into database
iloader.load_dataset_from_np(x_train, y_train, 'cifar_10_train_data', append=False)
iloader.load_dataset_from_np(x_test, y_test, 'cifar_10_test_data', append=False)

 * postgresql://gpadmin@localhost:5432/cerebro
Done.
MainProcess: Connected to cerebro db.
Executing: CREATE TABLE cifar_10_train_data (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table cifar_10_train_data in cerebro db
Spawning 5 workers...
Initializing ForkPoolWorker-1 [pid 120269]
ForkPoolWorker-1: Created temporary directory /tmp/madlib_UtkeUeMcVk
ForkPoolWorker-1: Connected to cerebro db.
Initializing ForkPoolWorker-2 [pid 120272]
ForkPoolWorker-2: Created temporary directory /tmp/madlib_uYyMLvKdBQ
Initializing ForkPoolWorker-3 [pid 120279]
ForkPoolWorker-3: Created temporary directory /tmp/madlib_Tlt9UX62iO
ForkPoolWorker-2: Connected to cerebro db.
Initializing ForkPoolWorker-4 [pid 120287]
ForkPoolWorker-4: Created temporary directory /tmp/madlib_DeZY5yzuYF
ForkPoolWorker-3: Connected to cerebro db.
Initializing ForkPoolWorker-5 [pid 120296]
ForkPoolWorker-5: Connected to cerebro db.
ForkPoolWorker-5: Created temporary directory /tmp/madlib_vnvXp1dXE0
ForkPoolWorker-4: Co

ForkPoolWorker-2: Wrote 1000 images to /tmp/madlib_uYyMLvKdBQ/cifar_10_train_data0010.tmp
ForkPoolWorker-1: Loaded 1000 images into cifar_10_train_data
ForkPoolWorker-2: Loaded 1000 images into cifar_10_train_data
ForkPoolWorker-1: Wrote 1000 images to /tmp/madlib_UtkeUeMcVk/cifar_10_train_data0011.tmp
ForkPoolWorker-1: Loaded 1000 images into cifar_10_train_data
ForkPoolWorker-3: Removed temporary directory /tmp/madlib_Tlt9UX62iO
ForkPoolWorker-5: Removed temporary directory /tmp/madlib_vnvXp1dXE0
ForkPoolWorker-4: Removed temporary directory /tmp/madlib_DeZY5yzuYF
ForkPoolWorker-2: Removed temporary directory /tmp/madlib_uYyMLvKdBQ
ForkPoolWorker-1: Removed temporary directory /tmp/madlib_UtkeUeMcVk
Done!  Loaded 50000 images in 33.17561364173889s
5 workers terminated.
MainProcess: Connected to cerebro db.
Executing: CREATE TABLE cifar_10_test_data (id SERIAL, x REAL[], y TEXT)
CREATE TABLE
Created table cifar_10_test_data in cerebro db
Spawning 5 workers...
Initializing ForkPoolWork

In [9]:
%%sql
SELECT COUNT(*) FROM cifar_10_train_data;

 * postgresql://gpadmin@localhost:5432/cerebro
1 rows affected.


count
50000


In [13]:
%%sql
SELECT COUNT(*) FROM cifar_10_test_data;

1 rows affected.


count
10000


<a id="file_system"></a>
# 3. Load from file system

Uses the Python Imaging Library so supports multiple formats
http://www.pythonware.com/products/pil/

<em>load_dataset_from_disk(root_dir, table_name, num_labels='all', append=False)</em>

- Calling this function  will look in <em>root_dir</em> on the local disk of wherever this is being run.  It will skip over any files in that directory, but will load images contained in each of its subdirectories.  The images should be organized by category/class, where the name of each subdirectory is the label for the images contained within it.


- The <em>table_name</em> and <em>append</em> parameters are the same as above  The parameter <em>num_labels</em> is an optional parameter which can be used to restrict the number of labels (image classes) loaded, even if more are found in <em>root_dir</em>.  For example, for a large dataset you may have hundreds of labels, but only wish to use a subset of that containing a few dozen.

For example, if we put the CIFAR-10 training data is in 10 subdirectories under directory <em>cifar10</em>, with one subdirectory for each class:

In [14]:
%sql drop table if exists cifar_10_train_data_filesystem;
# Load images from file system
iloader.load_dataset_from_disk('/Users/fmcquillan/tmp/cifar10', 'cifar_10_train_data_filesystem', num_labels='all', append=False)

Done.
MainProcess: Connected to madlib db.
Executing: CREATE TABLE cifar_10_train_data_filesystem (id SERIAL, x REAL[], y TEXT,                        img_name TEXT)
CREATE TABLE
Created table cifar_10_train_data_filesystem in madlib db
.DS_Store is not a directory, skipping
number of labels = 10
Found 10 image labels in /Users/fmcquillan/tmp/cifar10
Spawning 5 workers...
Initializing PoolWorker-11 [pid 82438]
PoolWorker-11: Created temporary directory /tmp/madlib_aEC1lF2HqL
Initializing PoolWorker-12 [pid 82439]
PoolWorker-12: Created temporary directory /tmp/madlib_70qpwFzzqW
Initializing PoolWorker-13 [pid 82440]
PoolWorker-13: Created temporary directory /tmp/madlib_r2u4Zo5bPt
PoolWorker-11: Connected to madlib db.
Initializing PoolWorker-14 [pid 82441]
PoolWorker-12: Connected to madlib db.
PoolWorker-14: Created temporary directory /tmp/madlib_aTPESoNjVi
Initializing PoolWorker-15 [pid 82443]
PoolWorker-13: Connected to madlib db.
PoolWorker-15: Created temporary directory /tmp/m

PoolWorker-11: Wrote 1000 images to /tmp/madlib_aEC1lF2HqL/cifar_10_train_data_filesystem0008.tmp
PoolWorker-14: Loaded 1000 images into cifar_10_train_data_filesystem
PoolWorker-15: Loaded 1000 images into cifar_10_train_data_filesystem
PoolWorker-12: Loaded 1000 images into cifar_10_train_data_filesystem
PoolWorker-13: Loaded 1000 images into cifar_10_train_data_filesystem
PoolWorker-11: Loaded 1000 images into cifar_10_train_data_filesystem
PoolWorker-14: Wrote 1000 images to /tmp/madlib_aTPESoNjVi/cifar_10_train_data_filesystem0009.tmp
PoolWorker-15: Wrote 1000 images to /tmp/madlib_rhVwjLTbWI/cifar_10_train_data_filesystem0009.tmp
PoolWorker-12: Wrote 1000 images to /tmp/madlib_70qpwFzzqW/cifar_10_train_data_filesystem0009.tmp
PoolWorker-13: Wrote 1000 images to /tmp/madlib_r2u4Zo5bPt/cifar_10_train_data_filesystem0009.tmp
PoolWorker-11: Wrote 1000 images to /tmp/madlib_aEC1lF2HqL/cifar_10_train_data_filesystem0009.tmp
PoolWorker-14: Loaded 1000 images into cifar_10_train_data_fil

In [15]:
%%sql
SELECT COUNT(*) FROM cifar_10_train_data_filesystem;

1 rows affected.


count
50000
