# Example Notebook for Setting Up and Installing h5py on Bluemix

## Identify your current working directory
### We will need to declare a few paths

In [1]:
# Author:  Sanjay Joshi (@jStartter) ibm.biz/sanjay_joshi
# Courtesy of jStart - IBM Emerging Technology's client engagement team

import os
import subprocess as sub  # no longer used in this cell; kept in case other cells rely on `sub`

# The per-user root is the current working directory with its last two
# path components removed. os.getcwd() replaces the former `pwd` subprocess:
# same path, no trailing newline, and a text string on both Python 2 and 3.
prefix = os.sep.join(os.getcwd().split(os.sep)[:-2])
shareDir = prefix + "/.local/share"
hdf5Dir = shareDir + "/notebook_hdf5"

# Single-argument print(...) produces identical output on Python 2 and 3.
print("prefix = " + prefix)
print("shareDir = " + shareDir)
print("hdf5Dir = " + hdf5Dir)

prefix = /gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5
shareDir = /gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/.local/share
hdf5Dir = /gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/.local/share/notebook_hdf5


In [2]:
# Temporary testing cell that resets the setup to an uninstalled state.
# Uncomment and run it only if you are modifying this notebook and need to start over.
# WARNING: the last command (`rm -rf *`) deletes everything in the current working directory.
# !rm -rf $shareDir
# !rm -rf $prefix/.local/lib/python2.7/site-packages/h5py
# !rm -rf $prefix/.local/lib/python2.7/site-packages/h5py-2.6.0-py2.7.egg-info
# !rm -rf *

In [3]:
!mkdir $shareDir
!mkdir $hdf5Dir
!mkdir $hdf5Dir/hdf5

## Fetch the HDF5 source tarball (.tar.gz)

In [4]:
# Download the HDF5 1.8.17 source tarball into $hdf5Dir.
# NOTE(review): the "current/" path segment presumably tracks the latest release - confirm this URL still serves 1.8.17.
!wget https://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.17.tar.gz -O $hdf5Dir/hdf5-1.8.17.tar.gz

--2016-05-17 16:53:51--  https://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.8.17.tar.gz
Resolving www.hdfgroup.org (www.hdfgroup.org)... 50.28.50.143
Connecting to www.hdfgroup.org (www.hdfgroup.org)|50.28.50.143|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12304149 (12M) [application/x-gzip]
Saving to: '/gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/.local/share/notebook_hdf5/hdf5-1.8.17.tar.gz'


2016-05-17 16:53:56 (2.17 MB/s) - '/gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/.local/share/notebook_hdf5/hdf5-1.8.17.tar.gz' saved [12304149/12304149]



## Untar (extract) file

In [5]:
# This command takes about 10-15 secs
!tar -zxvf $hdf5Dir/hdf5-1.8.17.tar.gz -C $hdf5Dir >/dev/null
hdf5SrcDir = hdf5Dir + "/hdf5-1.8.17"
print hdf5SrcDir

/gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/.local/share/notebook_hdf5/hdf5-1.8.17


In [6]:
# Sanity check: list the contents of the extracted source directory.
!ls -al $hdf5SrcDir

total 2376
drwxr-xr-x 14 s1a2-472d95bcebf7db-bf066087ecf5 users    4096 May 17 16:54 .
drwxr-xr-x  4 s1a2-472d95bcebf7db-bf066087ecf5 users    4096 May 17 16:53 ..
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users     683 Apr 26 07:44 ACKNOWLEDGMENTS
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users    5701 Apr 26 07:44 CMakeFilters.cmake
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users   24952 Apr 26 07:44 CMakeInstallation.cmake
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users   40635 Apr 26 07:44 CMakeLists.txt
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users    4677 Apr 26 07:44 COPYING
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users    1508 Apr 26 07:44 CTestConfig.cmake
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users   68969 May  6 16:41 MANIFEST
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users    1318 Apr 26 07:44 Makefile
-rw-r--r--  1 s1a2-472d95bcebf7db-bf066087ecf5 users    7599 Apr 26 07:44 Makefile.am
-rw-r--r--  1 s1a2-472d95bce

In [7]:
# Set up the build configuration.
# configure is invoked from the source tree but runs in the current working directory;
# --prefix selects $hdf5Dir/hdf5 as the installation directory.
!$hdf5SrcDir/configure --prefix=$hdf5Dir/hdf5

checking for a BSD-compatible install... /bin/install -c
checking whether build environment is sane... yes
checking for a thread-safe mkdir -p... /bin/mkdir -p
checking for gawk... gawk
checking whether make sets $(MAKE)... yes
checking whether make supports nested variables... yes
checking whether make supports nested variables... (cached) yes
checking whether to enable maintainer-specific portions of Makefiles... no
checking build system type... x86_64-unknown-linux-gnu
checking host system type... x86_64-unknown-linux-gnu
checking shell variables initial values... done
checking if basename works... yes
checking if xargs works... yes
checking for cached host... none
checking for config x86_64-unknown-linux-gnu... no
checking for config x86_64-unknown-linux-gnu... no
checking for config unknown-linux-gnu... no
checking for config unknown-linux-gnu... no
checking for config x86_64-linux-gnu... no
checking for config x86_64-linux-gnu... no
checking for config x86_64-unknown... no
checki

In [8]:
# Let's make the build and install process as quiet as possible by removing all of the H5_CFLAGS compiler warning settings.
# Remove any leftover output file from a previous run (error message suppressed if there is none).
!rm config.status.new 2>/dev/null
# 1st sed: collect the whole file in the hold space, then on the last line replace everything from "-std up to O3" with "-std=c99" and print.
# Remaining seds: convert the HTML entities &amp; &lt; &gt; back to & < >.
!cat config.status | sed -n '1h;1!H;${;g;s/"\-std.*O3"/"\-std=c99"/g;p;}' | sed 's/&amp;/\&/g' | sed 's/&lt;/\</g' | sed 's/&gt;/\>/g' > config.status.new
# Replace the original config.status with the rewritten copy.
!rm config.status
!mv config.status.new config.status

In [9]:
# WARNING: This cell takes a while (~10-12 mins) and causes high CPU usage in the browser.
# Your browser may even become unresponsive for a period of time. Be patient until the cell execution is complete. Go grab something to drink.
# It configures and compiles the native HDF5 libraries that h5py requires.
# NOTE: You may need to refresh your browser after this cell completes. Carefully monitor the kernel indicator in the upper right as well.
#       The good news is that this is a one-time operation. After the native libs are built, they will be available to all existing and new notebooks within this Spark instance.
# `make install` only runs if the build succeeds (&&); -w prints the directory make is working in.
!make -w && make -w install

make: Entering directory `/gpfs/global_fs01/sym_shared/YPProdSpark/user/s1a2-472d95bcebf7db-bf066087ecf5/notebook/work'
 /bin/sh ./config.status
config.status: creating src/libhdf5.settings
config.status: creating Makefile
config.status: creating src/Makefile
config.status: creating test/Makefile
config.status: creating test/testcheck_version.sh
config.status: creating test/testerror.sh
config.status: creating test/H5srcdir_str.h
config.status: creating test/testlibinfo.sh
config.status: creating test/testlinks_env.sh
config.status: creating test/test_plugin.sh
config.status: creating testpar/Makefile
config.status: creating tools/Makefile
config.status: creating tools/h5dump/Makefile
config.status: creating tools/h5dump/testh5dump.sh
config.status: creating tools/h5dump/testh5dumppbits.sh
config.status: creating tools/h5dump/testh5dumpxml.sh
config.status: creating tools/h5ls/testh5ls.sh
config.status: creating tools/h5import/Makefile
config.status: creating tools/h5import/h5importtes

## Housekeeping

In [10]:
# Remove all of the by-products generated by configure and make.
# WARNING: `rm -rf *` deletes EVERYTHING (all non-hidden files and folders) in the current working directory,
# not just build output. Make sure nothing you want to keep lives there before running this cell.
!rm -rf *
# Remove the extracted source folder
!rm -rf $hdf5Dir/hdf5-1.8.17
# Remove the tar gzip file
!rm $hdf5Dir/hdf5-1.8.17.tar.gz

In [11]:
os.environ['HDF5_DIR']= hdf5Dir + "/hdf5"
!pip install h5py --user

Collecting h5py
  Using cached h5py-2.6.0.tar.gz
Installing collected packages: h5py
  Running setup.py install for h5py ... [?25l- \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | done
[?25hSuccessfully installed h5py-2.6.0


# Let's work through the h5py Quick Start Guide
[Quick Start Guide](http://docs.h5py.org/en/latest/quick.html)

In [12]:
import h5py
import numpy as np
# Mode "w" creates the file, truncating any existing file with the same name.
# NOTE(review): the handle is not closed in the visible part of the notebook - confirm f.close() is called later.
f = h5py.File("mytestfile1.hdf5", "w")

In [13]:
# Create a one-dimensional, 100-element integer dataset named "mydataset" in the file.
dset = f.create_dataset("mydataset", (100,), dtype='i')

In [14]:
# Inspect the dataset's shape.
dset.shape

(100,)

In [15]:
# Inspect the dataset's element type.
dset.dtype

dtype('int32')

In [16]:
# Assign to the whole dataset at once (Ellipsis indexing): values 0..99.
dset[...] = np.arange(100)

In [17]:
# Read back the first element.
dset[0]

0

In [18]:
# Read back the element at index 10.
dset[10]

10

In [19]:
# Strided slice: every 10th element of the dataset.
dset[0:100:10]

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype=int32)

In [20]:
# Full path of the dataset inside the file.
dset.name

u'/mydataset'

In [21]:
# The File object also acts as the root group, whose name is "/".
f.name

u'/'

In [22]:
# Create a group named "subgroup" directly under the root.
grp = f.create_group("subgroup")

In [23]:
# Create a 50-element float dataset inside the subgroup.
dset2 = grp.create_dataset("another_dataset", (50,), dtype='f')

In [24]:
# The dataset's path includes its parent group.
dset2.name

u'/subgroup/another_dataset'

In [25]:
# Creating a dataset by path: the intermediate group "subgroup2" is created automatically.
dset3 = f.create_dataset('subgroup2/dataset_three', (10,), dtype='i')

In [26]:
# Full path of the dataset created via a nested path.
dset3.name

u'/subgroup2/dataset_three'

In [27]:
# Iterating over the file yields the names of the root group's direct members.
for name in f:
    print(name)

mydataset
subgroup
subgroup2


In [28]:
# Attach a metadata attribute to the dataset, then read it back.
dset.attrs['temperature'] = 99.5
dset.attrs['temperature']

99.5

In [29]:
# Check whether the attribute exists on the dataset.
'temperature' in dset.attrs

True