# In [13]:
from __future__ import print_function

import csv
import os
import re
import sys
from datetime import datetime

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pytz
from scipy.signal import medfilt


class ProgressBar(object):
    """Single-line text progress bar that is redrawn in place on a stream.

    Typical use: create the bar with the expected number of steps, bump
    ``current`` after each step and call the instance to redraw it, then
    call ``done()`` to draw the completed bar and terminate the line.

    Format templates are ``%``-style strings that may reference the keys
    ``bar``, ``current``, ``total``, ``percent`` and ``remaining``.
    """

    # Ready-made templates for the ``fmt`` argument.
    DEFAULT = 'Progress: %(bar)s %(percent)3d%%'
    FULL = '%(bar)s %(current)d/%(total)d (%(percent)3d%%) %(remaining)d to go'

    def __init__(self, total, width=40, fmt=DEFAULT, symbol='=',
                 output=sys.stderr):
        """Create a progress bar.

        Args:
            total: Number of steps that make up 100%.
            width: Number of character cells between the brackets.
            fmt: ``%``-style template (see ``DEFAULT`` and ``FULL``).
            symbol: Single character used to fill the completed part.
            output: File-like object the bar is printed to.
        """
        assert len(symbol) == 1

        self.total = total
        self.width = width
        self.symbol = symbol
        self.output = output
        # Give every bare ``%(name)d`` field a minimum width equal to the
        # number of digits in ``total`` so the rendered line keeps a
        # constant length while the counters grow.
        self.fmt = re.sub(r'(?P<name>%\(.+?\))d',
                          r'\g<name>%dd' % len(str(total)), fmt)

        self.current = 0

    def __call__(self):
        """Redraw the bar for the present value of ``current``."""
        # A bar with nothing to do is reported as complete instead of
        # dividing by zero.
        if self.total:
            percent = self.current / float(self.total)
        else:
            percent = 1.0
        size = int(self.width * percent)
        remaining = self.total - self.current
        bar = '[' + self.symbol * size + ' ' * (self.width - size) + ']'

        args = {
            'total': self.total,
            'bar': bar,
            'current': self.current,
            'percent': percent * 100,
            'remaining': remaining
        }
        # The leading carriage return moves back to the start of the line
        # so each redraw overwrites the previous one.
        print('\r' + self.fmt % args, file=self.output, end='')

    def done(self):
        """Draw the bar as fully complete and end the line."""
        self.current = self.total
        self()
        print('', file=self.output)


def task_1():
    """Index every ``.h5`` file found in the ``H5Files`` directory.

    For each HDF5 file the dataset catalogue produced by ``task_2`` is
    appended to the CSV file via ``write_into_csv``, while a progress bar
    is drawn on the ``ProgressBar`` default output stream.
    """
    dir_name = "H5Files"
    # Select the HDF5 files up front so that both the announced count and
    # the progress total describe the files that are actually processed.
    h5_files = [name for name in get_all_filenames(dir_name)
                if name.endswith(".h5")]
    sys.stdout.write("Indexing "+str(len(h5_files))+" files")
    if not h5_files:
        # Nothing to do; avoid building a progress bar with a zero total.
        return

    progress = ProgressBar(len(h5_files), fmt=ProgressBar.FULL)
    # NOTE: the file-name timestamp is deliberately not decoded here. Its
    # result was never used, and unpacking it failed for any file whose
    # name did not start with a 19-character timestamp.
    for name in h5_files:
        CSVMapping = task_2(dir_name, name)
        write_into_csv(CSVMapping)
        progress.current += 1
        progress()
    progress.done()

def task_2(dir_name, main_file_ptr):
    """Build the dataset catalogue for one HDF5 file.

    Args:
        dir_name: Directory that contains the file.
        main_file_ptr: File name inside ``dir_name``; it is also stored in
            every catalogue row.

    Returns:
        The list of rows produced by ``get_csv_mapping`` for the file.
    """
    # Open read-only inside a ``with`` block so the HDF5 handle is closed
    # even if the traversal raises.
    with h5py.File(dir_name+"/"+str(main_file_ptr), 'r') as h5_file:
        return get_csv_mapping([], h5_file, main_file_ptr)
    
def get_csv_mapping(CSVMapping, file, main_file_ptr):
    """Recursively collect one catalogue row per dataset below ``file``.

    Args:
        CSVMapping: List the rows are appended to (mutated in place).
        file: ``h5py.Group`` to walk; any other object is ignored.
        main_file_ptr: Value stored in the file-name column of each row.

    Returns:
        ``CSVMapping``, the same list object that was passed in.
    """
    if not isinstance(file, h5py.Group):
        return CSVMapping
    for sub in file.keys():
        # Look the child up once; the original code repeated this lookup
        # for every attribute access.
        node = file[sub]
        if isinstance(node, h5py.Dataset):
            CSVMapping.append(_dataset_row(node, file.name, main_file_ptr))
        elif isinstance(node, h5py.Group):
            get_csv_mapping(CSVMapping, node, main_file_ptr)
    return CSVMapping


def _dataset_row(dataset, group_name, main_file_ptr):
    """Return the row [name, group, file, value, size, shape, dtype] for one dataset."""
    name, size, shape = dataset.name, dataset.size, dataset.shape
    # Row used for empty datasets and for datasets that cannot be read.
    fallback = [name, group_name, main_file_ptr, "NA", size, shape, "NA"]
    if size == 0:
        return fallback
    try:
        # Only single-element datasets have their value recorded.
        # NOTE(review): a dataset with scalar shape () presumably cannot be
        # indexed with [0] and therefore ends up in the fallback row --
        # confirm whether reading it with [()] is wanted.
        value = dataset[0] if size == 1 else "NA"
        return [name, group_name, main_file_ptr, value, size, shape,
                str(dataset.dtype)]
    except Exception:
        # Best effort: an unreadable dataset is still listed, without
        # value and dtype.
        return fallback
            
def write_into_csv(CSVMapping):
    """Append catalogue rows to ``CSVFiles/csv_all_files.csv``.

    The output directory is created when missing. When the CSV file does
    not exist yet, a header row is written before the data rows. Empty
    rows in ``CSVMapping`` are skipped.

    Args:
        CSVMapping: Iterable of row sequences to write.
    """
    csv_path = 'CSVFiles/csv_all_files.csv'
    csv_dir = os.path.dirname(csv_path)
    if not os.path.isdir(csv_dir):
        os.makedirs(csv_dir)

    # Decide before opening: append mode creates the file.
    write_header = not os.path.exists(csv_path)
    # newline='' lets the csv module control line endings itself.
    with open(csv_path, mode='a', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if write_header:
            writer.writerow(['DatasetName', 'GroupName', 'HDF5FileName', 'SingletonValue', 'DatasetSize', 'DatasetShape', 'DatasetDatatype'])
        writer.writerows(row for row in CSVMapping if len(row) != 0)

def get_all_filenames(filepath):
    """Return the names of all entries in the directory ``filepath``.

    The names are returned as given by ``os.listdir``.
    """
    entries = os.listdir(filepath)
    return entries

def convert_unix_time_to_datetype(filename):
    """Decode the timestamp prefix of an underscore-separated file name.

    The part before the first ``_`` is read as a Unix timestamp in
    nanoseconds and truncated to whole seconds.

    Args:
        filename: Name without extension, with at least three
            ``_``-separated parts.

    Returns:
        ``(utc_dt, cern_dt, part1, part2)`` where ``utc_dt`` is a naive UTC
        ``datetime``, ``cern_dt`` is the same instant in the
        ``Europe/Zurich`` time zone, and ``part1``/``part2`` are the second
        and third name parts. Returns ``None`` when the first part is not
        exactly 19 characters long.

    Raises:
        ValueError: If the 19-character prefix is not numeric.
        IndexError: If the name has fewer than three parts.
    """
    filename_array = filename.split("_")
    stamp = str(filename_array[0])
    if len(stamp) != 19:
        return None

    # Use integer arithmetic: a 19-digit value does not fit a float
    # exactly and could round up across a second boundary.
    try:
        seconds = int(stamp) // 10 ** 9
    except ValueError:
        # Keep accepting non-integer numeric prefixes as before.
        seconds = float(stamp) // 1e9

    utc_dt = datetime.utcfromtimestamp(seconds)
    cern_tz = pytz.timezone('Europe/Zurich')
    cern_dt = utc_dt.replace(tzinfo=pytz.utc).astimezone(cern_tz)
    cern_dt = cern_tz.normalize(cern_dt)
    return (utc_dt, cern_dt, filename_array[1], filename_array[2])

# Run the indexing only when executed as a script or notebook cell, not
# when this code is imported as a module.
if __name__ == "__main__":
    task_1()

# Output: Indexing 11 files

