In [1]:
'''
    Submitted By : Aman Singh Thakur
    Date : 09/03/2019
    Submitted to Spencer Jake Gessner for GSOC eval
    Organization : CERN-HSF
    Convention followed : PEP-8
'''

import os
from datetime import datetime
import pytz
import h5py
import csv
import numpy as np
from scipy.signal import medfilt
import matplotlib.pyplot as plt

'''
    Task 1 : Getting the Unix timestamp of all H5 files and converting 
             it into Python datetype object with UTC and CERN local time.  
'''

def task_1() :
    dir_name = "H5Files"
    file_list = get_all_filenames(dir_name)
    for file in file_list :
        if file[-3:] == ".h5" :
            utc_dt, cern_dt, fileptr1, fileptr2 = convert_unix_time_to_datetype(file[:-3])
            print('UTC Time ',utc_dt)
            print('Cern Time ',cern_dt)
            CSVMapping, Image = task_2(dir_name, file, fileptr1, fileptr2)
            task_3(Image, fileptr1, fileptr2)
            

'''
    Task 2 : Open the HDF File and explore all branches of the
             directory tree. Convert resulting Data into CSV file with
             mapping Group Name, DataSet Name, Size, Shape, Datatype.
             Also Get Data for Task 3
'''
            
def task_2(dir_name, file, fileptr1, fileptr2) :
    file = h5py.File(dir_name+"/"+str(file), 'r')
    CSVMapping = [[]]
    Image = {
        'ImageData' : [],
        'ImageHeight' : 0,
        'ImageWidth' : 0
    }
    CSVMapping, Image = get_csv_mapping(CSVMapping, file, Image)
    write_into_csv(CSVMapping, fileptr1, fileptr2)
    print('CSV written in CSVFiles directory')
    return (CSVMapping, Image)

'''
    Task 3 : Use the CSV Directory to find datasets /AwakeEventData/XMPP-STREAK/StreakImage/streakImageData (image),
             /AwakeEventData/XMPP-STREAK/StreakImage/streakImageHeight (height)
             /AwakeEventData/XMPP-STREAK/StreakImage/streakImageWidth (width)
             Store information about height, width and convert 1D array to 2D png image.
             
'''

def task_3(Image, fileptr1, fileptr2) :
    Image['ImageData'] = np.reshape(Image['ImageData'] ,(Image['ImageHeight'][0],Image['ImageWidth'][0]))
    Image['ImageData'] = medfilt(Image['ImageData'], 3)
    plot_image(Image['ImageData'], fileptr1, fileptr2, "streakImage")

'''
    Helper Functions
'''

def plot_image(ImageData, fileptr1, fileptr2, filename) :
    plt.plot(ImageData)
    plt.savefig('PNGFiles/'+filename+'_'+fileptr1+'_'+fileptr2+'.png')
    plt.close()
    print('Please check necessary folders')
    

def get_data_from_task3(data, Image) :
    if(data.name == '/AwakeEventData/XMPP-STREAK/StreakImage/streakImageData') :
        print("Size of dataset is : "+str(data.size))
        print("Please Wait !")
        Image['ImageData'] = list(data)
    if(data.name == '/AwakeEventData/XMPP-STREAK/StreakImage/streakImageHeight') :
        Image['ImageHeight'] = list(data)
    if(data.name == '/AwakeEventData/XMPP-STREAK/StreakImage/streakImageWidth') :
        Image['ImageWidth'] = list(data)
    return Image

def write_into_csv(CSVMapping, fileptr1, fileptr2) :
    with open('CSVFiles/csv_directory_'+fileptr1+'_'+fileptr2+'.csv', mode='w') as csv_file :
        fieldnames = ['Group Name', 'Dataset Name', 'Dataset Size', 'Dataset Shape', 'Dataset Datatype']
        writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(fieldnames)
        for row in CSVMapping :
            writer.writerow(row)

def get_csv_mapping(CSVMapping, file, Image) :
    if(isinstance(file, h5py.Group)) :
        for sub in file.keys() :
            if(isinstance(file[sub], h5py.Dataset)) :
                if (file[sub].size == 0):
                    CSVMapping.append([file.name, file[sub].name, file[sub].size, file[sub].shape, "NA"])
                else :
                    try:
                        CSVMapping.append([file.name, file[sub].name, file[sub].size, file[sub].shape, str(file[sub].dtype)])
                        Image = get_data_from_task3(file[sub], Image)
                        #print(ImageData, ImageHeight, ImageWidth)
                    except :
                        CSVMapping.append([file.name, file[sub].name, file[sub].size, file[sub].shape, "NA"])                        
            elif (isinstance(file[sub], h5py.Group)) :
                get_csv_mapping(CSVMapping, file[sub], Image)
    return CSVMapping, Image
                
            
def convert_unix_time_to_datetype(filename) :
    filename_array = filename.split("_")
    if len(str(filename_array[0])) == 19 :
        timestamp = float(filename_array[0])
        utc_dt = datetime.utcfromtimestamp(timestamp // 1e9)
        cern_tz = pytz.timezone('Europe/Zurich')
        cern_dt = utc_dt.replace(tzinfo=pytz.utc).astimezone(cern_tz)
        cern_dt = cern_tz.normalize(cern_dt)
        return (utc_dt, cern_dt, filename_array[1], filename_array[2])

def get_all_filenames(filepath) :
    return os.listdir(filepath)

'''
    Main Function
    Task1 calls Task2 and Task3
'''

task_1()

UTC Time  2018-11-11 18:48:28
Cern Time  2018-11-11 19:48:28+01:00
Size of dataset is : 344064
Please Wait !
CSV written in CSVFiles directory


ValueError: cannot reshape array of size 0 into shape (512,672)