In [2]:
import sys
import h5py
import datetime
import numpy as np

sys.path.append('../')
from envir import config

In [4]:
#format the date
def date_formatter(time):
    """Turn a recording key into a 'YYYY-mm-dd HH:MM:SS' string.

    The key is split on '_' and its second field is parsed as a float
    number of seconds since the epoch. The conversion uses
    ``datetime.fromtimestamp`` without a tz argument, so the result is in
    the local timezone of the machine running the notebook.
    """
    epoch_seconds = float(time.split('_')[1])
    moment = datetime.datetime.fromtimestamp(epoch_seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

In [6]:
#initialize threshold
thres = 0.5

#column of each prediction row holding class 107 (Bird vocalization, bird call, bird song)
BIRD_CLASS_IDX = 107

sensors = ['b827eb815321','b827eb8e2420','b827eb905497', 'b827eb0fedda','b827eb1685c7']

#write to csv, one row per recording:
#  sensor, minute, number of predictions >= thres in that recording,
#  fraction of that hour's recordings having at least one prediction >= thres
#The with-block closes the csv even if a sensor file fails part-way through.
with open('../../../share/data/clean_yamnet/clean_yamnet.csv','w') as f:
    for sensor in sensors:
        #read in file; the with-block releases the HDF5 handle before the next sensor is opened
        with h5py.File(config.dataFol+'YAMNet/06-09-2020/'+sensor+'_yamnet-pred.h5','r') as sensor_file:
            keys = list(sensor_file.keys())
            if not keys:
                #no recordings at all: there is no last key to report and nothing to write
                print(sensor, 'no recordings found')
                continue

            last_hour = date_formatter(keys[-1])[:-6] #the end time of each sensor
            print(sensor, last_hour)

            #Single pass over the recordings: each dataset is read from disk once and
            #reduced to the numbers needed for the csv row and for the hourly rate.
            rows = []        #(minute, hour, number of predictions >= thres) per recording
            bird_cnt = {}    #hour -> number of recordings with at least one prediction >= thres
            total_cnt = {}   #hour -> total number of recordings
            for key in keys:
                timestamp = date_formatter(key)      #format epoch time
                current_min = timestamp[:-3]         #'YYYY-mm-dd HH:MM'
                current_hour = timestamp[:-6]        #'YYYY-mm-dd HH'
                #get only the predictions for the bird class
                bird_class = [x[BIRD_CLASS_IDX] for x in np.array(sensor_file[key])]
                n_over = sum(1 for pred in bird_class if pred >= thres)

                rows.append((current_min, current_hour, n_over))
                total_cnt[current_hour] = total_cnt.get(current_hour, 0) + 1
                if n_over > 0:
                    bird_cnt[current_hour] = bird_cnt.get(current_hour, 0) + 1

        #Bird presence rate of every hour, computed only after ALL recordings were counted.
        #Counting per hour key means the final hour includes its last recording and the
        #result does not depend on hours being contiguous in the key order.
        bird_dict = {
            hour: round(float(bird_cnt.get(hour, 0)/total_cnt[hour]),5)
            for hour in total_cnt
        }

        #Every hour in rows has an entry in bird_dict by construction, so no lookup can
        #fail here; genuine write errors are left to propagate instead of being hidden.
        for current_min, current_hour, n_over in rows:
            f.write(','.join([sensor, current_min, str(n_over), str(bird_dict[current_hour])])+'\n')

b827eb815321 2020-05-11 23
b827eb8e2420 2020-05-11 23
b827eb905497 2020-04-13 02
b827eb0fedda 2020-05-11 23
b827eb1685c7 2020-05-08 23


In [None]:
#THE OLD WAY OF CLEANING YAMNET DATA

# #initialize threshold
# thres = 0.5

# #write to csv
# f = open('../../../share/data/clean_yamnet/clean_yamnet.csv','w')

# sensors = ['b827eb815321','b827eb8e2420','b827eb905497', 'b827eb0fedda','b827eb1685c7']
# for sensor in sensors:
#     #read in file
#     sensor_file = h5py.File(config.dataFol+'YAMNet/dense-bird/'+sensor+'_yamnet-pred-bird.h5','r')
#     #Filter all valid predictions by timestamp and take the average
#     for time in list(sensor_file.keys()):
#         timestamp = date_formatter(time)
#         filter_list = list(filter(lambda x: (x >= thres) , np.array(sensor_file[time]))) 
#         if len(filter_list) > 0:
#             f.write(','.join([sensor, timestamp, str(len(filter_list)), str(np.mean(filter_list))])+'\n') 
            
# #close file
# f.close()

### Print the first and last recording time of each year (2017-2020) for each sensor

In [1]:
def checkTimeRange(sensor):
    """Print the first and last recording timestamp of each year for a sensor.

    Opens the sensor's YAMNet prediction file, formats every key with
    ``date_formatter`` and keeps the smallest and largest timestamp seen per
    calendar year. Prints ``<sensor>:`` followed by a dict that maps each
    year's first timestamp to that year's last timestamp, in ascending year
    order.

    Years are taken from the data rather than from a fixed 2017-2020 table,
    so a timestamp outside that range gets its own entry instead of raising
    IndexError or being merged into another year through a negative index.
    Years without any recording are left out of the printed dict.

    sensor: sensor id used to build the file name '<sensor>_yamnet-pred.h5'.
    """
    print(sensor+":")
    start_time = {}  #year -> earliest timestamp seen in that year
    end_time = {}    #year -> latest timestamp seen in that year
    #read in file; the with-block closes the HDF5 handle once the keys are processed
    with h5py.File(config.dataFol+'YAMNet/06-09-2020/'+sensor+'_yamnet-pred.h5','r') as sensor_file:
        for time in list(sensor_file.keys()):
            timestamp = date_formatter(time)
            year = int(timestamp[:4])
            #zero-padded 'YYYY-mm-dd HH:MM:SS' strings compare in chronological order
            if year not in start_time or timestamp < start_time[year]:
                start_time[year] = timestamp
            if year not in end_time or timestamp > end_time[year]:
                end_time[year] = timestamp

    print({start_time[year]: end_time[year] for year in sorted(start_time)})

In [5]:
checkTimeRange('b827eb815321')

b827eb815321:
{'2017-02-26 01:00:05': '2017-03-24 23:59:47', '2018-03-02 06:26:04': '2018-05-12 21:02:05', '2019-02-24 00:00:35': '2019-05-12 04:08:33', '2020-02-24 00:02:09': '2020-05-11 23:57:49'}


In [6]:
checkTimeRange('b827eb8e2420')

b827eb8e2420:
{'2017-03-10 14:49:37': '2017-05-12 23:59:19', '2018-02-24 00:00:10': '2018-05-12 23:59:49', '2019-02-24 00:01:33': '2019-05-12 23:59:13', '2020-02-24 00:00:12': '2020-05-11 23:59:00'}


In [7]:
checkTimeRange('b827eb905497')

b827eb905497:
{'2017-03-15 10:17:54': '2017-03-24 23:59:17', '2018-03-15 19:08:16': '2018-05-11 11:36:20', '2019-02-24 00:00:08': '2019-05-12 23:59:00', '2020-02-24 00:00:03': '2020-04-13 02:45:43'}


In [8]:
checkTimeRange('b827eb0fedda')

b827eb0fedda:
{'2017-02-24 15:35:37': '2017-05-12 23:59:47', '2018-03-02 11:17:57': '2018-05-12 23:59:04', '2019-02-25 00:01:14': '2019-05-12 23:58:58', '2020-02-24 00:00:09': '2020-05-11 23:59:36'}


In [9]:
checkTimeRange('b827eb1685c7')

b827eb1685c7:
{'2017-02-24 00:00:20': '2017-03-24 23:59:43', '2018-02-24 00:00:10': '2018-03-22 08:05:50', '2019-02-24 00:00:04': '2019-05-12 23:55:59', '2020-02-24 00:00:10': '2020-05-08 23:59:23'}
