In [1]:
import os
import sys
import csv
import ast
import json
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pyplot import figure
import matplotlib.dates as mdates


In [295]:
# Driver cell: run the audio completeness check for one sensor.
# NOTE(review): absolute, machine-specific path; this cell only runs on a
# machine that has this exact folder.
path = '/Users/maggie/Desktop/HPD_mobile-H1'

sensors = ['BS1']
server_id = sensors[0]  # only the first listed sensor is checked
audio_tape_length = '20'  # kept as a string; AudioChecker converts it with int()
root_dir = os.path.join(path, server_id, 'audio')  # not used again in this cell

# NOTE(review): AudioChecker is defined in a cell that appears further down in
# this notebook (and carries a lower execution count), so on a fresh kernel
# that cell has to be run before this one.
a = AudioChecker(path, server_id, audio_tape_length)
a.main()



Date: 2019-02-10, Sensor: BS1
Start Time :  22:29
End Time :  23:58
Expected number of wavs :  270
Number of unique wavs :  206
Total number of duplicates :  0
Number of not captured wavs :  64
Percent of wavs captured :  0.76
Expected number of directories :  90
Number of directories w/ correct number wavs :  62
Number of directories w/ incorrect number wavs :  10
Number of directories w/ zero wavs :  18
Directories per hour w/ zero wavs :  {'22:00': 3, '23:00': 15}
Hours with no wavs :  []


Date: 2019-02-11, Sensor: BS1
Start Time :  00:00
End Time :  23:59
Expected number of wavs :  4320
Number of unique wavs :  14
Total number of duplicates :  0
Number of not captured wavs :  4306
Percent of wavs captured :  0.0
Expected number of directories :  1440
Number of directories w/ correct number wavs :  4
Number of directories w/ incorrect number wavs :  1
Number of directories w/ zero wavs :  329
Directories per hour w/ zero wavs :  {'00:00': 54, '01:00': 44, '02:00': 34, '03:00': 15, 

In [293]:
class AudioChecker():
    """Audit the ``.wav`` files one sensor recorded under ``<path>/<server_id>/audio``.

    The tree is read as ``<root>/<YYYY-MM-DD>/<HHMM>/<name>.wav``; every wav name
    must start with ``YYYY-MM-DD HHMMSS`` followed by an underscore.  For each
    day folder the checker builds the grid of timestamps expected between the
    first and last minute folder present, ticks off the wavs it finds, and
    summarises the result in ``self.day_summary[day]``.

    Parameters
    ----------
    path : str
        Folder that contains one sub-folder per sensor.
    server_id : str
        Sensor folder name (also printed in the per-day header).
    tape_length : str or int
        Length of one recording in seconds; converted with ``int()``.
    display_output : bool, default True
        Print each day's summary.
    write_file : bool, default False
        Write the collected summaries to ``<root>/test_output.json``.
    """

    def __init__(self, path, server_id, tape_length, display_output = True, write_file = False):
        self.write_file = write_file
        self.display_output = display_output
        self.root_dir = os.path.join(path, server_id, 'audio')
        self.server_id = server_id
        self.audio_tape_length = tape_length
        self.correct_files_per_dir = int(60/int(self.audio_tape_length))
        self.date_folders = self.get_date_folders(self.root_dir)
        if self.date_folders:
            # Every calendar day between the first and last folder, so gaps
            # show up in missing_days.
            self.date_dirs = [
                str(day.date())
                for day in pd.date_range(start = self.day1, end = self.dayn, freq = 'D').tolist()
            ]
        else:
            self.date_dirs = []
        self.missing_days = [day for day in self.date_dirs if day not in self.date_folders]
        self.day_summary = {}
        self.first_last = {}
        # Second at which the last recording of a minute starts (e.g. '40' for 20 s tapes).
        self.end_sec = str(60-int(self.audio_tape_length))

    @staticmethod
    def _parses_as(name, fmt):
        """Return True when ``name`` can be parsed with the strptime format ``fmt``."""
        try:
            datetime.strptime(name, fmt)
        except ValueError:
            return False
        return True

    def mylistdir(self, directory):
        """List ``directory`` without hidden entries and without names containing 'Icon'."""
        filelist = os.listdir(directory)
        return [x for x in filelist if not (x.startswith('.') or 'Icon' in x)]

    def get_date_folders(self, path):
        """Return the sorted day folders in ``path`` and record the first/last one.

        Only directories whose name parses as ``YYYY-MM-DD`` are kept.  Stray
        files in the audio root (for instance the ``test_output.json`` that
        ``writer`` puts there) must not be mistaken for a day.
        Sets ``self.day1`` / ``self.dayn`` (both ``None`` when nothing is found).
        """
        date_folders = sorted(
            name for name in self.mylistdir(path)
            if os.path.isdir(os.path.join(path, name)) and self._parses_as(name, '%Y-%m-%d')
        )
        if date_folders:
            self.day1, self.dayn = date_folders[0], date_folders[-1]
        else:
            self.day1 = self.dayn = None
        return date_folders

    def get_all_mins(self, day, hr_mins):
        """Build the expected timestamp grids for ``day``.

        ``hr_mins`` (list of ``HHMM`` strings) is sorted in place; its first and
        last entries bound the expected range and are stored in
        ``self.first_last``.  Sets ``expected_wavs`` / ``expected_dirs`` (first
        to last minute present) and ``all_seconds`` / ``all_minutes`` (whole day).

        Raises IndexError when ``hr_mins`` is empty.
        """
        hr_mins.sort()
        min_i, min_f = hr_mins[0], hr_mins[-1]
        self.first_last = min_i, min_f
        b_f = str(day + ' 00:00:00')
        e_f = str(day + ' 23:59:' + self.end_sec)
        b_dt = datetime.strptime((day + ' ' + min_i), '%Y-%m-%d %H%M')
        e_dt = datetime.strptime((day + ' ' + min_f + self.end_sec), '%Y-%m-%d %H%M%S')
        # Explicit Timedelta steps instead of the '<n>S' alias strings, whose
        # upper-case form is deprecated in recent pandas releases.
        tape_step = pd.Timedelta(seconds = int(self.audio_tape_length))
        minute_step = pd.Timedelta(minutes = 1)
        self.expected_wavs = pd.date_range(b_dt, e_dt, freq = tape_step).tolist()
        self.all_seconds = pd.date_range(b_f, e_f, freq = tape_step).tolist()
        self.expected_dirs = pd.date_range(b_dt, e_dt, freq = minute_step).tolist()
        self.all_minutes = pd.date_range(b_f, e_f, freq = minute_step).tolist()

    def finder(self):
        """Tick the wavs in ``self.wavs`` off ``self.expected_wavs``.

        A wav whose timestamp is no longer (or never was) in the expected list
        is counted in ``self.duplicates`` and logged in ``self.duplicates_ts``;
        note this also covers timestamps that fall off the expected grid.
        """
        for wav in self.wavs:
            dt = datetime.strptime(wav.split('_')[0], '%Y-%m-%d %H%M%S')
            try:
                self.expected_wavs.remove(dt)
            except ValueError:
                # list.remove raises ValueError when the timestamp is absent.
                self.duplicates += 1
                self.duplicates_ts.append(dt.strftime('%Y-%m-%d %H:%M:%S'))

    def writer(self, output_dict):
        """Dump ``output_dict`` as JSON to ``<root_dir>/test_output.json`` if ``write_file`` is set."""
        if self.write_file:
            b = 'test_output.json'
            write_file = os.path.join(self.root_dir, b)
            print('Writing file to: {}'.format(write_file))
            with open(write_file, 'w') as f:
                json.dump(output_dict, f)

    def displayer(self, output_dict, d):
        """Print ``output_dict`` line by line, or 'No output' when display is off.

        ``d`` is accepted for interface compatibility and is not used.
        """
        if self.display_output:
            for key in output_dict:
                print(key, ': ', output_dict[key])
            print('\n')
        else:
            print('No output')

    def configure_output(self,d):
        """Assemble the summary dict for the day that ``main`` just scanned.

        The dict is always built (previously it was left unbound, and the method
        raised, when both ``write_file`` and ``display_output`` were false).
        ``d`` is accepted for interface compatibility and is not used.
        """
        unique_wavs = self.total_wavs - self.duplicates
        # Stored as a fraction in [0, 1] rounded to two decimals, despite the label.
        self.perc_cap = round(unique_wavs / self.expect_num_wavs, 2)
        # An hour counts as empty only when all 60 of its minute folders exist and hold no wav.
        self.zero_hours = [hr for hr in self.zero_dirs if self.zero_dirs[hr] == 60]

        return {
            'Start Time': datetime.strptime(self.first_last[0], '%H%M').strftime('%H:%M'),
            'End Time': datetime.strptime(self.first_last[1], '%H%M').strftime('%H:%M'),
            'Expected number of wavs': self.expect_num_wavs,
            'Number of unique wavs': unique_wavs,
            'Total number of duplicates': self.duplicates,
            'Number of not captured wavs': len(self.expected_wavs),
            'Percent of wavs captured': self.perc_cap,
            'Expected number of directories': len(self.expected_dirs),
            'Number of directories w/ correct number wavs': len(self.count_correct),
            'Number of directories w/ incorrect number wavs': len(self.count_other),
            'Number of directories w/ zero wavs': len(self.num_zero_dirs),
            'Directories per hour w/ zero wavs': self.zero_dirs,
            'Hours with no wavs': self.zero_hours
        }

    def main(self):
        """Scan every day folder, fill ``self.day_summary``, then display and write.

        The JSON file receives the whole ``day_summary`` (one entry per day);
        previously only the last day's dict was written.
        """
        for d in self.date_folders:
            day_path = os.path.join(self.root_dir, d)
            # Keep only real minute folders; stray files are ignored.
            hr_min_dirs = [
                name for name in self.mylistdir(day_path)
                if os.path.isdir(os.path.join(day_path, name)) and self._parses_as(name, '%H%M')
            ]
            if not hr_min_dirs:
                print('Date: {}, Sensor: {}: no minute directories found, skipping'.format(d, self.server_id))
                continue

            self.get_all_mins(d, hr_min_dirs)
            self.expect_num_wavs = len(self.expected_wavs)
            self.expect_num_directories = len(self.expected_dirs)

            # Per-day tallies, reset for every day folder.
            self.total_wavs = 0
            self.wavs = []
            self.count_correct = {}
            self.zero_dirs = {}
            self.count_other = {}
            self.num_zero_dirs = []
            self.zero_hours = []
            self.duplicates = 0
            self.duplicates_ts = []

            for hr_min in hr_min_dirs:
                minute_path = os.path.join(day_path, hr_min)
                self.wavs = [x for x in self.mylistdir(minute_path) if x.endswith('.wav')]
                self.finder()
                self.total_wavs += len(self.wavs)

                hr = datetime.strptime(hr_min,'%H%M').strftime('%H:00')
                if len(self.wavs) == self.correct_files_per_dir:
                    self.count_correct[hr_min] = self.correct_files_per_dir
                elif len(self.wavs) == 0:
                    self.num_zero_dirs.append(hr_min)
                    self.zero_dirs[hr] = self.zero_dirs.get(hr, 0) + 1
                else:
                    self.count_other[hr_min] = len(self.wavs)

            output_dict = self.configure_output(d)
            self.day_summary[d] = output_dict
            print('Date: {}, Sensor: {}'.format(d, self.server_id))
            self.displayer(output_dict, d)
        self.writer(self.day_summary)
        


In [None]:
class PhotoChecker():
    """Draft checker that counts the ``.png`` files under ``<root>/<server_id>/img``.

    NOTE(review): as written this class cannot be instantiated or run.
    ``__init__`` reads ``self.b_dt``, ``self.e_dt`` and ``self.conf_dict``,
    ``configure_output`` reads ``self.write_file``, and ``main`` reads
    ``self.b_dt_as_dt`` / ``self.e_dt_as_dt`` and calls ``self.finder``,
    ``self.writer`` and ``self.displayer``; none of these are assigned or
    defined in the visible code.  Presumably the commented-out
    ``import_conf`` call was meant to provide the configuration; confirm.
    """

    def __init__(self, path_to_import_conf, write_file, display_output, server_id):
        # NOTE(review): the four assignments below are commented out, so
        # path_to_import_conf, write_file and display_output are currently ignored.
        # self.conf_file_path = path_to_import_conf
        # self.import_conf(self.conf_file_path)
        # self.write_file = write_file
        # self.display_output = display_output
        # Grid of timestamps at which an image is expected.
        self.all_seconds = pd.date_range(self.b_dt, self.e_dt, freq = self.conf_dict['img_freq']).tolist()
        self.expect_num_photos = len(self.all_seconds)
        self.expect_num_directories = len(pd.date_range(self.b_dt, self.e_dt, freq = self.conf_dict["dir_create_freq"]).tolist())
        self.root_dir = os.path.join(self.conf_dict['root'], server_id, 'img')
        self.date_dirs = self.conf_dict['date_dirs']
        self.hrs_to_pass = self.conf_dict['hr_dirs_to_skip']
        # Per-directory tallies, keyed by '<date>/<HHMM>' (filled in main).
        self.count_61 = {}
        self.count_60 = {}
        self.count_other = {}
        self.total_pics = 0
        self.count_61_double_00 = 0
        self.duplicates = 0
        # Next picture count at which main prints a progress message.
        self.counter_min = 2000
        self.duplicates_ts = []
        self.pics = []
        self.start_time = datetime.now()

    def configure_output(self):
        """Build the summary dict when ``self.write_file`` is truthy, else return ``[]``.

        NOTE(review): ``rt.seconds`` is only the seconds component of the
        timedelta (days are dropped), so the reported runtime wraps after 24 h.
        NOTE(review): 'Number of not captured photos' assumes ``finder`` removes
        matched timestamps from ``self.all_seconds``; confirm, ``finder`` is
        not visible here.
        """
        if self.write_file:
            missed_seconds = []

            for ts in self.all_seconds:
                missed_seconds.append(ts.strftime('%Y-%m-%d %H:%M:%S'))
            rt = datetime.now() - self.start_time
            mins = rt.seconds / 60
            output_dict = {
                'Configuration dict': self.conf_dict,
                'Total runtime in minutes': mins,
                'Expected number of photos': self.expect_num_photos,
                'Number of photos counted (including duplicates)': self.total_pics,
                'Total number of duplicates': self.duplicates,
                'Number of not captured photos': len(self.all_seconds),
                'Expected number of directories': self.expect_num_directories,
                'Number of directories w/60 photos': len(self.count_60),
                'Number of directories w/61 photos': len(self.count_61),
                'Number of directories w/61 photos and 2x 00 second photos': self.count_61_double_00,
                'Number of directories w/not 60 OR 61 photos': len(self.count_other),
                'Non-60 or 61 directories': self.count_other,
                'Timestamps of not captured photos': missed_seconds,
                'Duplicates': self.duplicates_ts
            }
        else:
            # NOTE(review): a list, not a dict, is returned on this branch.
            output_dict = []
        return output_dict

    def main(self):
        """Walk every date/minute directory, tally the ``.png`` files, then report.

        Minute directories named '.DS_Store', listed in ``self.hrs_to_pass``, or
        outside ``[self.b_dt_as_dt, self.e_dt_as_dt]`` are skipped.
        """
        for d in self.date_dirs:
            hr_min_dirs = os.listdir(os.path.join(self.root_dir, d))
            for hr_min in hr_min_dirs:
                if not hr_min == '.DS_Store' and not hr_min in self.hrs_to_pass:
                    a = datetime.strptime((d + ' ' + hr_min), '%Y-%m-%d %H%M')
                    if a < self.b_dt_as_dt or a > self.e_dt_as_dt:
                        continue
                    else:
                        temp = os.path.join(self.root_dir, d, hr_min)
                        if os.path.isdir(temp):
                            self.pics = os.listdir(os.path.join(self.root_dir, d, hr_min))
                            self.pics = [x for x in self.pics if x.endswith('.png')]
                            self.finder()
                            self.total_pics += len(self.pics)
                            # Progress message roughly every 2000 pictures counted.
                            if self.total_pics > self.counter_min:
                                print('Counting picture: {}'.format(self.total_pics))
                                rt = datetime.now() - self.start_time
                                mins = rt.seconds / 60
                                print('Current runtime in mins: {}'.format(mins))
                                self.counter_min += 2000
                            if len(self.pics) == 61:
                                # Files whose name part before the first '_' ends in '00'.
                                double_00 = [x for x in self.pics if x.split('_')[0].endswith('00')]
                                if len(double_00) == 2:
                                    self.count_61_double_00 += 1
                                self.count_61[os.path.join(d,hr_min)] = 61

                            elif len(self.pics) == 60:
                                self.count_60[os.path.join(d,hr_min)] = 60
                            else:
                                self.count_other[os.path.join(d,hr_min)] = len(self.pics)

                        else:
                            print('{} is not a dir'.format(temp))
        
        output_dict = self.configure_output()
        self.writer(output_dict)
        self.displayer(output_dict)
        print('All done!')