In [None]:
import os, time
import csv
import shutil
from pathlib import Path
import librosa
import datetime as dt

## Create a list of all files in the target directory

In [166]:
# Data parser: list every matching media file together with its
# last-modification time so the files can be ordered chronologically later.

files_dir = Path('/data/matlab_code/deep_voice/Recordings_2019/23.9.19/videos')

# Glob pattern selecting which files are listed, for instance '*.jpg',
# or '*.*' for all files. Matching is case-sensitive on POSIX file systems.
file_pattern = '*.MP4'

# Search the immediate sub-folders of files_dir; when it has none,
# search files_dir itself.
folders = [x for x in files_dir.iterdir() if x.is_dir()]
if not folders:
    folders = [files_dir]

# (mtime, path) tuples: tuples compare element by element, so sorting this
# list orders the files by modification time.
date_file_list = []
for folder in folders:
    print("folder =", folder)
    for file in folder.glob(file_pattern):
        # st_mtime is the last-modified time in seconds since the epoch;
        # the named attribute keeps sub-second precision, unlike the
        # integer found at index 8 of the stat tuple.
        date_file_list.append((file.stat().st_mtime, file))

folder = /data/matlab_code/deep_voice/Recordings_2019/23.9.19/videos


## Find all recordings from a given day

In [167]:
recording_dir = Path('/data/matlab_code/deep_voice/Recordings_2019/23.9.19')
recordings = []
times = []
buffer = 5  # seconds of slack added before the start and after the end of each recording


def parse_recording_start(hprj_name):
    """Return a recording's start time as seconds since the epoch (local time).

    Project files are named ``YYMMDD-HHMMSS.hprj``. Only the first 13
    characters are parsed, so anything after the timestamp is ignored.

    Args:
        hprj_name: file name of the ``.hprj`` project file.

    Returns:
        float: the start time, interpreted in the local time zone.

    Raises:
        ValueError: if the name does not begin with a valid timestamp.
    """
    start = dt.datetime.strptime(hprj_name[:13], '%y%m%d-%H%M%S')
    # A naive datetime is treated as local time by timestamp().
    return start.timestamp()


recordings_paths = list(recording_dir.glob('**/*.hprj'))  # all hprj files

# Build one dict per recording describing the (buffered) time window it covers.
for record in recordings_paths:
    rectime = parse_recording_start(record.name)
    # The first audio track sits next to the project file and is named
    # after the containing folder.
    track_path = record.parent / (record.parent.name + '_Tr1.WAV')
    # NOTE: newer librosa releases (0.10+) deprecate `filename=` in favour
    # of `path=`; switch the keyword when upgrading librosa.
    record_duration = librosa.core.get_duration(filename=track_path)
    times.append({
        'start_time': rectime - buffer,
        'end_time': rectime + record_duration + buffer,
        'name': record,
    })

def getKey(item):
    """Sort key for a recording dict: its 'start_time' value."""
    start = item['start_time']
    return start

# Order the recordings by the start of their time window, then display them.
times = sorted(times, key=getKey)
print(times)

[{'start_time': 1569219505.0, 'end_time': 1569220349.301678, 'name': PosixPath('/data/matlab_code/deep_voice/Recordings_2019/23.9.19/ZOOM0002/190923-081830.hprj')}, {'start_time': 1569231235.0, 'end_time': 1569232565.196644, 'name': PosixPath('/data/matlab_code/deep_voice/Recordings_2019/23.9.19/ZOOM0003/190923-113400.hprj')}, {'start_time': 1569232616.0, 'end_time': 1569233495.8325624, 'name': PosixPath('/data/matlab_code/deep_voice/Recordings_2019/23.9.19/ZOOM0004/190923-115701.hprj')}, {'start_time': 1569237014.0, 'end_time': 1569237121.796644, 'name': PosixPath('/data/matlab_code/deep_voice/Recordings_2019/23.9.19/ZOOM0005/190923-131019.hprj')}]


## Create a CSV file listing the files and their aligned recordings

In [168]:
# Walk the files in chronological order, match each one to the recording
# whose time window contains its modification time, print the result and
# save it as a CSV file inside files_dir.
date_file_list.sort()
file_names = []
file_dates = []
recordings_align = []
recordings_align_path = []

print("%-40s %s" %("filename:", "last modified:"))
for mtime, path in date_file_list:
    file_name = path.name
    # local time, formatted as MM/DD/YY HH:MM:SS
    file_date = time.strftime("%m/%d/%y %H:%M:%S", time.localtime(mtime))

    # First recording whose window strictly contains mtime; None when the
    # file falls outside every window.
    match = next(
        (rec for rec in times if rec['start_time'] < mtime < rec['end_time']),
        None,
    )
    if match is None:
        alignment, alignment_path = None, None
    else:
        alignment = match['name'].name
        alignment_path = match['name'].parent

    file_names.append(file_name)
    file_dates.append(file_date)
    recordings_align.append(alignment)
    recordings_align_path.append(alignment_path)
    print("%-40s %s %s" %(file_name, file_date, alignment))

# One CSV row per file: modification date, file name, aligned project file.
with open(files_dir / 'tryme.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    writer.writerow(["Last Modified","Name", 'Alignment'])
    writer.writerows(zip(file_dates, file_names, recordings_align))

filename:                                last modified:
C0021.MP4                                09/23/19 08:11:38 None
C0022.MP4                                09/23/19 08:13:13 None
C0023.MP4                                09/23/19 08:15:00 None
C0024.MP4                                09/23/19 08:18:19 None
C0030.MP4                                09/23/19 08:43:00 None
C0031.MP4                                09/23/19 08:43:21 None
C0032.MP4                                09/23/19 08:44:19 None
C0033.MP4                                09/23/19 08:46:58 None
C0034.MP4                                09/23/19 08:47:26 None
C0035.MP4                                09/23/19 08:50:56 None
C0036.MP4                                09/23/19 08:52:40 None
C0037.MP4                                09/23/19 08:53:31 None


## Move files into the aligned recording's directory

In [169]:
video_formats = ['mov', 'mp4']
stills_formats = ['arw', 'nef', 'jpg', 'jpeg']

# Make sure every recording folder has its 'videos' and 'stills' sub-folders.
for item in times:
    for subdir in ('videos', 'stills'):
        (item['name'].parent / subdir).mkdir(exist_ok=True)

# Lower-case extension -> destination sub-folder. Video formats are applied
# last so they take priority if an extension is ever listed in both lists.
subdir_by_ext = {ext: 'stills' for ext in stills_formats}
subdir_by_ext.update({ext: 'videos' for ext in video_formats})

# file_names and recordings_align_path were built from date_file_list, entry
# by entry. Verify that date_file_list has not changed since, because the
# real source paths are taken from it below.
assert [path.name for _, path in date_file_list] == file_names, \
    "file list changed since the alignment cell ran - re-run that cell first"

for (_, source), aligned_path in zip(date_file_list, recordings_align_path):
    if not aligned_path:
        continue  # file did not fall inside any recording window
    subdir = subdir_by_ext.get(source.suffix.lower().lstrip('.'))
    if subdir is None:
        continue  # neither a known video nor a known stills format
    destination = aligned_path / subdir / source.name
    # `source` is the path the file was actually found at, which may be a
    # sub-folder of files_dir rather than files_dir itself.
    if not source.exists():
        # Keeps the cell re-runnable after the files have been moved.
        print("skipping, source not found (already moved?):", source)
        continue
    if destination.exists():
        # Never silently overwrite a file that is already in place.
        print("skipping, destination already exists:", destination)
        continue
    shutil.move(source, destination)
