In [None]:
######## IMPORTS ########
import os 
import soundfile as sf
import librosa
import pandas as pd

#Attempts to run notebook from another cell -- WIP
#!pip install ipynb
#from ipynb.fs.full.definitions import *
#%run "definitions.ipynb"

In [None]:
######## DEFINITIONS ########


################################################################


#Generates a directory (unless it already exists)
def dir_gen_1(directory):
    """Create ``directory`` (including missing parents) without printing.

    If something already exists at that path the ``FileExistsError`` raised
    by ``os.makedirs`` is swallowed, so calling this repeatedly is harmless.

    Args:
        directory: Path of the directory to create.

    Returns:
        None.
    """
    try:
        os.makedirs(directory)
    except FileExistsError:
        # The path is already taken; treat that as success.
        return None
    return None

#Generates a directory with location printouts
def dir_gen(directory):
    """Create ``directory`` (including missing parents) and report the outcome.

    The directory is created first and the "already exists" case is detected
    from the resulting ``FileExistsError``. This avoids the gap between a
    separate existence check and the creation call, in which another process
    could create the path.

    Args:
        directory: Path of the directory to create. Strings and path-like
            objects such as ``pathlib.Path`` are both accepted.

    Returns:
        None. One line is printed saying whether the directory was newly
        created or was already present.
    """
    try:
        os.makedirs(directory)
    except FileExistsError:
        print(f"dir exists {directory}")
    else:
        # f-string formatting keeps the message working for Path arguments,
        # which cannot be joined to a str with "+".
        print(f"new dir,who dis? {directory}")


################################################################


#Makes list of filenames in directory
def get_filenames_from_dir(directory):
    """List the entries of ``directory`` with their final extension removed.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list with one string per directory entry, in the order given by
        ``os.listdir``. Only the last extension is stripped, so
        ``"a.tar.gz"`` becomes ``"a.tar"``.
    """
    return [os.path.splitext(entry)[0] for entry in os.listdir(directory)]

#Makes list of filenames.extensions from directory
def get_filenames_with_ext_from_dir(directory):
    """List the entries of ``directory`` exactly as ``os.listdir`` reports them.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list of entry names, extensions included.
    """
    return os.listdir(directory)

#Makes list of absolute paths in directory
def get_abspaths_from_dir(directory):
    """Return the absolute path of every entry in ``directory``.

    The paths are built by joining each entry name onto the absolute form of
    ``directory``. The process working directory is left untouched, so a
    relative ``directory`` resolves correctly and callers are not moved to a
    different folder as a side effect.

    Args:
        directory: Path (absolute or relative) of the directory to list.

    Returns:
        A list of absolute path strings, one per entry, in ``os.listdir``
        order.
    """
    root = os.path.abspath(directory)
    return [os.path.join(root, entry) for entry in os.listdir(root)]


################################################################

#Makes dataframe of start/stop times from a single sheet that was generated by RavenPro(/BirdNET)
#Remember! any xlsx file in the dir must be closed so lock file isn't in dir too
def make_timesframe_from_sheet(sheet_path):
    """Read one selection sheet and keep only its file/start/stop columns.

    Args:
        sheet_path: Path of an Excel sheet readable by ``pandas.read_excel``.
            It must contain the columns ``filename``, ``Start (s)`` and
            ``End (s)``.

    Returns:
        A DataFrame with the columns ``filename``, ``start`` and ``stop``,
        one row per row of the sheet.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    sheet = pd.read_excel(sheet_path)

    # Pull the three columns of interest and give the time columns short names.
    return pd.DataFrame(
        {
            'filename': sheet['filename'],
            'start': sheet['Start (s)'],
            'stop': sheet['End (s)'],
        }
    )

#Concatenates 2 dataframes together
def cat_frames(f1,f2):
    """Stack ``f2`` underneath ``f1`` and renumber the rows.

    Args:
        f1: DataFrame whose rows come first.
        f2: DataFrame whose rows are appended after those of ``f1``.

    Returns:
        A new DataFrame holding the rows of both inputs with a fresh
        0..n-1 index; the inputs are not modified.
    """
    return pd.concat(objs=[f1, f2], ignore_index=True)

#Makes dataframe of start/stop times from all sheets in a directory (format generated by RavenPro/BirdNET)
#Excel lock files ("~$...") left in the dir by open workbooks are skipped, as are sub-directories
def make_timesframe_from_sheetdir(dirpath):
    """Combine every sheet in ``dirpath`` into one file/start/stop DataFrame.

    Each regular file in the directory is read with
    ``make_timesframe_from_sheet`` and the results are concatenated once at
    the end. The working directory is not changed.

    Args:
        dirpath: Path (absolute or relative) of a directory of Excel sheets.
            Every sheet must contain the columns ``filename``, ``Start (s)``
            and ``End (s)``.

    Returns:
        A DataFrame with the columns ``filename``, ``start`` and ``stop`` and
        a fresh 0..n-1 index. Sheets are processed in sorted filename order.
        An empty DataFrame is returned when the directory holds no sheets.
    """
    root = os.path.abspath(dirpath)

    frames = []
    for name in sorted(os.listdir(root)):
        xlpath = os.path.join(root, name)
        # "~$name.xlsx" is the lock file Excel creates while a workbook is
        # open; it is not a readable sheet.
        if name.startswith('~$') or not os.path.isfile(xlpath):
            continue
        frames.append(make_timesframe_from_sheet(xlpath))

    if not frames:
        # pd.concat raises on an empty list; keep the original empty result.
        return pd.DataFrame()

    # One concat at the end avoids re-copying the growing frame per sheet.
    return pd.concat(frames, ignore_index=True)


################################################################


#Obtains filename sans extension from full path
def get_filename_from_path(fullpath):
    """Return the last path component of ``fullpath`` without its extension.

    Args:
        fullpath: A file path; only its final component is used.

    Returns:
        The file name with its last extension removed, e.g.
        ``"rec.tar.gz"`` gives ``"rec.tar"``.
    """
    basename = os.path.basename(fullpath)
    stem, _extension = os.path.splitext(basename)
    return stem

#Creates a set of audio slices from a SINGLE audio file. Receives input audio fullpath, slice duration, and saveto directory as arguments. Outputs time-enumerated audio slices to specified location. 
def slice_file_by_pathname(pathname,time,savedir):
    """Cut one audio file into consecutive slices of ``time`` seconds.

    Slices are written as WAV files into ``<savedir>/<time>s/`` and named
    ``<original name>_<start offset>.wav``, where the offset is
    ``slice index * time``. The final slice holds whatever samples remain
    and may be shorter. The working directory is not changed, so relative
    ``pathname`` and ``savedir`` values resolve against the caller's cwd.

    Args:
        pathname: Path of the audio file to slice.
        time: Slice duration in seconds.
        savedir: Directory under which the ``<time>s`` sub-folder is made.

    Raises:
        ValueError: If ``time`` is too small to cover a single sample.
    """
    #Makes subdir to save slices
    newdir=os.path.join(savedir,str(time)+'s')
    dir_gen(newdir)

    y, sr = librosa.load(pathname,sr=None)
    samps=int(time*sr)
    if samps < 1:
        raise ValueError(
            f"slice duration {time!r}s is shorter than one sample at {sr} Hz"
        )

    filename=get_filename_from_path(pathname)

    for i, start in enumerate(range(0, len(y), samps)):
        # os.path.join picks the right separator for the host OS; the slice
        # is written straight from the loaded array without an interim list.
        output_file = os.path.join(newdir, f'{filename}_{i*time}.wav')
        sf.write(output_file, y[start:start + samps], sr)

#Creates a set of audio slices from an entire directory of audiofiles. Receives directory path, slice length, and saveto path as arguments. Outputs time-enumerated audio slices to specified location. 
def slice_files_by_dirname(dirname,time,savedir):
    """Cut every audio file in ``dirname`` into slices of ``time`` seconds.

    Slices are written as WAV files into ``<savedir>/<time>s/`` and named
    ``<original name>_<start offset>.wav``, where the offset is
    ``slice index * time``. The final slice of each file holds whatever
    samples remain and may be shorter. The working directory is not changed,
    so relative ``dirname`` and ``savedir`` values resolve against the
    caller's cwd.

    Args:
        dirname: Directory containing the audio files to slice.
            Sub-directories are skipped; files are processed in sorted order.
        time: Slice duration in seconds.
        savedir: Directory under which the ``<time>s`` sub-folder is made.

    Raises:
        ValueError: If ``time`` is too small to cover a single sample of a
            file.
    """
    #Makes subdir to save slices
    newdir=os.path.join(savedir,str(time)+'s')
    dir_gen(newdir)

    # Resolve the input files here instead of changing directory, so that
    # neither dirname nor newdir is reinterpreted relative to a new cwd.
    srcdir=os.path.abspath(dirname)
    pathlist=[os.path.join(srcdir,name) for name in sorted(os.listdir(srcdir))]

    for pathname in pathlist:
        if not os.path.isfile(pathname):
            continue

        y, sr = librosa.load(pathname,sr=None)
        samps=int(time*sr)
        if samps < 1:
            raise ValueError(
                f"slice duration {time!r}s is shorter than one sample at {sr} Hz"
            )

        #Specifies output filename
        filename=get_filename_from_path(pathname)

        for i, start in enumerate(range(0, len(y), samps)):
            # os.path.join picks the right separator for the host OS.
            output_file = os.path.join(newdir, f'{filename}_{i*time}.wav')
            sf.write(output_file, y[start:start + samps], sr)


In [None]:
######## PATHS ########

rootdir=r'SOME_DIR'

#Working sub-directories; os.path.join inserts the separator of the host OS
#(a hard-coded '\' only yields nested folders on Windows)
audiodir=os.path.join(rootdir,'audio')
slicedir=os.path.join(rootdir,'slices')
sheetdir=os.path.join(rootdir,'sheets')

#Creates any of the three directories that do not exist yet
dir_gen(audiodir)
dir_gen(slicedir)
dir_gen(sheetdir)

In [None]:
#EXAMPLE: cuts every file in audiodir into 30-second slices, written to the '30s' sub-folder of slicedir

slice_files_by_dirname(dirname=audiodir,time=30,savedir=slicedir)