# In [1]:
import datetime
from matplotlib import pyplot as plt
from redvox.api1000.wrapped_redvox_packet.wrapped_packet import WrappedRedvoxPacketM
from redvox.common.data_window import DataWindow, DataWindowConfig
import pandas as pd
from scipy.io.wavfile import write
import sys
import re
import shutil
import os
import csv
import datetime
import numpy as np
from research.dataImporting import dataTools
import librosa
from librosa.feature import mfcc

def convertToGPS(path,name):
    '''
    Records the mean location reported by the first station of a RedVox dataset.

    The data is loaded with dataTools.import_redVoxData. When the first station
    has a best location sensor, one row ``[name, mean, mean, mean]`` built from
    columns 3, 4 and 5 of that sensor's data is appended to the module-level
    ``phoneGPS`` list (stored there as latitude, longitude, altitude). When
    there is no such sensor nothing is recorded.
    @param path: location handed to dataTools.import_redVoxData
    @param name: label stored as the first element of the appended row
    @return None
    '''
    first_station = dataTools.import_redVoxData(path).first_station()
    gps_sensor = first_station.best_location_sensor()
    if not gps_sensor:
        return

    sensor_columns = gps_sensor._data.columns
    latitude = sensor_columns[3]
    longitude = sensor_columns[4]
    altitude = sensor_columns[5]
    phoneGPS.append([name, np.mean(latitude), np.mean(longitude), np.mean(altitude)])


def searchDir(rootdir):
    '''
    Walks a directory tree and hands every directory that holds data files to convertToGPS.

    Entries of ``rootdir`` are visited in os.scandir order. Sub-directories are
    searched recursively; the first non-directory entry stops the scan of the
    current directory, which is then passed to convertToGPS together with
    element 8 of its backslash-split path as the name. Sub-directories listed
    after that first file are not visited. A directory without any file entry
    triggers no call.
    @param rootdir: directory to scan (a path string or an os.DirEntry)
    @return None
    '''
    data_dir = None
    # The with-block closes the scandir iterator even when the loop is left
    # early; otherwise the directory handle stays open until garbage collection.
    with os.scandir(rootdir) as entries:
        for entry in entries:
            if entry.is_dir():
                # print(entry.path)
                searchDir(entry)
            else:
                data_dir = os.path.dirname(entry)
                break
    if isinstance(data_dir, str):
        # The name is taken from a fixed depth of the absolute Windows path.
        splitPath = data_dir.split('\\')
        convertToGPS(data_dir, splitPath[8])

def get_filepaths(data_path):
    '''
    Lists every file found beneath a directory.
    @param data_path: root directory that is walked recursively with os.walk
    @return list of file paths, each joined from its containing folder and file name
    '''
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(data_path)
        for filename in filenames
    ]

# def split_audio(waveData, sampleFreq):
#     '''
#     Frames audio data and converts to feature space (MFCC)
#     :param waveData: waveData array of time-domain audio
#     :param sampleFreq: Sample Frequency (8Khz)
#     @return list of features (ds), list of labels corresponding to feature dataset:
#     '''
#     # middle third of data
#     duration = waveData.shape[0]
#     startTime = np.round(duration / 3)
#     endTime = np.round(duration * 2 / 3)
#     waveDataSplit= waveData[int(startTime):int(endTime)]
#     features=MFCCCalc(waveDataSplit.squeeze(), sampleFreq)
#     #label= [droneDict[labelName]] * features.shape[1]
#     return features

def create_dataset(train_files):
    '''
    Creates the MFCC feature dataset for a collection of audio files.

    Every file is loaded with librosa at 8 kHz. A file that yields samples and
    whose minimum sample is non-zero contributes one ``[name, features]`` entry:
    ``name`` is element 7 of the backslash-split path with everything from its
    first "." dropped, ``features`` is the transposed MFCCCalc output. Any other
    file contributes two 0.0 placeholder values instead of an entry.
    @param train_files: iterable of audio file paths.
    @return list holding [name, features] entries and 0.0 placeholders, in input order
    '''
    # Number of placeholder zeros added for a rejected file; equals the length
    # of one [name, features] entry, so it no longer depends on features[0]
    # existing (the first file may be the rejected one).
    placeholder_width = 2
    features = []
    for x in train_files:
        #test_file = tf.io.read_file(x)
        #test_audio, sampleRate = tf.audio.decode_wav(contents=test_file)
        test_audio, sampleRate = librosa.load(x, sr=8000)
        # Length is tested first: taking the minimum of an empty array raises.
        if len(test_audio) != 0 and np.min(test_audio) != 0:
            file_name = str(x).split("\\")[7].split(".")[0]
            phone_features = MFCCCalc(test_audio.squeeze(), Fs=sampleRate)
            features.append([file_name, phone_features.transpose()])
        else:
            features.extend(np.zeros(placeholder_width)) #just trying to fill space

    return features

def MFCCCalc(audioData, Fs):
    '''
    Computes MFCC features for a block of audio samples.

    The samples are cast to float and passed to librosa's mfcc with
    40 coefficients and a hop length of 2048 samples.
    @param audioData: Numpy array of decoded audio samples
    @param Fs: sample rate of audioData, forwarded to mfcc as sr
    @return MFCC coefficients as returned by mfcc
    '''
    return mfcc(y=audioData.astype(float), sr=Fs, n_mfcc=40, hop_length=2048)

# Flight directory CSV; the loop further down reads its rows by column position.
passes_df = pd.read_csv(
    r"C:\Users\rclendening\researchData\researchCSVs_Scripts_etc\A1_A2_flight_directory.csv"
)
print()
# Factor applied to latitude/longitude differences in distanceCalc
# (presumably metres per degree -- TODO confirm).
to_meters = 111139
# Receives one row per usable phone and in-window drone GPS sample.
range_df = pd.DataFrame(columns=["Name", "Lat", "Lon", "Alt", "Range"])
#test123=readDirectoryCSV(r"C:\Users\rclendening\researchData\researchCSVs_Scripts_etc\A1_A2_flight_directory.csv")
def distanceCalc(phoneLat,phoneLon,phoneAlt, droneLat,droneLon,droneAlt, meters_per_degree=None):
    '''
    Straight-line distance between a phone and a drone position.

    Latitude and longitude differences are converted to a length with a flat
    (equirectangular) approximation and combined with the altitude difference.
    The longitude difference is scaled by the cosine of the mean latitude,
    because meridians converge away from the equator; using the latitude factor
    unchanged overestimates east-west distance.
    @param phoneLat: phone latitude in degrees
    @param phoneLon: phone longitude in degrees
    @param phoneAlt: phone altitude, same length unit as the result
    @param droneLat: drone latitude in degrees
    @param droneLon: drone longitude in degrees
    @param droneAlt: drone altitude, same length unit as the result
    @param meters_per_degree: length of one degree of latitude; defaults to the
        module-level ``to_meters``
    @return distance in the unit implied by meters_per_degree and the altitudes
    '''
    if meters_per_degree is None:
        meters_per_degree = to_meters
    mean_lat_rad = np.radians((phoneLat + droneLat) / 2.0)
    lat_delta = (phoneLat - droneLat) * meters_per_degree
    lon_delta = (phoneLon - droneLon) * meters_per_degree * np.cos(mean_lat_rad)
    alt_delta = phoneAlt - droneAlt
    return np.sqrt(lat_delta**2 + lon_delta**2 + alt_delta**2)
def _pass_timestamp(stamp):
    '''
    Converts a pass start/stop string to a POSIX timestamp.

    Characters 6-7 are read as the day, 9-10 as the hour, 11-12 as the minute
    and 13-14 as the second; year 2021 and month 8 are fixed.
    NOTE(review): the datetime is naive, so timestamp() interprets it in the
    local timezone of the machine -- confirm this matches the UTC log times
    it is compared against.
    '''
    return datetime.datetime(2021, 8, int(stamp[6:8]), int(stamp[9:11]), int(stamp[11:13]),int(stamp[13:15])).timestamp()


# For every pass in the flight directory: collect phone GPS fixes, load the
# drone GPS log, and store the phone-to-drone range for every log sample that
# falls inside the pass window.
for _, row in passes_df.iterrows():
    # convertToGPS (reached through searchDir) appends to this module-level list.
    phoneGPS=[]
    # Columns are read by position; .iloc keeps that explicit on a label-indexed row.
    pass_num=row.iloc[0]
    scenario=row.iloc[1]
    run_num=row.iloc[2]
    start=row.iloc[3]
    stop=row.iloc[4]
    drone_gps_file=row.iloc[5]
    full_name=(str(scenario) + 'R' + str(run_num) + 'P' + str(pass_num)).strip()

    searchDir(("C:\\Users\\rclendening\\researchData\\Unused_Datasets\\EscapeCell_Data\\"+scenario+"\\"+full_name))
    # Tabular view of the collected fixes; not used further in this loop.
    GPS_df=pd.DataFrame(phoneGPS, columns=['Name','Lat','Lon','Alt'])
    start_time=_pass_timestamp(start)
    stop_time=_pass_timestamp(stop)

    # Log files whose name starts with 'F' use a different column layout.
    FLY = drone_gps_file[0] == 'F'
    GPS_flight_log=pd.read_csv(("C:\\Users\\rclendening\\researchData\\ESCAPE II_AFRL_SSD\\UAS Campaign\\UAS_log_files\\A1_A2"+"\\"+drone_gps_file+".csv"))
    if FLY:
        lon=GPS_flight_log.iloc[:,5]
        lat=GPS_flight_log.iloc[:,6]
        time=GPS_flight_log.iloc[:,7]
        date=GPS_flight_log.iloc[:,8]
        height=GPS_flight_log.iloc[:,10]
    else:
        # Raw values are scaled down: lat/lon by 1e7, time by 1e6, height by 1e3.
        lon=GPS_flight_log.iloc[:,3]/10**7
        lat=GPS_flight_log.iloc[:,2]/10**7
        time_UTC=GPS_flight_log.iloc[:,1]/10**6
        height=GPS_flight_log.iloc[:,5]/10**3
    #for phones in GPS_df: #FLY files Long=col(5) lat=col(6) date=col(7) time=col(8) height (MSL)=col(10)
                        #Non-FLY files lat=col(3) lon=col(4) timestamp(UTC)=col(1) height (MSL)= col(5) in millimeters
    # NOTE(review): the column numbers in the comment above do not all match the
    # iloc positions used in the code (date/time and lat/lon) -- confirm which is right.

    # Keep only phones whose mean fix lies inside the expected lat/lon box.
    usable_phones=[ph for ph in phoneGPS if 43.00 < ph[1] < 44.00 and -75.00 > ph[2] > -76.00]

    # The audio features depend only on the pass, not on the log sample, so they
    # are computed once per pass instead of once per log row.
    filepaths= get_filepaths("C:\\Users\\rclendening\\researchData\\EscapeCell_DataWav\\"+scenario+"\\"+full_name)
    phone_features= create_dataset(filepaths)

    for t in range(len(GPS_flight_log)):
        if FLY:
            if np.isnan(time[t]):
                # Missing time: 0 never falls inside the pass window below.
                cur_time=0
            else:
                # The time is a numeric HHMMSS value; zero-pad so that times
                # before 10:00:00 still split into two-digit fields.
                hhmmss=str(int(time[t])).zfill(6)
                cur_time=datetime.datetime(2021, 8, int(start[6:8]), int(hhmmss[0:2]),int(hhmmss[2:4]),int(hhmmss[4:6])).timestamp()
        else:
            cur_time=time_UTC[t]

        if start_time < cur_time <= stop_time:
            for phone in usable_phones:
                val=distanceCalc(phone[1],phone[2],phone[3], lat[t],lon[t],height[t])
                range_df.loc[len(range_df.index)]=[str(phone[0]),np.float64(phone[1]),np.float64(phone[2]),np.float64(phone[3]),np.float64(val)]
        ##TODONEXT: create method to store audio data with attached range
print()