# Drone Audio Data
### Code Summary:
- Detrending, normalizing, and shifting data to zero mean
- Fast Fourier transform magnitude and phase spectra plots
- Continuous wavelet transform scalograms with Morlet wavelet
- Subsampled data plots
- Tapered data plots
- Histograms and statistics for plots

In [None]:
#pip install librosa[all]
import librosa
import librosa.display
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import numpy as np
import plotly.graph_objects as go
import plotly.tools as tls
import scipy.stats as ss
import scipy.fft
import pandas as pd
import pywt
from scipy import interpolate
from scipy.signal.windows import hann
from scipy import signal
from scipy.ndimage import uniform_filter

In [None]:
# load the data
# sampling rate is 48kHz
# librosa.load returns the audio samples (y) and the sampling rate used (sr);
# sr=48000 is the requested target rate.
y, sr = librosa.load('20250919_backyard_clippy.WAV', sr=48000)

***
### Plotting the Normalized, Detrended, and Shifted (DNS) Time Series
#### Notes:
- Data was loaded with librosa.load, which returns floating-point samples scaled to the range [-1, 1]
- Data was linearly detrended using signal.detrend from scipy
- After detrending, the data was shifted to ~zero mean by subtracting its mean

In [None]:
# Remove the best-fit straight line from the raw samples
y_detrended = signal.detrend(y, type='linear')

# Shift the detrended samples so that their mean is (approximately) zero
y_centered = y_detrended - y_detrended.mean()

# Waveform of the detrended, centered series, drawn on an explicit axes
drone_audio = plt.figure(figsize=(12, 4))
wave_ax = drone_audio.add_subplot()
librosa.display.waveshow(y_centered, sr=sr, axis='time', ax=wave_ax)
wave_ax.set_title('Drone Audio Waveform')
wave_ax.set_xlabel('Time (M:SS)')
wave_ax.set_ylabel('Amplitude')
wave_ax.grid(True, alpha=0.5)
drone_audio.tight_layout()
plt.show()
# Conversion to plotly doesn't work. Message says "Plotly can only import path collections linked to 'data' coordinates"
print(f'sampling rate: {sr} samples per second')

In [None]:
# Descriptive statistics of the centered series
print(ss.describe(y_centered))
# Remaining statistics are printed as "label value" pairs, one per line
for stat_label, stat_value in (
    ("standard deviaiton:", np.std(y_centered)),
    ("median:", np.median(y_centered)),
    ("mode:", ss.mode(y_centered)),
):
    print(stat_label, stat_value)

***
### Subsampling the Time Series
#### Notes:
- different step sizes were applied to the normalized, detrended and mean-shifted (DNS) data for subsampling
- comment out certain series before plotting to focus on a specific step value
- adjust x-axis limits to zoom in on certain times

In [None]:
# Wrap y_centered (numpy array) in a pandas data frame with one named column
drone_data = pd.DataFrame(y_centered, columns=['Amplitude'])
drone_data.head()

In [None]:
# Add column for time in seconds
# (row index divided by the sampling rate sr gives elapsed time in seconds)
time_secs = drone_data.index / sr
drone_data['time_secs'] = time_secs
drone_data.head()

In [None]:
# Subsample drone audio:
# Candidate subsampling steps (in samples between retained points).
step1 = 10
step2 = 100
step3 = 1000
step4 = 10000
# Step used for the subsampled trace drawn below; named once so the slice and
# the legend label cannot drift apart.
plot_step = 562

subsamp_plot = plt.figure(figsize=(12,4))
plt.plot(drone_data['time_secs'], drone_data['Amplitude'], label="Full Data")
# Uncomment any of the following to overlay another subsampling step:
#plt.plot(drone_data['time_secs'][::step1], drone_data['Amplitude'][::step1], label=f"Step 1 = {step1}")
#plt.plot(drone_data['time_secs'][::step2], drone_data['Amplitude'][::step2], label=f"Step 2 = {step2}")
#plt.plot(drone_data['time_secs'][::step3], drone_data['Amplitude'][::step3], label=f"Step 3 = {step3}")
#plt.plot(drone_data['time_secs'][::step4], drone_data['Amplitude'][::step4], label=f"Step 4 = {step4}")
plt.plot(drone_data['time_secs'][::plot_step], drone_data['Amplitude'][::plot_step],
         label=f"Subsampled Data (step={plot_step})")
plt.title("Subsampling Drone Audio Data")
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.grid(True, alpha=0.5)
# Zoom in on a two-second window; adjust to inspect other times
plt.xlim(83,85)
plt.legend()
plt.tight_layout()
plt.show()
# Conversion to plotly doesn't work --> try to add buttons using matplotlib
# see what happens when you change the step size to __ points per second 

### Comparing Subsampled Time Series to Full Time Series
#### Notes:
- The subsampled time series of different step sizes were compared to the full detrended, normalized and shifted (DNS) time series
- A variety of comparison methods were tested that could be applied elsewhere as well (e.g. for comparing tapered vs untapered data):
    - **Comparing summary statistics** (i.e. mean, median, standard deviation) for different sampling rates against the full DNS series
    - Calculating the **mean absolute error (MAE)** between the full DNS data and reconstructions of the subsampled data
        - interpolation was used to reconstruct the time series after subsampling to align timestamps between the subsampled data and the original DNS data
        - the MAE was then calculated and expressed as a percentage of the DNS series' standard deviation
    - Defining a **'percent_check' function** to determine if the subsampled data is within 5% of the full DNS data
        - this method also uses interpolation to compare subsampled data to the full DNS data
        - the absolute error between the full DNS data and the reconstructed subsampled data is calculated and divided by the full data range
        - this result is then used to determine if the error is within the selected threshold (i.e. 5%)

#### Comparing Summary Statistics for Different Subsampling Intervals

In [None]:
# Compare summary statistics for different sampling steps
def _summary_stats(series):
    """Return the mean, standard deviation and median of a pandas Series."""
    return {'Mean': series.mean(),
            'Std': series.std(),
            'Median': series.median()}


_amplitude = drone_data['Amplitude']
stats_original = _summary_stats(_amplitude.iloc[::])
stats_step1 = _summary_stats(_amplitude.iloc[::step1])
stats_step2 = _summary_stats(_amplitude.iloc[::step2])
stats_step3 = _summary_stats(_amplitude.iloc[::step3])
stats_step4 = _summary_stats(_amplitude.iloc[::step4])

# Percent differences in statistics
# The series was shifted to ~zero mean, so the reference mean (and possibly the
# median) is ~0 and dividing by it gives meaningless or infinite percentages.
# A reference smaller than this floor (relative to the spread of the data) is
# reported as an absolute difference instead.
_zero_floor = 1e-6 * abs(stats_original['Std'])
_subsampled_stats = {'step1': stats_step1, 'step2': stats_step2,
                     'step3': stats_step3, 'step4': stats_step4}
for stat in ['Mean', 'Std', 'Median']:
    reference = stats_original[stat]
    for step_label, step_stats in _subsampled_stats.items():
        abs_diff = abs(reference - step_stats[stat])
        if abs(reference) > _zero_floor:
            pct = (abs_diff / abs(reference)) * 100
            print(f"{stat} % diff (original vs {step_label}): {pct:.2f}%")
        else:
            print(f"{stat} % diff (original vs {step_label}): n/a "
                  f"(original ~ 0; abs diff = {abs_diff:.3e})")

#### Comparing Subsampled DNS Data to Full DNS Data Using Mean Absolute Error (MAE)

In [None]:
# Compare each subsampled series with the full series by linearly
# interpolating the retained points back onto every original sample position.
x_original = np.arange(len(drone_data))
y_original = drone_data['Amplitude'].values


def _subsample_with_interpolator(step):
    """Return (positions, values, linear interpolator) for every `step`-th sample."""
    positions = np.arange(0, len(drone_data), step)
    values = drone_data['Amplitude'].iloc[::step].values
    interpolator = interpolate.interp1d(positions, values, kind='linear', fill_value='extrapolate')
    return positions, values, interpolator


x_step1, y_step1, f_step1 = _subsample_with_interpolator(step1)
x_step2, y_step2, f_step2 = _subsample_with_interpolator(step2)
x_step3, y_step3, f_step3 = _subsample_with_interpolator(step3)

# Full-length reconstructions of the subsampled series
reconstructed_step1 = f_step1(x_original)
reconstructed_step2 = f_step2(x_original)
reconstructed_step3 = f_step3(x_original)

# Mean absolute error (MAE) between the full series and each reconstruction
mae_s1 = np.mean(np.abs(drone_data['Amplitude'] - reconstructed_step1))
mae_s2 = np.mean(np.abs(drone_data['Amplitude'] - reconstructed_step2))
mae_s3 = np.mean(np.abs(drone_data['Amplitude'] - reconstructed_step3))

# MAE as a percentage of the full series' standard deviation
original_std = np.std(drone_data['Amplitude'])
print(f"Standard deviation of full data: {original_std: .4f}")
percent_diff_s1 = (mae_s1 / original_std) * 100
percent_diff_s2 = (mae_s2 / original_std) * 100
percent_diff_s3 = (mae_s3 / original_std) * 100

for _label, _mae, _pct in (("Step 1", mae_s1, percent_diff_s1),
                           ("Step 2", mae_s2, percent_diff_s2),
                           ("Step 3", mae_s3, percent_diff_s3)):
    print(f"{_label} Subsample: Mean abs error: {_mae:.4f}, {_pct:.2f}% of the original data's standard deviation")

#### Comparing Subsampled DNS Data to Full DNS Data Using 'percent_check' Function

In [None]:
# Checking if subsampled data is within 5% of original
def percent_check(data, column, step, tolerance=0.05, min_pct_within=95):
    """Check how closely a subsampled column reproduces the full series.

    Every `step`-th value of `data[column]` is kept, linearly interpolated
    (and extrapolated past the last kept point) back onto every original
    row position, and compared point by point with the full column. Errors
    are normalized by the range (max - min) of the full column.

    Parameters:
    data : pandas.DataFrame
        Frame holding the series.
    column : str
        Name of the column to test.
    step : int
        Keep every `step`-th row (must be >= 1).
    tolerance : float
        Acceptable error for a single point, as a fraction of the data range.
    min_pct_within : float
        Percentage of points that must be within `tolerance` for the check
        to pass (default 95).

    Returns:
    dict with the step, tolerance, pass/fail flag, percentage of points
    within tolerance, point counts, data range and maximum normalized error
    (numeric summaries are pre-formatted strings).

    Raises:
    ValueError if `step` < 1 or fewer than two points survive subsampling
    (a line cannot be interpolated through fewer than two points).
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Get original series
    x_original = np.arange(len(data))
    y_original = data[column].values

    # Get subsampled series
    x_subsampled = np.arange(0, len(data), step)
    y_subsampled = data[column].iloc[::step].values
    if len(x_subsampled) < 2:
        raise ValueError(
            f"step={step} leaves {len(x_subsampled)} point(s); "
            "at least 2 are needed for linear interpolation")

    # Interpolate subsampled data back to original timestamps
    f_interp = interpolate.interp1d(x_subsampled, y_subsampled, kind='linear', fill_value='extrapolate')
    y_reconstructed = f_interp(x_original)

    # Calculate absolute errors
    abs_errors = np.abs(y_original - y_reconstructed)

    # Calculate data range (the scale of the data)
    data_range = np.max(y_original) - np.min(y_original)
    print("data_range:", data_range)
    # Normalize errors by data range instead of individual values.
    # A constant series has zero range: an exact reconstruction then counts as
    # zero error and anything else as infinite, instead of producing 0/0 = NaN.
    if data_range > 0:
        normalized_errors = abs_errors / data_range
    else:
        normalized_errors = np.where(abs_errors == 0, 0.0, np.inf)

    # Test if within tolerance
    within_tolerance = normalized_errors <= tolerance
    pct_within = (np.sum(within_tolerance) / len(within_tolerance)) * 100

    # Overall pass/fail
    passes = pct_within >= min_pct_within

    results = {
        'step': step,
        'tolerance': f'{tolerance*100}%',
        'passes': passes,
        'points_within_tolerance': f'{pct_within: .4f}%',
        'num_original_points': len(y_original),
        'num_subsampled_points': len(y_subsampled),
        'data_range': f'{data_range: .4f}',  
        'max_normalized_error': f'{(np.max(normalized_errors)*100): .4f}%' }

    return results

# Checks for the predefined steps; uncomment to run them.
#results_step1 = percent_check(drone_data, 'Amplitude', step1, tolerance=0.05)
#results_step2 = percent_check(drone_data, 'Amplitude', step2, tolerance=0.05)
#results_step3 = percent_check(drone_data, 'Amplitude', step3, tolerance=0.05)
#results_step4 = percent_check(drone_data, 'Amplitude', step4, tolerance=0.05)

#print("results1:", results_step1)
#print("results2:", results_step2)
#print("results3:", results_step3)
#print("results4:", results_step4)
# Check the step of 562 samples against a 5% tolerance and print the result dict.
results_step_test = percent_check(drone_data, 'Amplitude', 562, tolerance=0.05)
print("results_test:", results_step_test)

***
### Tapering the Time Series
#### Notes:
- a Hann window/raised cosine was used to taper the DNS time series
- the time series was padded with zeros to create different tapers:
    - no padding
    - padding with half the length of the original time series
    - padding with the full length of the original time series
    - etc.
- comment out certain data before plotting to focus on specific series, or adjust axes limits to zoom in on certain features
- the same methods of comparison that were used to compare subsampled to full data were used to compare the tapered data to the original DNS series
- a goal of tapering: reduce spectral leakage

In [None]:
# Tapering Drone Audio
data = drone_data['Amplitude'].values
L = len(data)
time_s = drone_data['time_secs'].values
# spacing between consecutive samples, in seconds
time_delta = time_s[1] - time_s[0]

# 1L - Hann window over the unpadded series
padding1 = 0
window1 = hann(L)
tapered_data1 = window1 * data

# 2L - half a series length of zeros on each side, then a Hann window over
# the padded series (gentler taper across the original samples)
padding2 = L // 2
data2_padded = np.concatenate([np.zeros(padding2), data, np.zeros(padding2)])
window2 = hann(len(data2_padded))
tapered_data2_full = window2 * data2_padded
# time axis shifted so that the original samples still start at t = 0
time_s2 = np.arange(len(data2_padded)) * time_delta - (padding2 * time_delta)

# 3L - a full series length of zeros on each side (very gentle taper)
padding3 = L
data3_padded = np.concatenate([np.zeros(padding3), data, np.zeros(padding3)])
window3 = hann(len(data3_padded))
tapered_data3_full = window3 * data3_padded
time_s3 = np.arange(len(data3_padded)) * time_delta - (padding3 * time_delta)

# Plot all four series on shared axes
plt.figure(figsize=(12,4))
for _t, _series, _label in (
    (time_s, drone_data['Amplitude'], "Full Data (1L)"),
    (time_s, tapered_data1, "1L Tapered Series"),
    (time_s2, tapered_data2_full, "2L Tapered Series"),
    (time_s3, tapered_data3_full, "3L Tapered Series"),
):
    plt.plot(_t, _series, label=_label)
plt.title("Tapering Drone Audio Data")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.grid(True, alpha=0.5)
# zoomed in on the start of the recording; adjust to inspect other regions
plt.xlim(-0.01, 0.025)
plt.ylim(-0.025, 0.025)
plt.legend()
plt.tight_layout()
plt.show()
# taper subsampled data once you know that things are working individually

#### Comparing Tapered DNS Data to Full DNS Data Using 'percent_check_tapered' Function
- This version of the 'percent_check' function includes MAE as an output in the results

In [None]:
# Full-length DNS amplitude values as a numpy array (reference series for the tapered comparisons)
data = drone_data['Amplitude'].values
def percent_check_tapered(original_data, tapered_data_full, padding, tolerance=0.05, min_pct_within=95):
    """Compare tapered time series to original time series.

    The padded ends of the tapered series are dropped and the remaining
    samples are compared point by point with the original series. Errors are
    normalized by the range (max - min) of the original series, and the mean
    absolute error (MAE) is also expressed relative to the original series'
    standard deviation.

    Parameters:
    original_data : array
        Original unpadded time series
    tapered_data_full : array
        Tapered time series (includes padding)
    padding : int
        Amount of padding added to each end (values <= 0 mean no padding)
    tolerance : float
        Acceptable error as fraction of data range
    min_pct_within : float
        Percentage of points that must be within `tolerance` for the check
        to pass (default 95)

    Returns:
    dict with padding, tolerance, pass/fail flag, percentage of points within
    tolerance, maximum normalized error, MAE, MAE as % of the standard
    deviation, point count and data range (numeric summaries are
    pre-formatted strings).

    Raises:
    ValueError if the un-padded tapered series and the original series do
    not have the same shape.
    """
    original_data = np.asarray(original_data)
    tapered_data_full = np.asarray(tapered_data_full)

    # Extract the original region from tapered data (remove padding)
    if padding > 0:
        tapered_original_region = tapered_data_full[padding:-padding]
    else:
        tapered_original_region = tapered_data_full

    # Fail early with a clear message rather than silently broadcasting
    if tapered_original_region.shape != original_data.shape:
        raise ValueError(
            f"tapered series has shape {tapered_original_region.shape} after removing "
            f"padding, expected {original_data.shape}")

    # Direct comparison: tapered vs original
    abs_errors = np.abs(original_data - tapered_original_region)
    # Normalize by the data range. A constant series has zero range: identical
    # points then count as zero error and anything else as infinite, instead
    # of producing 0/0 = NaN.
    data_range = np.max(original_data) - np.min(original_data)
    if data_range > 0:
        normalized_errors = abs_errors / data_range
    else:
        normalized_errors = np.where(abs_errors == 0, 0.0, np.inf)

    within_tolerance = normalized_errors <= tolerance
    pct_within = (np.sum(within_tolerance) / len(within_tolerance))*100

    # Mean absolute error (MAE), compared to the standard deviation of the
    # original data (same zero guard as for the range)
    mae = np.mean(abs_errors)
    std_original = np.std(original_data)
    if std_original > 0:
        mae_pct_std = (mae / std_original)*100
    else:
        mae_pct_std = 0.0 if mae == 0 else np.inf

    # Pass when enough individual points are within tolerance
    passes = pct_within >= min_pct_within

    results = {
        'padding': padding,
        'tolerance': f'{tolerance*100}%',
        'passes_pointwise': passes,
        'points_within_tolerance': f'{pct_within: .4f}%',
        'max_normalized_error': f'{(np.max(normalized_errors)*100): .4f}%',
        'mean_abs_error': f'{mae: .4f}',
        'mae_as_%_of_std': f'{mae_pct_std: .4f}%',
        'num_points': len(original_data),
        'data_range': f'{data_range: .4f}'}

    return results

# Compare each tapered series (1L, 2L, 3L) with the original at 5% tolerance
results_1 = percent_check_tapered(data, tapered_data1, padding1, tolerance=0.05)
results_2 = percent_check_tapered(data, tapered_data2_full, padding2, tolerance=0.05)
results_3 = percent_check_tapered(data, tapered_data3_full, padding3, tolerance=0.05)
# A blank line separates consecutive result dicts
print("results1:", results_1, end="\n\n")
print("results2:", results_2, end="\n\n")
print("results3:", results_3)

***
### Combining Subsampling and Tapering
#### Notes:
- Tapering with Hann window and using step=562 based on results from percent_check function for subsampled data
- Taper applied with no padding to prevent discontinuities

In [None]:
sub_step = 562 # data step for subsampling
# Keep every sub_step-th amplitude, then taper the result with a Hann window
data = drone_data['Amplitude'][::sub_step].values
L = len(drone_data['Amplitude'][::sub_step])
time_s = drone_data['time_secs'].values
# spacing between consecutive full-rate samples, in seconds
time_delta = time_s[1] - time_s[0]
# 1L taper (no padding)
window1 = hann(L)
tapered_sub1 = window1 * data

# Full series with the subsampled, tapered series overlaid at the retained times
plt.figure(figsize=(12,4))
for _t, _series, _label in (
    (time_s, drone_data['Amplitude'], "Full Data"),
    (time_s[::sub_step], tapered_sub1, "Tapered Data (Step=562)"),
):
    plt.plot(_t, _series, label=_label)
plt.title("Tapered and Subsampled Drone Audio Data")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.grid(True, alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()

***
### Fast Fourier Transform Plots
#### Notes:
- The fast fourier transform was applied to the original detrended, normalized and shifted time series to obtain magnitude and phase spectra
- Only positive frequencies are included in the plots since the input is real and will produce symmetric results
- Also applied the fast fourier transform to subsampled and tapered DNS data to plot the resulting magnitude and phase spectra
- Change x-limits to zoom in on certain areas
<br> Effects of subsampling/tapering:
    - Removes higher frequency content
    - Reduces spectral leakage caused by discontinuities

In [None]:
def apply_hann_window(data):
    """Taper `data` with a Hann window of the same length.

    Returns the windowed data together with the coherent gain used here for
    the Hann window (0.5); dividing amplitudes by this value (i.e. multiplying
    by 2) compensates for the amplitude reduction caused by the window.
    """
    hann_gain = 0.5
    taper = np.hanning(len(data))
    return data * taper, hann_gain

def subsample_with_antialiasing(data, decimation_factor, sr):
    """Downsample `data` with scipy.signal.decimate and report the new rate.

    data: array
    decimation_factor: factor by which to reduce sampling rate
    sr: Original sampling rate

    Returns (decimated_data, new_sr) where new_sr = sr / decimation_factor.
    """
    # decimate low-pass filters before keeping every decimation_factor-th
    # sample; an IIR filter is requested here with zero_phase=True, and the
    # filter order is left at decimate's default.
    # NOTE(review): the SciPy docs recommend calling decimate in several stages
    # for IIR factors above 13 - confirm a single stage is adequate for the
    # factor used by the caller.
    reduced = scipy.signal.decimate(data, decimation_factor, ftype='iir', zero_phase=True)
    return reduced, sr / decimation_factor

def _draw_fft_panel(fig, draw, x, y, name, title, ylabel, legend_new, legend_existing,
                    minor_grid=True, minor_ticks=False):
    """Draw one trace on `fig`, creating and styling the figure when `fig` is None."""
    is_new = fig is None
    if is_new:
        fig = plt.figure(figsize=(10, 6))
    else:
        # make the existing figure current so the pyplot calls below target it
        plt.figure(fig.number)

    draw(x, y, linewidth=1, label=name)

    if is_new:
        # titles, labels and grid only need to be set once per figure
        plt.title(title, fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        if minor_grid:
            plt.grid(True, which='minor', alpha=0.5)
        if minor_ticks:
            plt.minorticks_on()
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()

    # only (re)build the legend when this trace actually has a label
    if name:
        plt.legend(**(legend_new if is_new else legend_existing))
    return fig


def plot_fft2(data, sr, fig1=None, fig2=None, fig3=None, name='', apply_window=True,
              correct_gain=False):
    """Plot single-sided FFT magnitude (linear and log-log) and phase spectra.

    data: array
    sr: Sampling rate
    fig1, fig2, fig3 : existing figures for the linear magnitude, log-log
        magnitude and phase plots; when None a new figure is created and
        styled, otherwise the trace is added to the given figure
    name : legend label for this trace (no legend is drawn when empty)
    apply_window : bool, says whether to apply Hann window (default True)
    correct_gain : bool, divide the magnitudes by the window's coherent gain
        so windowed and unwindowed amplitudes are directly comparable
        (default False: magnitudes are left uncorrected)

    Returns (fig1, fig2, fig3) so further traces can be overlaid.
    """
    n = len(data)

    # Apply window if requested
    if apply_window:
        windowed_data, coherent_gain = apply_hann_window(data)
    else:
        windowed_data = data
        coherent_gain = 1.0

    # Compute FFT of the real-valued input (non-negative frequencies only)
    fft_values = scipy.fft.rfft(windowed_data)
    freqs = scipy.fft.rfftfreq(n, d=1/sr)

    # Scale by the number of samples
    magnitudes = np.abs(fft_values) / n

    # Double the AC components (not DC and Nyquist) for a single-sided spectrum
    magnitudes[1:] *= 2.0

    # Undo doubling for Nyquist if even length
    if n % 2 == 0:
        magnitudes[-1] /= 2.0

    # Optional compensation for the amplitude lost to the window
    if correct_gain:
        magnitudes = magnitudes / coherent_gain

    phases = np.angle(fft_values)

    upper_right = {'loc': 'upper right'}

    # Linear magnitude spectrum (includes the DC bin)
    fig1 = _draw_fft_panel(
        fig1, plt.plot, freqs, magnitudes, name,
        title='FFT Magnitude Spectrum (Linear)', ylabel='Magnitude',
        legend_new=upper_right, legend_existing=upper_right,
        minor_grid=True, minor_ticks=True)

    # Log-log magnitude spectrum; the 0 Hz bin is dropped for the log axis
    fig2 = _draw_fft_panel(
        fig2, plt.loglog, freqs[1:], magnitudes[1:], name,
        title='FFT Magnitude Spectrum (log)', ylabel='Magnitude',
        legend_new=upper_right, legend_existing=upper_right,
        minor_grid=True, minor_ticks=False)

    # Phase spectrum (0 Hz bin dropped as well)
    fig3 = _draw_fft_panel(
        fig3, plt.plot, freqs[1:], phases[1:], name,
        title='Phase Spectrum', ylabel='Phase (radians)',
        legend_new={'loc': 'lower right', 'framealpha': 0.5},
        legend_existing={'loc': 'lower right', 'framealpha': 0.8},
        minor_grid=False, minor_ticks=False)

    return fig1, fig2, fig3
 
# y_centered = drone_audio_data
# sr = sampling_rate
decimation_factor = 562  # step for subsampling

# For full data
# (the returned figures are reused below so both traces share the same axes)
fig1, fig2, fig3 = plot_fft2(y_centered, sr, name='Full Data', apply_window=False) #apply_window=False for no tapering

# For subsampled data with anti-aliasing
# NOTE: this rebinds tapered_sub1 to the decimated series returned by
# subsample_with_antialiasing; the Hann window is applied inside plot_fft2
# (apply_window=True), not to the array stored in tapered_sub1.
tapered_sub1, new_sr = subsample_with_antialiasing(y_centered, decimation_factor, sr)
plot_fft2(tapered_sub1, new_sr, fig1=fig1, fig2=fig2, fig3=fig3, name='Subsampled/Tapered Data', apply_window=True)

plt.show()

In [None]:
# Sampling rates and Nyquist frequencies before and after decimation
_sampling_report = [
    f"Original sampling rate: {sr} Hz",
    f"Original Nyquist: {sr/2} Hz",
    f"Decimation factor: {decimation_factor}",
    f"New sampling rate: {new_sr} Hz",
    f"New Nyquist: {new_sr/2} Hz",
]
print("\n".join(_sampling_report))

***
### Adding Sine Waves to Full DNS Data
#### Notes:
- two data columns containing sine waves are added to the data frame containing the full DNS data (not tapered or subsampled yet), along with additional columns for the combined sine waves and the DNS data combined with both sine waves
- the first 0.5 seconds of DNS data are also plotted alongside the individual sine waves before combining

In [None]:
# Add two columns of sine waves to data
def add_sine_waves(data):
    """Return a copy of `data` extended with two sine waves and their sums.

    The input frame must contain a 'time_secs' column. The copy gains:
    'time_column' (row counter), 'Sine 1' (30 Hz) and 'Sine 2' (15 Hz), both
    with amplitude 0.15 and zero phase, 'Combined_Sines' (their sum) and,
    when an 'Amplitude' column exists, 'Data_With_Sines'
    (Amplitude + Combined_Sines). The input frame itself is not modified.
    """
    out = data.copy()

    out['time_column'] = np.arange(len(out))
    t = out['time_secs'].values

    # (column name, frequency in Hz, amplitude, phase in radians)
    wave_specs = [
        ('Sine 1', 30, 0.15, 0),
        ('Sine 2', 15, 0.15, 0),
    ]

    # A * sin(2*pi*f*t + phi) for each wave
    for wave_name, freq, amplitude, phase in wave_specs:
        out[wave_name] = amplitude * np.sin(2 * np.pi * freq * t + phase)

    # Row-wise sum of the individual sine columns
    sine_columns = [spec[0] for spec in wave_specs]
    out['Combined_Sines'] = out[sine_columns].sum(axis=1)

    # Superimpose the sines on the signal when it is present
    if 'Amplitude' in out.columns:
        out['Data_With_Sines'] = out['Amplitude'] + out['Combined_Sines']

    return out

# Display the extended frame; drone_data itself is unchanged because
# add_sine_waves works on a copy.
add_sine_waves(drone_data)
# Combined_Sines = Sine 1 + Sine 2
# Data_With_Sines = Amplitude + Combined_Sines

In [None]:
# List the column names of the frame returned by add_sine_waves
print(add_sine_waves(drone_data).columns.tolist())

In [None]:
# Plot the start of the time series together with the two sine waves
data_with_sines = add_sine_waves(drone_data)
# row counter divided by the sampling rate gives time in seconds
data_with_sines['time_seconds'] = data_with_sines['time_column'] / sr

_first_half_second = slice(None, 48000//2)  # first 24000 rows
_t_window = data_with_sines['time_seconds'][_first_half_second]

plt.figure(figsize=(12,4))
for _column, _plot_kwargs in (
    ('Amplitude', dict(label='Time Series', zorder=10)),
    ('Sine 1', dict(label='Sine 1')),
    ('Sine 2', dict(label='Sine 2', linestyle=":")),
):
    plt.plot(_t_window, data_with_sines[_column][_first_half_second], **_plot_kwargs)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Sine Waves Before Adding to Data (First 0.5 Seconds)")
plt.grid(True, alpha=0.5)
plt.tight_layout()
plt.legend(loc='upper right')
plt.show()

In [None]:
# Preview the first rows, including the added sine and time columns
data_with_sines.head()

In [None]:
# Copy of the frame without the 'time_seconds' column
df_removed = data_with_sines.drop(columns='time_seconds')
df_removed.head()

In [None]:
# Summary of the remaining columns (dtypes, non-null counts, memory usage)
df_removed.info()

In [None]:
# The original frame was not modified by add_sine_waves (it works on a copy)
drone_data.head()

***
### Adding Sine Waves to Subsampled/Tapered Time Series
#### Notes:
- using add_sine_waves function from previous cell

In [None]:
# convert to pandas data frame from numpy array
t_sub_df = pd.DataFrame(tapered_sub1)
# need same starting columns to use add_sine_waves
t_sub_df.columns = ['Amplitude']
time_secs = drone_data.index / sr
t_sub_df['time_secs'] = time_secs[::sub_step]
t_sub_df

In [None]:
# Display the subsampled frame with the sine-wave columns added (t_sub_df itself is unchanged)
add_sine_waves(t_sub_df)

In [None]:
sub_t_sines = add_sine_waves(t_sub_df)

# Interactive plot (plotly) of the subsampled series and both sine waves
pre_2sines = go.Figure()
_sub_times = list(t_sub_df['time_secs'])
for _values, _trace_kwargs in (
    (t_sub_df['Amplitude'], dict(name='Tapered Time Series (step = 562)', zorder=10)),
    (sub_t_sines['Sine 1'], dict(name='Sine 1', line=dict(dash='solid'))),
    (sub_t_sines['Sine 2'], dict(name='Sine 2', line=dict(dash='solid'))),
):
    pre_2sines.add_trace(go.Scatter(x=list(_sub_times), y=list(_values), mode='lines', **_trace_kwargs))
# Title, size and axis labels
pre_2sines.update_layout(title_text="Subsampled and Tapered Data Before Adding Sine Waves", width=1100, height=600, 
                  xaxis_title='Time (s)', yaxis_title='Amplitude', title_x=0.5, showlegend=True)
# Range slider under the x-axis for zooming
pre_2sines.update_layout(xaxis=dict(rangeslider=dict(visible=True)))
pre_2sines.show()

# Static plot (matplotlib) of the first 48000//100 = 480 subsampled points
_head = slice(None, 48000//100)
_t_head = t_sub_df['time_secs'][_head]
plt.figure(figsize=(12,6))
for _values, _plot_kwargs in (
    (t_sub_df['Amplitude'], dict(label='Tapered Time Series (step=562)', zorder=10)),
    (sub_t_sines['Sine 1'], dict(label='Sine 1')),
    (sub_t_sines['Sine 2'], dict(label='Sine 2', linestyle=":")),
):
    plt.plot(_t_head, _values[_head], **_plot_kwargs)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Sine Waves Before Adding to Data (First 5 Seconds)")
plt.grid(True, alpha=0.5)
plt.tight_layout()
plt.legend()
plt.show()
# use slider to see more than a green blob

***
### Fast Fourier Transform for Full DNS Data with Added Sine Waves
#### Notes:
- magnitude spectrum for the DNS data combined with two sine waves of differing frequencies
- a magnitude spectrum with period on the x-axis instead of frequency is also included
- a data step > 1 can be used to make running the code faster

In [None]:
# FFT plots with sines:
def plot_fft_with_sines(data, step=1, show_linear=False, show_phase=False):
    """Plot FFT magnitude spectra of a real-valued signal on log-log axes.

    Two figures are always drawn: magnitude against frequency, and magnitude
    against period (x-axis inverted, so frequency still increases to the right).
    The zero-frequency bin is excluded from both, because it cannot be shown on
    a log axis and has no finite period.

    The sampling rate is read from the notebook-level variable ``sr``.

    Parameters
    ----------
    data : pandas.Series or array-like
        Real-valued samples.
    step : int, optional
        Plot every ``step``-th spectral bin. Values above 1 make plotting
        faster at the cost of detail. Default is 1 (every bin).
    show_linear : bool, optional
        Also draw the magnitude spectrum on linear axes. Default is False.
    show_phase : bool, optional
        Also draw the phase spectrum. Default is False.

    Returns
    -------
    None
    """
    samples = np.asarray(data)
    n = len(samples)  # Data points
    fft_values = scipy.fft.rfft(samples)
    # corresponding frequencies in Hz, d is inverse of sampling frequency
    freqs = scipy.fft.rfftfreq(n, d=1/sr)
    magnitudes = np.abs(fft_values)

    # remove 0 frequency component to prevent division by zero error:
    non_zero_mask = freqs > 0
    freqs_plot = freqs[non_zero_mask]
    mags_plot = magnitudes[non_zero_mask]
    periods_plot = 1/freqs_plot

    # Frequency spectrum (linear), optional
    if show_linear:
        plt.figure(figsize=(11, 6))
        plt.plot(freqs[::step], magnitudes[::step], 'r-', linewidth=1)
        plt.title('FFT Magnitude Spectrum (Linear)', fontsize=14)
        plt.xlabel('Frequency (Hz)', fontsize=12)
        plt.ylabel('Magnitude', fontsize=12)
        plt.grid(True, which='major', alpha=0.5)
        plt.minorticks_on()
        plt.grid(True, which='minor', alpha=0.5)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.tight_layout()
        plt.show()

    # Frequency spectrum (log)
    # A single loglog call puts both axes on a log scale; calling semilogy and
    # then semilogx would draw the same curve twice.
    fig, ax = plt.subplots(figsize=(11, 6))
    ax.loglog(freqs_plot[::step], mags_plot[::step], 'g-', linewidth=1)
    plt.title('FFT Magnitude Spectrum (log)', fontsize=14)
    plt.xlabel('Frequency (Hz)', fontsize=12)
    plt.ylabel('Magnitude', fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    #plt.axvline(x=15, color='orange', linestyle='--', label= '15 Hz')
    #plt.axvline(x=30, color='blue', linestyle='--', label= '30 Hz')
    #plt.legend()
    plt.grid(True, which='major', alpha=0.5)
    plt.grid(True, which='minor', alpha=0.5)
    plt.tight_layout()
    plt.show()

    # Alternative: plot against period instead of frequency
    fig, ax = plt.subplots(figsize=(11, 6))
    ax.loglog(periods_plot[::step], mags_plot[::step], 'g-', linewidth=1)
    plt.title('FFT Magnitude Spectrum (log)', fontsize=14)
    plt.xlabel('Period (s)', fontsize=12)
    plt.ylabel('Magnitude', fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    #plt.axvline(x=1/100, color='orange', linestyle='--', label= '0.01 s')
    #plt.axvline(x=1/200, color='blue', linestyle='--', label= '0.005 s')
    #plt.legend()
    plt.grid(True, which='major', alpha=0.5)
    plt.grid(True, which='minor', alpha=0.5)
    # Long periods on the left, short on the right (same ordering as frequency)
    ax.invert_xaxis()
    plt.tight_layout()
    plt.show()

    # Phase spectrum, optional (the phase is only computed when requested)
    if show_phase:
        phases = np.angle(fft_values)
        plt.figure(figsize=(11, 6))
        plt.plot(freqs[1::step], phases[1::step], 'm-', linewidth=1)
        plt.title('Phase Spectrum', fontsize=14)
        plt.xlabel('Frequency (Hz)', fontsize=12)
        plt.ylabel('Phase (radians)', fontsize=12)
        plt.grid(True, alpha=0.5)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.tight_layout()
        plt.show()

    return None

plot_fft_with_sines(df_removed['Data_With_Sines'])

***
### Fast Fourier Transform for Data with Added Sine Waves
#### Notes:
- Magnitude and phase spectra for the DNS data combined with two sine waves of differing frequencies
- Includes full (blue) and tapered/subsampled (orange) data on the same plot
- Calling plot_fft2 function from a previous cell, including Hann window correction factor and scipy.signal.decimate method for subsampling

In [None]:
# Trying to fix graph display as before:
# Overlay the spectra of the full-rate series and of its decimated version.
decimation_factor = 562  # same step for subsampling

full_with_sines = data_with_sines['Data_With_Sines'].values

# For full data
# The three objects returned here are handed back to the second call below,
# presumably so both data sets are drawn on the same figures.
fig1, fig2, fig3 = plot_fft2(full_with_sines, sr, name='Full Data', apply_window=False)

# For subsampled data with anti-aliasing
# NOTE(review): this unpacking needs subsample_with_antialiasing to return
# (data, new_sampling_rate). The definition in the Hann-window cell further
# down returns only the decimated array, so confirm which definition is live
# when this cell runs.
tapered_sub1, new_sr = subsample_with_antialiasing(full_with_sines, decimation_factor, sr)
plot_fft2(
    tapered_sub1,
    new_sr,
    fig1=fig1,
    fig2=fig2,
    fig3=fig3,
    name='Subsampled/Tapered Data',
    apply_window=True,
)
plt.show()

***
### Continuous Wavelet Transform
#### Notes:
- the Morlet wavelet, given by $\psi(t)=\exp(\frac{-t^2}{2})\cos(5t)$, is used for the wavelet transform
- scalograms are created using the full DNS data and the full DNS data combined with sine waves
- histograms of scalogram magnitudes are also plotted showing the probability density corresponding to certain magnitude ranges in addition to other statistics (i.e. mean, median, mode, etc.)
- a difference map was also created to compare scalograms
- scales have been determined based on the formula $f=\frac{F_c}{a\cdot\Delta t}$, where $f$ is the pseudo-frequency corresponding to the scale, $F_c$ is the approx. center frequency of the wavelet, $a$ is the wavelet scale, and $\Delta t$ is the sampling period of the signal
- additionally, scales are chosen so that the frequency range of the plots does not exceed the Nyquist frequency

### Increasing Scalogram Frequency Resolution
#### Notes:
- **goal**: increase frequency resolution of scalograms
  
    **Consequences of increasing frequency resolution:**
    - increasing the frequency resolution will decrease the time resolution
    - will change appearance of added sine waves
      
    <br>**Possible Methods for increasing frequency resolution:**
    - increasing the number of scales will provide more frequency bins to sample, which will show finer differences between frequencies
    - Adjust spacing to linear instead of logarithmic and focus on a specific frequency range of interest
    - Decrease time resolution
      <br>**Effective Methods:**
      - Increasing the number of scales showed a slight difference compared to the starting scalogram
      - Decreasing time resolution by applying a uniform filter from scipy.ndimage had the greatest effect
          - The filter is set up to only smooth in time and leave frequency unchanged

#### DNS Data Without Added Sine Waves

In [None]:
# Continuous Wavelet Transform (without sine waves added)
step = 20  # step added to reduce cell run time

# Every step-th amplitude sample as a 1-D array
data = drone_data['Amplitude'].values[::step].flatten()
print(f"Data shape: {data.shape}")  # Check data shape

# Time axis: sample index divided by the sampling rate gives seconds.
# Both helper columns are written onto drone_data.
drone_data['time_column'] = np.arange(len(drone_data))
drone_data['time_seconds'] = drone_data['time_column'] / sr  # to get time in seconds
t = drone_data['time_seconds'].values[::step]
dt = t[1] - t[0]  # Time step in seconds
print(f"dt: {dt}")
print(f"Data step: {step}")

# Convert the target frequency band to scales: scale = ~center_freq / (freq * dt)
# center frequency for Morlet wavelet taken as ~1.0
# NOTE(review): the band actually covered is whatever the "Frequency range"
# print below reports; check it against the intended 10-250 Hz.
scale_min = 1.0 / (250 * dt)  # Small scale for high freq, use freq_max=250
scale_max = 1.0 / (10 * dt)  # Large scale for low freq, use freq_min=10
scales = np.logspace(np.log10(scale_min), np.log10(scale_max), num=60)  # increased number of scales

# Wavelet transform:
coefficients, frequencies = pywt.cwt(data, scales, 'morl', sampling_period=dt)
print(f"Coefficients shape: {coefficients.shape}")
print(f"Frequency range: {frequencies.min():.2f} to {frequencies.max():.2f} Hz")

# Smooth only in the TIME direction (axis=1), not frequency (axis=0):
# the window is 1 row tall and 50 columns wide.
smoothed_coefficients = uniform_filter(np.abs(coefficients), size=(1, 50))

# Plot:
plt.figure(figsize=(14, 6))
plt.pcolormesh(t, frequencies, np.log10(smoothed_coefficients), cmap='plasma', shading='auto', vmin=-5)
plt.colorbar(label='log\u2081\u2080(Magnitude)')
plt.title('Drone Audio')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.yscale('log')
plt.show()

# Histogram of the smoothed scalogram magnitudes:
magnitudes = smoothed_coefficients
magnitude_values = magnitudes.flatten()
mean_magnitude = np.mean(magnitude_values)
median_magnitude = np.median(magnitude_values)

# get mode:
hist_counts, bin_edges = np.histogram(magnitude_values, bins=50)
mode_bin_index = np.argmax(hist_counts)
# Mode is the center of the bin with the highest count:
mode_value = (bin_edges[mode_bin_index] + bin_edges[mode_bin_index + 1]) / 2

fig, ax = plt.subplots(figsize=(12, 4))
ax.hist(magnitude_values, bins=50, density=True, facecolor='skyblue', edgecolor='grey')
ax.set_title("Histogram of Scalogram Magnitudes")
ax.set_xlabel("Magnitude")
ax.set_ylabel("Probability Density")
ax.set_yscale('log')
ax.grid(axis='y', alpha=0.5)
ax.axvline(x=mean_magnitude, color='r', linestyle='--', label=f'Mean: {mean_magnitude: .2e}')
ax.axvline(x=median_magnitude, color='orange', linestyle='--', label=f'Median: {median_magnitude: .2e}')
ax.axvline(x=mode_value, color='b', linestyle='--', label=f'Mode: {mode_value: .2e}')
ax.legend()
plt.show()

# Summary statistics
print(ss.describe(magnitude_values))
std_dev = np.std(magnitude_values)
print(f"Standard Deviation: {std_dev:.4f}")

#### DNS Data With Added Sine Waves

In [None]:
# Continuous Wavelet Transform (with sine waves added)
step = 20  # step added to reduce cell run time
data = data_with_sines['Data_With_Sines'].values[::step]
data = data.flatten()  # Need data to be 1D
print(f"Data shape: {data.shape}")  # Check data shape
t = data_with_sines['time_seconds'].values[::step]
dt = t[1] - t[0]  # Time step in seconds
print(f"Data step: {step}")

# Convert frequencies to scales: scale = ~center_freq / (freq * dt)
# The scale limits are computed here, from this cell's own dt, so the
# "Scale range" print below does not depend on values left behind by
# another cell.
scale_min = 1.0 / (250 * dt)  # Small scale for high freq, use freq_max=250
scale_max = 1.0 / (10 * dt)  # Large scale for low freq, use freq_min=10
scales = np.logspace(np.log10(scale_min), np.log10(scale_max), 60)
coefficients1, frequencies = pywt.cwt(data, scales, 'morl', sampling_period=dt)

print(f"Scale range: {scale_min:.6f} to {scale_max:.6f}")
print(f"Frequency range: {frequencies.min():.2f} to {frequencies.max():.2f} Hz")

# Smooth only in the TIME direction (axis=1), not frequency (axis=0)
coefficients_smoothed = uniform_filter(np.abs(coefficients1), size=(1, 50))

# Plot (linear colour scale, capped at 2.0):
plt.figure(figsize=(14, 6))
plt.pcolormesh(t, frequencies, np.abs(coefficients_smoothed), cmap='plasma', shading='auto', vmax=2.0)
plt.colorbar(label='Magnitude')
plt.ylabel('Frequency (Hz)')
plt.yscale('log')
plt.xlabel('Time (s)')
plt.title('Drone Audio Scalogram With Sines Added')
plt.show()

# Make histogram:
magnitudes = np.abs(coefficients_smoothed)
magnitude_values = magnitudes.flatten()

fig, ax = plt.subplots(figsize=(12, 4))
plt.hist(magnitude_values, bins=50, density=True, facecolor='skyblue', edgecolor='grey')
plt.title("Histogram of Scalogram Magnitudes")
plt.xlabel("Magnitude")
plt.ylabel("Probability Density")
plt.yscale('log')
plt.grid(axis='y', alpha=0.5)
plt.axvline(x=np.mean(magnitude_values), color='r', linestyle='--', label=f'Mean: {np.mean(magnitude_values): .2e}')
plt.axvline(x=np.median(magnitude_values), color='orange', linestyle='--', label=f'Median: {np.median(magnitude_values): .2e}')
# get mode:
hist_counts, bin_edges = np.histogram(magnitude_values, bins=50)
mode_bin_index = np.argmax(hist_counts)
# Mode is the center of the bin with the highest count:
mode_value = (bin_edges[mode_bin_index] + bin_edges[mode_bin_index + 1]) / 2
plt.axvline(x=mode_value, color='b', linestyle='--', label=f'Mode: {mode_value: .2e}')
plt.legend()
plt.show()

# Summary statistics of the smoothed magnitudes
print(ss.describe(magnitude_values))
std_dev = np.std(magnitude_values)
print(f"Standard Deviation: {std_dev:.4f}")

In [None]:
# Difference map for full data plots:
# compares the scalogram of the data without sine waves to the one with them.
data_original = drone_data['Amplitude'].values[::20].flatten()  # WITHOUT sine waves
data_sines = data_with_sines['Data_With_Sines'].values[::20].flatten()  # WITH sine waves
# time:
t = data_with_sines['time_seconds'].values[::20]
dt = t[1] - t[0]  # Time step in seconds
print(f"dt: {dt}")

scales = np.logspace(np.log10(1.0/(250*dt)), np.log10(1.0/(10*dt)), 60)
# Verify that this gives you the right frequency range:
test_freqs = pywt.scale2frequency('morl', scales) / dt  # used to fix display problem
print(f"Expected frequency range: {test_freqs.min():.2f} to {test_freqs.max():.2f} Hz")
# CWT
# The sampling period must be this cell's dt (every 20th sample). It only
# affects the returned frequencies, which have to match test_freqs.
sines_coeffs, sines_freqs = pywt.cwt(data_sines, scales, 'morl', sampling_period=dt)
coeffs_orig, freqs_orig = pywt.cwt(data_original, scales, 'morl', sampling_period=dt)
# Smooth only in the TIME direction (axis=1), not frequency (axis=0)
coefficients_smoothed = uniform_filter(np.abs(sines_coeffs), size=(1, 50))
smoothed_coefficients = uniform_filter(np.abs(coeffs_orig), size=(1, 50))

# Plot:
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
# Scalogram 1 (without sines) - use log scale like original
im1 = axes[0].pcolormesh(t, test_freqs, np.log10(smoothed_coefficients), cmap='plasma', shading='auto', vmin=-5)
axes[0].set_title('Scalogram 1')
axes[0].set_xlabel('Time (s)')
axes[0].set_ylabel('Frequency (Hz)')
axes[0].set_yscale('log')  # Use log scale for frequency axis
plt.colorbar(im1, ax=axes[0], label='log\u2081\u2080(Magnitude)')

# Scalogram 2 (with sines) - linear scale, with guide lines at 15 Hz and 30 Hz
im2 = axes[1].pcolormesh(t, test_freqs, np.abs(coefficients_smoothed), cmap='plasma', shading='auto', vmax=2.0)
axes[1].set_title('Scalogram 2')
axes[1].set_xlabel('Time (s)')
axes[1].set_ylabel('Frequency (Hz)')
axes[1].axhline(y=15, linestyle="--", color='white')
axes[1].axhline(y=30, linestyle="--", color='cyan')
axes[1].set_yscale('log')
plt.colorbar(im2, ax=axes[1], label='Magnitude')

# Normalize each scalogram to [0, 1] (min-max) so the two are comparable
mag_without_sines = np.abs(smoothed_coefficients)
mag_with_sines = np.abs(coefficients_smoothed)
coeff_norm = (mag_without_sines - mag_without_sines.min()) / (mag_without_sines.max() - mag_without_sines.min())
coeff1_norm = (mag_with_sines - mag_with_sines.min()) / (mag_with_sines.max() - mag_with_sines.min())

difference = coeff_norm - coeff1_norm
# Difference map, with a colour range symmetric about zero
max_diff = np.max(np.abs(difference))
im3 = axes[2].pcolormesh(t, test_freqs, difference, cmap='RdBu', vmin=-max_diff, vmax=max_diff, shading='auto')
axes[2].set_title('Difference (1 - 2)')
axes[2].set_xlabel('Time (s)')
axes[2].set_ylabel('Frequency (Hz)')
axes[2].set_yscale('log')
plt.colorbar(im3, ax=axes[2], label='Difference')

plt.tight_layout()
plt.show()

In [None]:
# Report the pseudo-frequencies returned by the CWT of the original data,
# and list the bins within 1 Hz of 15 Hz and of 30 Hz.
near_15_hz = freqs_orig[(freqs_orig > 14) & (freqs_orig < 16)]
near_30_hz = freqs_orig[(freqs_orig > 29) & (freqs_orig < 31)]

print(f"Frequency range: {freqs_orig.min():.2f} to {freqs_orig.max():.2f} Hz")
print(f"Number of frequency bins: {len(freqs_orig)}")
print(f"Frequencies near 15 Hz: {near_15_hz}")
print(f"Frequencies near 30 Hz: {near_30_hz}")

#### Subsampled and Tapered DNS Data Without Added Sine Waves

##### Add corrections for Hann window coherent gain and subsample with anti-aliasing filter

In [None]:
def apply_hann_window(data):
    """Taper ``data`` with a Hann window and undo the window's amplitude loss.

    The window has one point per sample (``np.hanning(len(data))``). The
    tapered samples are divided by a coherent gain of 0.5, i.e. doubled.

    data: 1-D array of samples
    Returns the gain-corrected, windowed array.
    """
    hann_coherent_gain = 0.5  # dividing by this doubles the tapered samples
    taper = np.hanning(len(data))
    return (data * taper) / hann_coherent_gain

def subsample_with_antialiasing(data, decimation_factor, sr):
    """Subsample data using scipy.signal.decimate with anti-aliasing filter.

    data: array of samples
    decimation_factor: positive integer factor by which to reduce the sampling rate
    sr: original sampling rate. It is accepted for interface compatibility and
        does not affect the result; the rate of the returned data is
        sr / decimation_factor.

    Returns only the decimated array (not the new sampling rate).
    Raises ValueError if decimation_factor is smaller than 1.
    """
    if decimation_factor < 1:
        raise ValueError("decimation_factor must be a positive integer")
    # scipy.signal.decimate low-pass filters before downsampling. No filter
    # order is passed, so scipy's default for ftype='iir' is used.
    # zero_phase=True asks scipy for a filter that introduces no phase shift.
    # NOTE(review): the scipy documentation recommends calling decimate several
    # times when an IIR factor is larger than 13 - worth checking for the
    # factor of 562 used in this notebook.
    return scipy.signal.decimate(data, decimation_factor, ftype='iir', zero_phase=True)

# Taper both full-rate series once; the subsampled CWT cells further down
# decimate these two arrays.
hann_windowed_data = apply_hann_window(drone_data['Amplitude'].values)
hann_windowed_sines = apply_hann_window(data_with_sines['Data_With_Sines'].values)
# Show one decimated result as the cell output. The extra call on
# hann_windowed_data was dropped: its result was neither stored nor displayed,
# so it only cost a full decimation pass.
subsample_with_antialiasing(hann_windowed_sines, 562, sr)

In [None]:
# subsampled/tapered without sine waves:
#sub_tap_data = sub_t_sines['Amplitude'].values.flatten()
sub_tap_data = subsample_with_antialiasing(hann_windowed_data, 562, sr)

# time:
subsampling_interval = 562
time_in_sec = sub_t_sines['time_secs'].values
delta_t = time_in_sec[1] - time_in_sec[0]  # Time step in seconds
print("delta_t:", delta_t)

# freq_max is the nominal upper frequency used to derive the smallest scale.
# The original note aims for a range below the Nyquist limit of ~42 Hz; the
# "Frequency range" print below shows the range actually obtained.
freq_max = 50
scale_min = 1.0/(freq_max*delta_t)
largest_scale = 1.0/(5*delta_t)  # nominal lower frequency of 5
scales = np.logspace(np.log10(scale_min), np.log10(largest_scale), num=60)

# CWT
subt_coeffs, subt_freqs = pywt.cwt(sub_tap_data, scales, 'morl', sampling_period=delta_t)
print(f"Coefficients shape: {subt_coeffs.shape}")
print(f"Frequency range: {subt_freqs.min():.2f} to {subt_freqs.max():.2f} Hz")

# Smooth along time only: the window is 1 row tall and 50 columns wide
subt_smoothed_coefficients = uniform_filter(np.abs(subt_coeffs), size=(1, 50))

# Plot:
log_magnitude = np.log10(np.abs(subt_smoothed_coefficients))
plt.figure(figsize=(14, 6))
plt.pcolormesh(time_in_sec, subt_freqs, log_magnitude, cmap='plasma', shading='auto', vmin=-5)
plt.colorbar(label='log\u2081\u2080(Magnitude)')
plt.ylabel('Frequency (Hz)')
plt.yscale('log')
plt.yticks(minor=True)
plt.xlabel('Time (s)')
plt.title('Subsampled and Tapered Drone Audio')
plt.show()

# Make histogram:
magnitudes = np.abs(subt_smoothed_coefficients)
magnitude_values = magnitudes.flatten()
mean_magnitude = np.mean(magnitude_values)
median_magnitude = np.median(magnitude_values)

# get mode:
hist_counts, bin_edges = np.histogram(magnitude_values, bins=50)
mode_bin_index = np.argmax(hist_counts)
# Mode is the center of the bin with the highest count:
mode_value = (bin_edges[mode_bin_index] + bin_edges[mode_bin_index + 1]) / 2

fig, ax = plt.subplots(figsize=(12, 4))
ax.hist(magnitude_values, bins=50, density=True, facecolor='skyblue', edgecolor='grey')
ax.set_title("Histogram of Scalogram Magnitudes")
ax.set_xlabel("Magnitude")
ax.set_ylabel("Probability Density")
ax.set_yscale('log')
ax.grid(axis='y', alpha=0.5)
ax.axvline(x=mean_magnitude, color='r', linestyle='--', label=f'Mean: {mean_magnitude: .2e}')
ax.axvline(x=median_magnitude, color='orange', linestyle='--', label=f'Median: {median_magnitude: .2e}')
ax.axvline(x=mode_value, color='b', linestyle='--', label=f'Mode: {mode_value: .2e}')
ax.legend()
plt.show()

# Summary statistics
print(ss.describe(magnitude_values))
std_dev = np.std(magnitude_values)
print(f"Standard Deviation: {std_dev:.4f}")

#### Subsampled and Tapered DNS Data With Added Sine Waves

In [None]:
# subsampled/tapered WITH sine waves:
#sub_tap_with_sines = sub_t_sines['Data_With_Sines'].values.flatten()
sub_tap_with_sines = subsample_with_antialiasing(hann_windowed_sines, 562, sr)

# time:
subsampling_interval = 562
time_in_sec = sub_t_sines['time_secs'].values
delta_t = time_in_sec[1] - time_in_sec[0]  # Time step in seconds
print("delta_t:", delta_t)

freq_max = 50  # to get freq range below Nyquist limit of ~42
scale_min = 1.0/(freq_max*delta_t)
largest_scale = 1.0/(5*delta_t)  # nominal lower frequency of 5
scales = np.logspace(np.log10(scale_min), np.log10(largest_scale), num=60)

# CWT
subt_sines_coeffs, subt_sines_freqs = pywt.cwt(sub_tap_with_sines, scales, 'morl', sampling_period=delta_t)
print(f"Coefficients shape: {subt_sines_coeffs.shape}")
print(f"Frequency range: {subt_sines_freqs.min():.2f} to {subt_sines_freqs.max():.2f} Hz")

# Smooth along time only: the window is 1 row tall and 50 columns wide
tsub_smoothed_coefficients = uniform_filter(np.abs(subt_sines_coeffs), size=(1, 50))

# Plot (linear colour scale):
plt.figure(figsize=(14, 6))
plt.pcolormesh(time_in_sec, subt_sines_freqs, np.abs(tsub_smoothed_coefficients), cmap='plasma', shading='auto')
plt.colorbar(label='Magnitude')
plt.title('Subsampled and Tapered Drone Audio With Added Sines')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.yscale('log')
plt.show()

# Histogram of the smoothed magnitudes:
magnitudes = np.abs(tsub_smoothed_coefficients)
magnitude_values = magnitudes.flatten()
mean_magnitude = np.mean(magnitude_values)
median_magnitude = np.median(magnitude_values)

# get mode:
hist_counts, bin_edges = np.histogram(magnitude_values, bins=50)
mode_bin_index = np.argmax(hist_counts)
# Mode is the center of the bin with the highest count:
mode_value = (bin_edges[mode_bin_index] + bin_edges[mode_bin_index + 1]) / 2

fig, ax = plt.subplots(figsize=(12, 4))
ax.hist(magnitude_values, bins=50, density=True, facecolor='skyblue', edgecolor='grey')
ax.set_title("Histogram of Scalogram Magnitudes")
ax.set_xlabel("Magnitude")
ax.set_ylabel("Probability Density")
ax.set_yscale('log')
ax.grid(axis='y', alpha=0.5)
ax.axvline(x=mean_magnitude, color='r', linestyle='--', label=f'Mean: {mean_magnitude: .2e}')
ax.axvline(x=median_magnitude, color='orange', linestyle='--', label=f'Median: {median_magnitude: .2e}')
ax.axvline(x=mode_value, color='b', linestyle='--', label=f'Mode: {mode_value: .2e}')
ax.legend()
plt.show()

# Summary statistics
print(ss.describe(magnitude_values))
std_dev = np.std(magnitude_values)
print(f"Standard Deviation: {std_dev:.4f}")

In [None]:
# difference maps again
# difference map for sub/tap with(out) sine waves
subt_original = subsample_with_antialiasing(hann_windowed_data, 562, sr)  # WITHOUT sine waves
subt_with_sines = subsample_with_antialiasing(hann_windowed_sines, 562, sr)  # WITH sine waves

# time:
subsampling_interval = 562
time_in_sec = sub_t_sines['time_secs'].values
delta_t = time_in_sec[1] - time_in_sec[0]  # Time step in seconds

freq_max = 50  # to get freq range below Nyquist limit of ~42
scale_min = 1.0/(freq_max*delta_t)
largest_scale = 1.0/(5*delta_t)  # nominal lower frequency of 5
scales = np.logspace(np.log10(scale_min), np.log10(largest_scale), num=60)  # increase number of scales

# CWT of both series, each smoothed only in the TIME direction (axis=1),
# not frequency (axis=0)
sub_t_sines_coeffs, sub_t_sines_freqs = pywt.cwt(subt_with_sines, scales, 'morl', sampling_period=delta_t)
coefficients_smoothed = uniform_filter(np.abs(sub_t_sines_coeffs), size=(1, 50))
sub_t_coeffs, sub_t_freqs = pywt.cwt(subt_original, scales, 'morl', sampling_period=delta_t)
smoothed_coefficients = uniform_filter(np.abs(sub_t_coeffs), size=(1, 50))

# Normalize each scalogram to [0, 1] (min-max) before subtracting
mag_without_sines = np.abs(smoothed_coefficients)
mag_with_sines = np.abs(coefficients_smoothed)
coeff_norm = (mag_without_sines - mag_without_sines.min()) / (mag_without_sines.max() - mag_without_sines.min())
coeff1_norm = (mag_with_sines - mag_with_sines.min()) / (mag_with_sines.max() - mag_with_sines.min())
difference = coeff_norm - coeff1_norm
max_diff = np.max(np.abs(difference))

# Plot:
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
ax_without, ax_with, ax_diff = axes

# Scalogram 1 - use log scale like original
im1 = ax_without.pcolormesh(time_in_sec, sub_t_freqs, np.log10(smoothed_coefficients), cmap='plasma', shading='auto', vmin=-5)
plt.colorbar(im1, ax=ax_without, label='log\u2081\u2080(Magnitude)')

# Scalogram 2 - linear scale
im2 = ax_with.pcolormesh(time_in_sec, sub_t_sines_freqs, np.abs(coefficients_smoothed), cmap='plasma', shading='auto')
plt.colorbar(im2, ax=ax_with, label='Magnitude')

# Difference map, with a colour range symmetric about zero
im3 = ax_diff.pcolormesh(time_in_sec, sub_t_freqs, difference, cmap='RdBu', vmin=-max_diff, vmax=max_diff, shading='auto')
plt.colorbar(im3, ax=ax_diff, label='Difference')

# Shared axis labelling; every panel uses a log scale for the frequency axis
for panel, panel_title in zip(axes, ('Scalogram 1', 'Scalogram 2', 'Difference (1 - 2)')):
    panel.set_title(panel_title)
    panel.set_xlabel('Time (s)')
    panel.set_ylabel('Frequency (Hz)')
    panel.set_yscale('log')

plt.tight_layout()
plt.show()