In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
from dask.distributed import Client, LocalCluster
# Start a Dask distributed client with one worker process, six threads and a
# 10 GB memory limit. The trailing bare `client` shows its repr as cell output.
worker_options = {
    'n_workers': 1,
    'threads_per_worker': 6,
    'memory_limit': '10GB',
}
client = Client(**worker_options)
client

In [None]:
import copy
import sys
import xarray as xr
import numpy as np
import dask.array as da
import time
import os

import dask

import matplotlib.pyplot as plt
import hvplot.xarray
import holoviews as hv
import scipy.constants
import scipy

sys.path.append("../..")
import processing_dask as pr
import plot_dask

sys.path.append("../../../preprocessing/")
from generate_chirp import generate_chirp

In [None]:
import matplotlib
# Notebook-wide matplotlib defaults: base font size, legend font size,
# line width, and no external LaTeX for text rendering.
plot_defaults = {
    'font.size': 16,
    'legend.fontsize': 10,
    'lines.linewidth': 2,
    'text.usetex': False,
}
matplotlib.rcParams.update(plot_defaults)

In [None]:
# --- Dataset selection --------------------------------------------------------
# Keep exactly one `prefix` active. The commented-out lines are alternative
# recordings; the trailing note on each line says what was different.
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240222_203345"

prefix = "/home/thomas/Documents/StanfordGrad/RadioGlaciology/drone/radar_data/orca_paper_data_files/phase_noise/b205/20240222_203345"
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/heat_experiment/20240301_003904" # heat experiment
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240305_193939" # replication of original
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240306_155551" # 10 dB higher TX power, 10 dB lower RX gain
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240306_183951" # 30 dB attenuator switched to RX side
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240306_192829" # 30 dB attenuator switched back to TX side (back to orig config)
#prefix = "/media/thomas/Extreme SSD/orca_paper_data_files/phase_noise/b205/20240306_210308" # fiber

# Directory holding the zarr copies of the recordings.
zarr_base_location="/home/thomas/Documents/StanfordGrad/RadioGlaciology/test_tmp_zarr_cache/"

# --- Pulse-compression parameters ---------------------------------------------
# Passed to pr.pulse_compress as `zero_sample_idx`.
zero_sample_idx = 159
# Two thirds of the vacuum speed of light, passed to pr.pulse_compress as
# `signal_speed`. Presumably the velocity factor of the cable/fiber between
# TX and RX -- TODO confirm.
sig_speed = (2/3) * scipy.constants.speed_of_light

# Convert the recording to zarr; `skip_if_cached=True` is forwarded to the helper.
zarr_path = pr.save_radar_data_to_zarr(
    prefix,
    zarr_base_location=zarr_base_location,
    skip_if_cached=True,
)

zarr_path

In [None]:
# Open the cached raw recording and show any notes stored in its config.
raw = xr.open_zarr(zarr_path)
#raw = raw[{'pulse_idx': slice(0, 10000)}]
run_config = raw.attrs['config']
if 'NOTES' in run_config:
    print("=== Notes from config file: ===", run_config['NOTES'], sep="\n")

In [None]:
# Regenerate the chirp from the recording's own configuration and pass it,
# together with the sample rate from the same configuration, to pulse compression.
recording_config = raw.config
chirp_ts, chirp = generate_chirp(recording_config)

compressed = pr.pulse_compress(
    raw,
    chirp,
    fs=recording_config['GENERATE']['sample_rate'],
    zero_sample_idx=zero_sample_idx,
    signal_speed=sig_speed,
)

In [None]:
# Location of the pulse-compressed store inside the zarr cache directory.
compressed_zarr_path = os.path.join(zarr_base_location, raw.basename+"_pulsecompressed.zarr")

# Write the pulse-compressed data only when the store is not on disk yet, so
# the notebook can be re-run top to bottom without hand-editing this cell.
# NOTE: only the existence of the path is checked. If an earlier write was
# interrupted, delete the partial store by hand before re-running.
if os.path.exists(compressed_zarr_path):
    print("Using existing pulse compressed data at: ", compressed_zarr_path)
else:
    print("Generating and writing pulse compressed data to: ", compressed_zarr_path)
    compressed.to_zarr(compressed_zarr_path)

### Compressed data

Compressed data is now saved to disk and we can load it from there. Optionally
use some of these plots to verify the peak index.

In [None]:
# Replace the in-memory "compressed" with a view read directly from the zarr
# store, chunked in blocks of 1000 pulses.
pulse_chunks = {"pulse_idx": 1000}
compressed = xr.open_dataset(compressed_zarr_path, chunks=pulse_chunks)

In [None]:
def _abs_squared(samples):
    """Return the squared magnitude of ``samples``."""
    return np.abs(samples)**2

# Map the squared-magnitude computation over the radar data, letting dask
# parallelize it; the declared output dtype is float32.
compressed_pwr = xr.apply_ufunc(
    _abs_squared,
    compressed["radar_data"],
    dask="parallelized",
    output_dtypes=[np.float32],
)

In [None]:
# Quick check, only useful when the approximate peak position is already known:
# overlay the power of every millionth pulse and zoom in on the expected peak index.
sampled_pulses = [(k * 1000000) - 1 for k in range(1, 7)]

fig, ax = plt.subplots()
for pulse in sampled_pulses:
    ax.plot(compressed_pwr[{'pulse_idx': pulse}], label=f"pulse_idx={pulse}")

# Zoom window around the expected peak; the commented lines are alternatives.
ax.set_xlim(185, 190)
#ax.set_xlim(225, 232)
#ax.set_xlim(155, 160)
ax.grid()
ax.legend()

### More complete (but very slow) way to check peak index
The cells below will find the peak around an approximate distance in every pulse compressed chirp. This is slow.

In [None]:
# expected_reflector_distance_1way = 50 # m
# reflector_peak_tol_bins = 2 # bins (on each side)
# noise_start_distance_1way = 1000 # m

In [None]:
# expected_peak_idx = (np.abs(compressed.reflection_distance - expected_reflector_distance_1way)).argmin().compute().item()

# peak_idxs = compressed["radar_data"].reduce(
#     lambda x, axis: (np.abs((x[:, expected_peak_idx-reflector_peak_tol_bins:expected_peak_idx+reflector_peak_tol_bins]))
#                         ).argmax(axis=axis) + expected_peak_idx-reflector_peak_tol_bins, dim='travel_time')

In [None]:
# Index (along travel_time) of the peak that is extracted from every pulse-compressed chirp.
# Somehow figure out the peak_idx (either take median of peak_idxs or guess and verify with the millionth sample plot)
# The commented-out values are alternatives, presumably for other recordings or
# hardware configurations -- TODO confirm which dataset each belongs to.
peak_idx = 187
#peak_idx = 229
#peak_idx = 159

### Extract and save just the peak from each chirp

In [None]:

def _take_peak_bin(travel_time_samples):
    """Return the sample at the notebook-level ``peak_idx`` from one 1-D slice."""
    return travel_time_samples[peak_idx]

# Reduce the radar data to the single complex sample at `peak_idx` per pulse.
compressed_single_peak = xr.apply_ufunc(
    _take_peak_bin,
    compressed["radar_data"],
    input_core_dims=[['travel_time']],  # dimension handed to the function -- not vectorized over
    output_core_dims=[[]],              # the function itself returns a scalar
    exclude_dims={"travel_time"},       # dimensions to not vectorize over
    vectorize=True,                     # loop over the remaining dimensions with np.vectorize
    dask="parallelized",                # allow dask to chunk and parallelize the computation
    output_dtypes=[np.complex64],       # needed for dask: explicit output dtype
)

In [None]:
# Name the single-peak store after the pulse-compressed store and the chosen bin.
compressed_single_peak_zarr_path = compressed_zarr_path.replace(".zarr", f"_single_peak{peak_idx}.zarr")
print(compressed_single_peak_zarr_path)

# Write the extracted peaks only when the store does not exist yet. An
# unconditional write fails on every re-run once the store is present, which
# previously meant commenting this line out by hand.
# NOTE: only the existence of the path is checked. If an earlier write was
# interrupted, delete the partial store by hand before re-running.
if not os.path.exists(compressed_single_peak_zarr_path):
    compressed_single_peak.chunk('auto').to_zarr(compressed_single_peak_zarr_path)

In [None]:
# Swap the extraction result for the copy stored in the zarr file.
single_peak_store = xr.open_zarr(compressed_single_peak_zarr_path)
compressed_single_peak = single_peak_store["radar_data"]

# USE ONLY FOR PHASE CORRECTION TEST
#compressed_single_peak = compressed_single_peak_corrected

### Compute signal statistics

In [None]:
# Target integration times: 10 logarithmically spaced values from 250e-6 to 2.5*60.
ts = np.logspace(np.log10(250e-6), np.log10(2.5*60), 10)

# Integration time and pulse count actually used for each target time.
actual_stack_t = np.full_like(ts, np.nan)
actual_stack_n = np.zeros(ts.shape, dtype=int)

# Statistics to compute. NaN marks "not computed yet".
stack_signal_peak_pwr_mean = np.full_like(ts, np.nan)
stack_signal_peak_pwr_variance = np.full_like(ts, np.nan)
stack_signal_peak_phase = np.full_like(ts, np.nan)
stack_signal_peak_phase_variance = np.full_like(ts, np.nan)

In [None]:
# Pulse repetition interval from the recording's configuration (presumably in
# seconds -- TODO confirm). It does not change per iteration, so read it once.
pulse_rep_int = raw.attrs['config']['CHIRP']['pulse_rep_int']

for t_idx, t in enumerate(ts):
    # The phase variance is the last statistic written in each iteration, so a
    # non-NaN entry means this stack length already finished.
    if not np.isnan(stack_signal_peak_phase_variance[t_idx]):
        continue # Skip if already computed (in case of interruption and restart)

    timestamp = time.time() # Track computation time

    # Convert the target integration time to a whole number of pulses (at least one).
    actual_stack_n[t_idx] = max(1, int(t / pulse_rep_int))
    actual_stack_t[t_idx] = actual_stack_n[t_idx] * pulse_rep_int # TODO: Account for errors?
    print(f"[{t_idx+1}/{len(ts)}] \tt={actual_stack_t[t_idx]} \tn_stack={actual_stack_n[t_idx]}")

    with dask.config.set(**{'array.slicing.split_large_chunks': False}):

        if actual_stack_n[t_idx] == 1:
            # No need to stack, just compute the statistics
            stacked = compressed_single_peak
        else:
            stacked = pr.stack(compressed_single_peak, actual_stack_n[t_idx])

        # np.angle returns radians. The result is used as an in-memory array
        # below (no .compute()), whereas the power stays lazy until .compute().
        # NOTE(review): mean/variance are taken directly on wrapped angles; this
        # is only meaningful while the phases stay away from the +/-pi wrap.
        peak_phases_numpy = np.angle(stacked)
        peak_pwr = (np.abs(stacked))**2

        stack_signal_peak_pwr_mean[t_idx] = peak_pwr.mean().compute().item()
        stack_signal_peak_pwr_variance[t_idx] = peak_pwr.var().compute().item()
        stack_signal_peak_phase[t_idx] = peak_phases_numpy.mean()
        stack_signal_peak_phase_variance[t_idx] = peak_phases_numpy.var()

    print(f"Completed in {time.time() - timestamp} seconds")

In [None]:
# Save summarized results to a file
from datetime import datetime
import pickle

# One timestamped file per run, tagged with the analysed peak bin.
filename = f"outputs/{raw.basename}_stacking_stats_{datetime.now().strftime('%Y%m%d_%H%M%S')}_bin{peak_idx}.pickle"

# open() does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(filename), exist_ok=True)

with open(filename, 'wb') as f:
    pickle.dump({
        "basename": raw.basename,
        "actual_stack_t": actual_stack_t,
        "actual_stack_n": actual_stack_n,
        "stack_signal_peak_pwr_mean": stack_signal_peak_pwr_mean,
        "stack_signal_peak_pwr_variance": stack_signal_peak_pwr_variance,
        "stack_signal_peak_phase": stack_signal_peak_phase,
        "stack_signal_peak_phase_variance": stack_signal_peak_phase_variance
    }, f)


In [None]:
# Plot results

# Read the summary back from disk so the figure reflects exactly what was saved.
# Only load pickle files written by this notebook: unpickling can run arbitrary code.
with open(filename, 'rb') as f:
    data = pickle.load(f)

fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
ax_pwr, ax_ph = axs

ax_pwr.plot(data["actual_stack_t"], data["stack_signal_peak_pwr_mean"])
# Add a shaded region of +/- one standard deviation (sqrt of the variance)
ax_pwr.fill_between(data["actual_stack_t"], data["stack_signal_peak_pwr_mean"] - np.sqrt(data["stack_signal_peak_pwr_variance"]),
                    data["stack_signal_peak_pwr_mean"] + np.sqrt(data["stack_signal_peak_pwr_variance"]), alpha=0.4)
ax_pwr.set_title(f"Signal Power [{data['basename']}]")
ax_pwr.loglog()

ph = np.degrees(data["stack_signal_peak_phase"])
# A variance carries squared units, so rad^2 -> deg^2 needs the conversion
# factor squared. np.degrees would apply it only once.
ph_var = data["stack_signal_peak_phase_variance"] * (180/np.pi)**2

# ax_ph.plot(data["actual_stack_t"], ph)
# # Add a shaded region for the variance
# ax_ph.fill_between(data["actual_stack_t"], ph - np.sqrt(ph_var),
#                     ph + np.sqrt(ph_var), alpha=0.4)

# The phase panel shows the stored variance as-is (computed from np.angle
# output, i.e. in rad^2), not the degree-converted values above.
ax_ph.plot(data["actual_stack_t"], (data["stack_signal_peak_phase_variance"]))
ax_ph.set_title("Variance of Signal Phase")
ax_ph.loglog()
ax_ph.set_xlabel("Wall Clock Integration Time [s]")

for ax in axs:
    ax.grid(True)

# Save the figure next to the pickle file it was generated from.
plot_filename = filename.replace(".pickle", "_plot.png")
fig.savefig(plot_filename)

plt.show()

### Linear regression on single-pulse phases

In [None]:
# Phase angle (radians, as returned by np.angle) of every single-pulse peak sample.
phases = np.angle(compressed_single_peak)

In [None]:
# Reference phase variance in deg^2: 1/SNR in rad^2 for a linear SNR of
# 10**(20/10), scaled by (180/pi)^2. The 20 is presumably an assumed SNR in
# dB -- TODO confirm where it comes from.
phase_var_deg_theory = (1/10**(20/10)) * (180/np.pi)**2

# Measured variance of the single-pulse phases after conversion to degrees.
phase_var_deg_emp = np.var(np.degrees(phases))
# Both quantities are variances, so their unit is degrees squared.
print(f"Empirical phase variance: {phase_var_deg_emp} deg^2")
print(f"Theoretical phase variance: {phase_var_deg_theory} deg^2")

In [None]:
from scipy.stats import linregress

# Convert once and reuse: the per-pulse arrays are large, and the degree
# conversion was previously repeated for every use.
phases_deg = np.degrees(phases)
mean_phase_deg = np.mean(phases_deg)
slow_time = compressed_single_peak.slow_time
slow_time_minutes = slow_time/60

fig, ax = plt.subplots(figsize=(6,4))

ax.scatter(slow_time_minutes, phases_deg, s=0.01, alpha=0.4)

# Add linear regression line (fit in slow_time units, plotted against minutes)
slope, intercept, r_value, p_value, std_err = linregress(slow_time, phases_deg)
ax.plot(slow_time_minutes, slope*slow_time + intercept, color='C3', linestyle='--', label='Linear Regression Fit', linewidth=1)
print(f"Linear regression fit: slope={slope} degrees/second, r_value={r_value}, p_value={p_value}, std_err={std_err}")
ax.text(0.05, 0.93, f"Linear regression slope: {slope:.6f} degrees/second", transform=ax.transAxes, fontsize=10)

# Double-headed arrow centred on the mean phase.
# NOTE(review): the arrow length is the variance (deg^2) drawn on an axis in
# degrees; confirm whether the standard deviation was intended.
ax.annotate(text='', xy=(5,mean_phase_deg-(phase_var_deg_emp/2)), xytext=(5,mean_phase_deg+(phase_var_deg_emp/2)), arrowprops=dict(arrowstyle='<->', color='C1', linewidth=2), color='C1')
# Raw f-string: "\phi" must reach mathtext unchanged and is not a valid Python escape.
ax.annotate(text=rf'Var$(\phi)={phase_var_deg_emp:.2f}$ degrees', xy=(5.5, -130), color='C1')

ax.set_xlabel('Slow Time [minutes]')
ax.set_ylabel('Phase [degrees]')
#ax.set_title(f"{raw.basename}")
ax.legend(loc='lower right')

fig.tight_layout()
fig.savefig(plot_filename.replace(".png", "_phase_linreg.png"), dpi=300)
plt.show()

### Phase correction

Use this to first estimate a phase correction term, then apply the phase correction,
then re-run the last section of cells with `compressed_single_peak_corrected` instead of `compressed_single_peak`

In [None]:
# Unit phasor that rotates each pulse back by the fitted linear drift
# (slope in degrees per slow_time unit). The regression intercept is not removed.
fitted_drift_deg = slope * compressed_single_peak.slow_time
phase_correction = np.exp(-1j * np.radians(fitted_drift_deg))

In [None]:
# Multiply every peak sample by its correction phasor to take out the fitted phase drift.
compressed_single_peak_corrected = compressed_single_peak * phase_correction