## Note by the author

<span style="font-size:1.3em;">
This code is written by: Donglin Wu
<br>
If you find any errors, please contact: donglin.wu@yale.edu
<br>

This is a preliminary version of the code, so there are places you need to input and modify throughout the code.</span>

<span style="font-size:1.3em;">
First, you need to input the directory and name of the fits cube, the directory and name of the output files, center of the desired images (usually taken to be somewhere near the continuum emission), the distance of the source, and other source-related figures.<br>


Second, because a FITS cube sometimes has degenerate axes (as in CARMA-NRO data), you might need to modify the WCS axes and any part of the code that uses <font color="blue"><em>cube_data.shape</em></font>.<br>

Third, the most important function is <font color="red">fit_gaussians</font>. It has many input parameters. Each parameter is important for the algorithm to operate correctly and is described in detail. Please read the descriptions carefully before selecting the parameters.<br>

Fourth, for the velocity maps and velocity gradients, the important input is the <font color="blue">region</font> over which the gradients are calculated/plotted/averaged.<br>

Fifth, the intensity-weighted velocity map is only one of the ways to generate velocity maps. It is selected because it can alleviate the consequences caused by overfitting (especially for data with high velocity resolution such as ALMA). There can be other ways to find the velocity maps. I provided some alternative functions but they are not the only ones.</span>

## Import fits cube

In [None]:
from astropy import constants as const
from astropy import units as u
import math
from astropy.io import fits
from astropy.wcs import WCS
# from astropy.utils.data import get_pkg_data_filename
import matplotlib.pyplot as plt
from matplotlib import colors

import numpy as np

from astropy.coordinates import SkyCoord  
from astropy.coordinates import FK5  
# from photutils.aperture import SkyEllipticalAperture, SkyRectangularAperture
# from photutils.aperture import aperture_photometry


In [None]:
## [INPUT] required in this block
## Input the directory and name of the fits cube

filename = '' #[Input]

# Open the FITS file and print a summary of the HDUs it contains.
# NOTE(review): hdul is never closed; astropy may load hdu.data lazily, so confirm
# before closing it early.
hdul = fits.open(filename)
hdul.info()

# The cube and its header are read from the first HDU.
hdu = hdul[0]
cube_header = hdu.header
cube_data = hdu.data

## May require modification: change to the velocity axis and space axes correspondingly
# wcs2d is used below for sky <-> pixel conversions and as the plot projection;
# wcsv is used below to convert channel index to velocity.
# The rest of the notebook indexes cube_data as [channel, y, x].
wcs = WCS(cube_header)
wcs2d = wcs[0,:,:] 
wcsv = wcs[:,0,0]

In [None]:
## [INPUT] required in this block
## Input the coordinate of the center (of the continuum)

# The string is parsed as RA in hour angle and Dec in degrees, in the FK5 frame.
center = SkyCoord("5:35:22.2008948674 -6:13:06.1608008323", frame=FK5, unit=(u.hourangle, u.deg))  #[Input]
# Pixel position of the center in the 2D (spatial) WCS; indexed below as [0] -> x, [1] -> y.
center_pixel = wcs2d.world_to_pixel(center)
print(center, center_pixel)

In [None]:
## May require modification: cube_data.shape[0] should be changed to the velocity axis
# World coordinate of every channel along the first cube axis, divided by 1000.
# NOTE(review): presumably converts m/s to km/s - confirm the unit of the spectral axis.
v_world = np.array([wcsv.pixel_to_world(int(i)).value/1000 for i in range(cube_data.shape[0])])

In [None]:
# Integer pixel indices of the center; int() truncates the fractional part
# (it does not round to the nearest pixel).
xcenter = int(center_pixel[0])
ycenter = int(center_pixel[1])

In [None]:
## May require modification: change to the image axes correspondingly
# Values are read straight from the header keywords; dx and dy are later multiplied
# by deg_to_pc, i.e. they are treated as being in degrees.
bmaj, bmin = cube_header['BMAJ'], cube_header['BMIN'] # beam major and minor axis
dx, dy = cube_header['CDELT1'], cube_header['CDELT2'] # pixel size
print(bmaj, bmin, dx, dy)
# Beam axes expressed in units of the pixel size (sign follows dx/dy).
print(bmaj/dx, bmin/dy)

In [None]:
## [INPUT] required in this block
## Input the distance of the source

# Distance in parsec (deg_to_cm below multiplies it by the parsec -> cm factor).
d_source = 386 # 386.0 for HOPS 198


# Small-angle conversion: physical length subtended by one degree at d_source.
deg_to_pc = d_source*math.pi/180
deg_to_cm = u.parsec.to(u.cm)*d_source*math.pi/180

# Pixel size along x and y in parsec (displayed as the cell output).
abs(dx)*deg_to_pc, abs(dy)*deg_to_pc

## Fit multiple gaussians to spectrum of each pixel

In [None]:
# Mask of the image: 1.0 where the pixel holds a non-NaN value, 0.0 where it is NaN.
# Channel 50 is the reference channel used to decide which pixels are valid.
mask_image = np.where(np.isnan(cube_data[50, :, :]), 0.0, 1.0)

In [None]:
# Quick visual check of the valid-pixel mask (row 0 drawn at the bottom).
plt.imshow(mask_image, origin='lower')

In [None]:
# def noise_from_spectrum(spectrum, default_noise):
#     noise = []
#     for i in range (len(spectrum)):
#         if i<default_noise[0] or i>default_noise[1]:
#             if math.isnan(spectrum[i]) == False:
#                 noise.append(spectrum[i])
#     return noise

In [None]:
import numpy as np
from scipy.optimize import curve_fit
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

# Single Gaussian profile
def gaussian(x, amp, mean, stddev):
    """Evaluate amp * exp(-(x - mean)^2 / (2 * stddev^2)) at x.

    x may be a scalar or a numpy array; amp is the peak height, mean the
    centre and stddev the standard deviation of the profile.
    """
    offset = x - mean
    exponent = -(offset ** 2) / (2 * stddev ** 2)
    return amp * np.exp(exponent)

# Define a function for multiple Gaussians
# Parameters should be given in list: [amp1, mean1, stddev1, amp2, mean2, stddev2, ...]
def multiple_gaussians(x, *params):
    """Evaluate the sum of several Gaussian components at x.

    Parameters
    ----------
    x : scalar or array-like
        Points at which the model is evaluated. It is converted to a float
        array so that an integer-valued axis does not make the in-place
        accumulation fail with a casting error.
    *params : float
        Flat sequence [amp1, mean1, stddev1, amp2, mean2, stddev2, ...].
        Trailing values that do not form a complete triple are ignored.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as x (all zeros when no complete
        triple is given).
    """
    x = np.asarray(x, dtype=float)
    y = np.zeros_like(x)
    # zip stops at the shortest slice, i.e. after len(params) // 3 complete triples
    for amp, mean, stddev in zip(params[0::3], params[1::3], params[2::3]):
        # Same expression as the single-Gaussian model used elsewhere in the file
        y += amp * np.exp(-((x - mean) ** 2) / (2 * stddev ** 2))
    return y

# Initial guess for the Gaussian parameters, taken from the peaks of the spectrum
def initial_guess_from_peaks(x, y, n_components, distance):
    """Return [amp, mean, stddev, ...] for the n_components highest peaks of y.

    x: 1d array, the velocity/frequency axis
    y: 1d array, the spectrum
    n_components: int, maximum number of peaks to return (fewer are returned
        when find_peaks detects fewer)
    distance: int, minimum separation (in samples) between peaks

    The stddev guess is the mean of two estimates, each obtained by assuming a
    Gaussian through the peak and one of its two neighbouring samples. It can
    be NaN when a neighbour/peak ratio makes the logarithm unusable.
    """
    peak_indices, _ = find_peaks(y, distance=distance)
    # Highest peaks first, truncated to the requested number
    strongest = sorted(peak_indices, key=lambda idx: y[idx], reverse=True)[:n_components]

    def width_from_neighbour(idx, neighbour):
        # Width of a Gaussian passing through sample idx (its maximum) and sample neighbour
        return np.sqrt(-pow(x[neighbour]-x[idx], 2)/(2*np.log(y[neighbour]/y[idx])))

    guess = []
    for idx in strongest:
        left_width = width_from_neighbour(idx, idx-1)
        right_width = width_from_neighbour(idx, idx+1)
        guess.append(y[idx])                        # amplitude
        guess.append(x[idx])                        # mean
        guess.append((left_width+right_width)/2)    # stddev, a rough guess
    return guess

# Information criterion (AIC, BIC, AICc, DIC) of a fitted mixture model
def calculate_criterion(gmm, x, y, criterion='aic'):
    """Return the requested information criterion of gmm for the points (x, y).

    gmm: fitted mixture model providing n_components, aic(), bic() and score()
    x, y: 1d sequences, paired into 2d samples (x_i, y_i)
    criterion: one of 'aic', 'bic', 'aicc', 'dic'

    Raises ValueError for any other criterion name.
    """
    samples = np.array(list(zip(x, y)))

    if criterion == 'aic':
        return gmm.aic(samples)
    if criterion == 'bic':
        return gmm.bic(samples)

    # Number of parameters in the model, as counted by this notebook
    n_params = gmm.n_components * 3 - 1

    if criterion == 'aicc':
        n_points = len(x)
        # AIC plus the small-sample correction term
        correction = (2 * n_params * (n_params + 1)) / (n_points - n_params - 1)
        return gmm.aic(samples) + correction
    if criterion == 'dic':
        deviance = -2 * gmm.score(samples)
        return deviance + 2 * n_params

    raise ValueError("Criterion must be 'aic', 'bic', 'aicc', or 'dic'.")
    
    

In [None]:
# Fit multiple Gaussian models and select the best one based on the chosen criterion
def fit_gaussians(x, y, max_components=3, mean_range=(-np.inf, np.inf), noise_region=(0, -1), amp_threshold=1000, distance=3, criterion='aic', n_init=10):
    """Fit up to max_components Gaussians to a spectrum.

    The number of components is chosen by fitting Gaussian mixtures with
    1..max_components components to the (x, y) points of the part of the
    spectrum above 3 rms and keeping the one with the lowest information
    criterion. A bounded least-squares fit of that many Gaussians to the whole
    spectrum is then performed, and components whose fitted amplitude is not
    above 3 rms are discarded.

    Parameters
    ----------
    x : 1d array
        The velocity or frequency axis of the spectrum.
    y : 1d array
        The spectrum.
    max_components : int
        Maximum number of components fitted to the spectrum.
    mean_range : tuple or list
        (min_mean, max_mean): allowed range for the means of the components.
        The default puts no constraint on the means.
    noise_region : list or tuple
        [index_min, index_max]: y[:index_min] and y[index_max:] are assumed to
        contain only noise and are used to measure the rms. With the default,
        only the last channel is used, so the rms is 0 and the amplitudes are
        effectively unconstrained from below.
    amp_threshold : float
        Maximum value allowed for the amplitude of the components; should be
        selected according to the level of the spectrum.
    distance : int
        Minimum separation (in channels) allowed between peaks. A small value
        of 1 to 3 suits data with low velocity resolution; a larger value such
        as 5 or 8 should be used otherwise.
    criterion : {'aic', 'bic', 'aicc', 'dic'}
        Information criterion used to choose the number of components. For a
        spectrum with few channels above 3 rms use 'aicc', otherwise 'aic'.
    n_init : int
        Number of initialisations of each Gaussian mixture. It strongly affects
        the run time; 1 or 10 is usually enough.

    Returns
    -------
    n_kept : int
        Number of components retained for this spectrum.
    filtered_params : list
        Parameters for multiple_gaussians, in the format
        [amp1, mean1, stddev1, amp2, mean2, stddev2, ...].

    Raises
    ------
    ValueError
        If no channel lies above 3 rms, or if no mixture could be fitted
        (for example because the spectrum has no peak).

    Notes
    -----
    Component counts larger than the number of peaks found (or than the number
    of channels above 3 rms) are not tried: the mixture is seeded with one peak
    per component, and sklearn rejects a seed array of the wrong shape.
    Previously this made the whole fit fail instead of falling back to fewer
    components.
    """
    # Noise level from the channels declared signal-free
    rms = np.nanstd(np.concatenate((y[:noise_region[0]], y[noise_region[1]:])))

    # Channels that are not NaN and not below 3 rms; the mixture is fitted on the
    # contiguous window spanning the first to the last such channel
    signal_channels = np.flatnonzero(~np.isnan(y) & ~(y < 3 * rms))
    if signal_channels.size == 0:
        raise ValueError("No channel of the spectrum is above 3 rms.")
    window = slice(signal_channels[0], signal_channels[-1] + 1)
    x_signal, y_signal = x[window], y[window]
    X = np.column_stack((x_signal, y_signal))

    # Find the most suitable number of components using Gaussian mixture and selected information criterion
    best_criterion = np.inf
    best_gmm = None
    for n_components in range(1, max_components + 1):
        # Seed the mixture centres at the peaks of the spectrum: (velocity, intensity) per peak.
        # If initial_guess_from_peaks does not find suitable peaks, drop means_init below.
        peak_params = initial_guess_from_peaks(x, y, n_components, distance=distance)
        if len(peak_params) // 3 < n_components or X.shape[0] < n_components:
            # Not enough peaks / samples to support this many components
            break
        mean_init = [[peak_params[i * 3 + 1], peak_params[i * 3]] for i in range(n_components)]

        gmm = GaussianMixture(n_components=n_components, covariance_type='diag', init_params='k-means++', means_init=mean_init, n_init=n_init)
        gmm.fit(X)

        current_criterion = calculate_criterion(gmm, x_signal, y_signal, criterion)
        if current_criterion < best_criterion:
            best_criterion = current_criterion
            best_gmm = gmm

    if best_gmm is None:
        raise ValueError("No Gaussian mixture could be fitted to the spectrum.")

    # Use the peaks found by initial_guess_from_peaks as initial guesses of the fit
    initial_params = np.array(initial_guess_from_peaks(x, y, best_gmm.n_components, distance=distance), dtype=float)
    n_fit = len(initial_params) // 3

    # Bounds per component: amplitude in [0, amp_threshold], mean in mean_range, stddev > 0
    lower_bounds = [0, mean_range[0], 1e-6] * n_fit
    upper_bounds = [amp_threshold, mean_range[1], np.inf] * n_fit

    # The width estimate can be NaN/inf; replace such guesses, then clip p0 into the bounds
    initial_params[~np.isfinite(initial_params)] = 0.5
    initial_params = np.minimum(np.maximum(initial_params, lower_bounds), upper_bounds)

    # Curve fitting with constraints
    params, _ = curve_fit(multiple_gaussians, x, y, p0=initial_params, bounds=(lower_bounds, upper_bounds), maxfev=10000, nan_policy='omit')

    # Keep only the components whose amplitude is above 3 rms
    filtered_params = []
    for i in range(n_fit):
        if params[i * 3] > rms * 3:
            filtered_params.extend(params[i * 3:i * 3 + 3])

    return len(filtered_params) // 3, filtered_params




In [None]:
# Example usage with one pixel in the fits cube

# Spectrum of the pixel at (y=184, x=179); channels before index 25 and from index 130 on
# are treated as noise-only, and component means are restricted to 3.5-12.5 (in the units of v_world).
spectrum = cube_data[:,184,179]
n_components, params = fit_gaussians(v_world, spectrum, max_components=4, noise_region=[25,130], mean_range=(3.5, 12.5),criterion='aic', n_init=10)

print(f"Number of Gaussian components: {n_components}")
print("Parameters (amplitude, mean, stddev) for each Gaussian:")
for i in range(n_components):
    print(f"Gaussian {i + 1}: Amplitude={params[i * 3]}, Mean={params[i * 3 + 1]}, Stddev={params[i * 3 + 2]}")


# Plot the data and the fitted curve
# plt.scatter(v_carma, spectrum, label='Data', s=10)
# The data curve has no label, so only the fitted model appears in the legend.
plt.plot(v_world, spectrum)
plt.plot(v_world, multiple_gaussians(v_world, *params), label='Fitted', color='red')
plt.xlabel(r'$v_{\text{LSR}}$ [km s$^{-1}$]')
plt.ylabel(r'Intensity [Jy/beam]')
plt.legend()
plt.show()

In [None]:
# Silence the numerical warnings that the per-pixel fits emit in large numbers
import warnings

for _ignored_message in (
    "divide by zero encountered in divide",
    "invalid value encountered in scalar divide",
    "invalid value encountered in scalar multiply",
    "invalid value encountered in cast",
    "Degrees of freedom <= 0 for slice",
    "invalid value encountered in log",
):
    warnings.filterwarnings("ignore", message=_ignored_message)

In [None]:
## [INPUT] required in this block
## Input the parameters of fit_gaussians

# Per-pixel results: number of components, and lists of amplitudes / means / stddevs
data_ncomponents = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='int')
data_A = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='object')
data_mu = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='object')
data_sigma = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='object')
# data_error = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]))

# Convert the mask once instead of once per pixel
valid_pixel = mask_image.astype(int) == 1
n_failed = 0

for xchan in range(cube_header['NAXIS1']):
    for ychan in range(cube_header['NAXIS2']):
        if valid_pixel[ychan, xchan]:
            try:
                spectrum = cube_data[:, ychan, xchan]
                n_components, params = fit_gaussians(v_world, spectrum, max_components=3, noise_region=[25,130], mean_range=(3.5, 12.5),criterion='aic', n_init=10)
            except Exception:
                # A failed fit marks the pixel with NaN; Exception (rather than a bare
                # except) keeps Ctrl-C able to stop this long loop.
                n_failed += 1
                data_A[ychan, xchan], data_mu[ychan, xchan], data_sigma[ychan, xchan] = [np.nan], [np.nan], [np.nan]
            else:
                data_ncomponents[ychan, xchan] = n_components
                # params is [amp1, mean1, stddev1, amp2, ...]
                data_A[ychan, xchan] = list(params[0::3])
                data_mu[ychan, xchan] = list(params[1::3])
                data_sigma[ychan, xchan] = list(params[2::3])
        if xchan % 50 == 0 and ychan % 50 == 0:
            print(xchan, ychan)

print('number of pixels where the fit failed:', n_failed)

# Pixels that were never fitted still hold the initial 0; mark them as NaN
data_mu[data_mu == 0] = np.nan

In [None]:
import pickle

## [INPUT] required in this block
## Input the directory and name of the output files

dir_output = ''

# Save parameters of the components in pickle.
# "with" closes each file even if pickle.dump raises.
with open(dir_output+'/ALMA_13CO_data_mu.pkl', 'wb') as output:
    pickle.dump(data_mu, output)

with open(dir_output+'/ALMA_13CO_data_A.pkl', 'wb') as output:
    pickle.dump(data_A, output)

with open(dir_output+'/ALMA_13CO_data_sigma.pkl', 'wb') as output:
    pickle.dump(data_sigma, output)

# Save number of components for each pixel in fits file
from astropy.io import fits

hdu_output = fits.PrimaryHDU()
hdu_output.data = data_ncomponents

# Copy the cube header, skipping blank keywords and any keyword containing
# '3', '4', 'COMMENT' or 'HISTORY' (this drops the keywords of the third and fourth axes).
for key in hdu.header.keys():
    if '3' not in key and '4' not in key and 'COMMENT' not in key and 'HISTORY' not in key and key !='':
        # print(key)
        hdu_output.header.update({key:hdu.header[key]})

hdu_output.header.update({'NAXIS':2})
hdu_output.header['BUNIT'] = ''
hdu_output.writeto(dir_output+'/ALMA_13CO_n_component.fits', overwrite=True)

## Find the velocity maps and velocity gradients

### Velocity maps

In [None]:
from scipy.interpolate import interp1d

# Lowest-velocity component inside a velocity range
def func_first_component_min(mu_ls, range_v):
    """Return (velocity, index) of the component with the smallest velocity in range_v.

    mu_ls: sequence of component velocities
    range_v: (v_min, v_max), both limits inclusive
    The index refers to mu_ls. Raises IndexError when no component is in range.
    """
    in_range = [k for k, mu in enumerate(mu_ls) if range_v[0] <= mu <= range_v[1]]
    if len(in_range) <= 1:
        # Zero or one candidate: indexing raises IndexError when there is none
        chosen = in_range[0]
        return mu_ls[chosen], chosen
    chosen = in_range[np.argmin([mu_ls[k] for k in in_range])]
    return mu_ls[chosen], chosen

# Highest-velocity component inside a velocity range
def func_first_component_max(mu_ls, range_v):
    """Return (velocity, index) of the component with the largest velocity in range_v.

    mu_ls: sequence of component velocities
    range_v: (v_min, v_max), both limits inclusive
    The index refers to mu_ls. Raises IndexError when no component is in range.
    """
    in_range = [k for k, mu in enumerate(mu_ls) if range_v[0] <= mu <= range_v[1]]
    if len(in_range) <= 1:
        # Zero or one candidate: indexing raises IndexError when there is none
        chosen = in_range[0]
        return mu_ls[chosen], chosen
    chosen = in_range[np.argmax([mu_ls[k] for k in in_range])]
    return mu_ls[chosen], chosen
    
# Find the component with maximum intensity in a certain velocity range
def func_brightest_component(mu_ls, v_world, spectrum, range_v):
    """Return (velocity, index) of the in-range component where the spectrum is brightest.

    mu_ls: sequence of component velocities
    v_world: 1d array, velocity axis of the spectrum
    spectrum: 1d array, the spectrum (linearly interpolated at each component velocity)
    range_v: (v_min, v_max), both limits inclusive

    The returned index refers to mu_ls, as in func_first_component_min/max.
    Raises ValueError when no component lies in range_v.
    """
    from scipy.interpolate import interp1d

    indices = [i for i in range(len(mu_ls)) if range_v[0] <= mu_ls[i] <= range_v[1]]
    if not indices:
        raise ValueError("No component lies within range_v.")
    spec_interp = interp1d(v_world, spectrum)
    # Spectrum intensity at the velocity of each candidate component
    A_ls_range = [spec_interp(mu_ls[index_i]) for index_i in indices]
    # argmax is a position in the filtered list; map it back to an index of mu_ls
    index_brightest = indices[int(np.argmax(A_ls_range))]
    return mu_ls[index_brightest], index_brightest


<span style="font-size:1.2em;"> The following velocity map is the intensity-weighted velocity map. </span>

In [None]:
## [INPUT] required in this block
## Input mu_low and mu_high, which specify the range of the components we want to analyze

mu_low, mu_high = 5, 6

# mu_ave_peak: the intensity-weighted velocity
# A_ave_peak: the average of the amplitude of the components
# sigma_ave_peak: the sum of the velocity dispersions of the components

mu_ave_peak = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
A_ave_peak = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
sigma_ave_peak = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')

# difference_mu_peaks: difference in velocity of the components (higher minus lower)
# difference_A_peaks: difference in intensity of the components (the one with higher velocity minus the one with lower velocity)
# difference_sigma_peaks: difference in velocity dispersion of the components (the one with higher velocity minus the one with lower velocity)
difference_mu_peaks = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
difference_A_peaks = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
difference_sigma_peaks = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')


for xchan in range(cube_header['NAXIS1']):
    for ychan in range(cube_header['NAXIS2']):
        # Pixels that were never fitted hold a scalar (NaN) instead of a list
        # and therefore have no length; they are skipped.
        try:
            n_fitted = len(data_mu[ychan, xchan])
        except TypeError:
            continue
        if n_fitted == 0:
            continue

        # Keep the components whose mean lies strictly inside (mu_low, mu_high)
        mu_ls, A_ls, sigma_ls = [], [], []
        for mu_j, A_j, sigma_j in zip(data_mu[ychan, xchan], data_A[ychan, xchan], data_sigma[ychan, xchan]):
            if mu_low < mu_j < mu_high: #and A_j > 0.3:
                mu_ls.append(mu_j)
                A_ls.append(A_j)
                sigma_ls.append(sigma_j)

        if not mu_ls:
            # No component in range: velocity and amplitude are undefined,
            # the sum of the dispersions stays 0
            mu_ave_peak[ychan, xchan] = np.nan
            A_ave_peak[ychan, xchan] = np.nan
            continue

        mu_arr, A_arr, sigma_arr = np.array(mu_ls), np.array(A_ls), np.array(sigma_ls)
        # mu_ave_peak[ychan, xchan] = np.nanmean(mu_arr)
        mu_ave_peak[ychan, xchan] = np.nansum(mu_arr*A_arr)/np.nansum(A_arr)
        A_ave_peak[ychan, xchan] = np.nanmean(A_arr)
        sigma_ave_peak[ychan, xchan] = np.nansum(sigma_arr)
        if len(mu_ls) >= 2:
            i_high, i_low = np.argmax(mu_arr), np.argmin(mu_arr)
            difference_mu_peaks[ychan, xchan] = max(mu_ls) - min(mu_ls)
            difference_A_peaks[ychan, xchan] = A_ls[i_high] - A_ls[i_low]
            difference_sigma_peaks[ychan, xchan] = sigma_ls[i_high] - sigma_ls[i_low]

In [None]:
# Pixels still holding the initial value 0 are blanked (modifies mu_ave_peak in place).
mu_ave_peak[mu_ave_peak == 0] = np.nan
plt.subplot(projection=wcs2d)
plt.imshow(mu_ave_peak, origin='lower',vmin=mu_low,vmax=mu_high,cmap='coolwarm')
plt.colorbar(label=r'$v_{\text{LSR, 1}}$ [km/s]')
plt.xlabel('RA')
plt.ylabel('DEC')
# NOTE(review): 448 and 90 are hard-coded; 448 is presumably the image size in pixels - adjust for other cubes.
plt.xlim(90,448-90)
plt.ylim(90,448-90)

In [None]:
# Pixels still holding the initial value 0 are blanked (modifies A_ave_peak in place).
A_ave_peak[A_ave_peak == 0] = np.nan
plt.subplot(projection=wcs2d)
plt.imshow(A_ave_peak, origin='lower',vmax=0.5)
plt.colorbar(label=r'$A_{1}$')
plt.xlabel('RA')
plt.ylabel('DEC')
# NOTE(review): 448 and 90 are hard-coded; 448 is presumably the image size in pixels - adjust for other cubes.
plt.xlim(90,448-90)
plt.ylim(90,448-90)

In [None]:
# Pixels with a summed dispersion of 0 (no component in range) are blanked (modifies sigma_ave_peak in place).
sigma_ave_peak[sigma_ave_peak == 0] = np.nan
plt.subplot(projection=wcs2d)
plt.imshow(sigma_ave_peak, origin='lower',vmax=1)
plt.colorbar(label=r'$\sigma_{\text{LSR, 1}}$')
plt.xlabel('RA')
plt.ylabel('DEC')
# NOTE(review): 448 and 90 are hard-coded; 448 is presumably the image size in pixels - adjust for other cubes.
plt.xlim(90,448-90)
plt.ylim(90,448-90)

In [None]:
## [INPUT] required in this block
## Input the directory and name of the output files

from astropy.io import fits

hdu_output = fits.PrimaryHDU()

# Copy the cube header keyword by keyword, leaving out blank keywords and every
# keyword that contains '3', '4', 'COMMENT' or 'HISTORY'
for key in hdu.header.keys():
    if key == '' or any(token in key for token in ('3', '4', 'COMMENT', 'HISTORY')):
        continue
    hdu_output.header.update({key:hdu.header[key]})
hdu_output.header.update({'NAXIS':2})

# Intensity-weighted velocity map
hdu_output.data = mu_ave_peak
hdu_output.header['BUNIT'] = 'km/s'
hdu_output.writeto(dir_output+'/ALMA_13CO_velocity_intensity_weighted_peak.fits', overwrite=True)

In [None]:
## [INPUT] required in this block
## Input the directory and name of the output files

from astropy.io import fits

hdu_output = fits.PrimaryHDU()


# Copy the cube header keyword by keyword, leaving out blank keywords and every
# keyword that contains '3', '4', 'COMMENT' or 'HISTORY'
for key in hdu.header.keys():
    if key == '' or any(token in key for token in ('3', '4', 'COMMENT', 'HISTORY')):
        continue
    hdu_output.header.update({key:hdu.header[key]})
hdu_output.header.update({'NAXIS':2})

# Mean amplitude of the selected components
hdu_output.data = A_ave_peak
hdu_output.header['BUNIT'] = 'K'
hdu_output.writeto(dir_output+'/ALMA_13CO_amplitude_intensity_weighted_peak.fits', overwrite=True)

In [None]:
## [INPUT] required in this block
## Input the directory and name of the output files

from astropy.io import fits

# Summed velocity dispersion of the selected components
hdu_output = fits.PrimaryHDU()
hdu_output.data = sigma_ave_peak

# Copy the cube header keyword by keyword, leaving out blank keywords and every
# keyword that contains '3', '4', 'COMMENT' or 'HISTORY'
for key in hdu.header.keys():
    if key == '' or any(token in key for token in ('3', '4', 'COMMENT', 'HISTORY')):
        continue
    hdu_output.header.update({key:hdu.header[key]})

hdu_output.header.update({'NAXIS':2})
hdu_output.header['BUNIT'] = 'km/s'
hdu_output.writeto(dir_output+'/ALMA_13CO_sigma_sum_intensity_weighted_peak.fits', overwrite=True)

### Velocity gradients

In [None]:
from scipy.optimize import leastsq

# Plane z = a*x + b*y + c
def plane(x, y, params):
    """Return z = a*x + b*y + c for params = (a, b, c)."""
    slope_x, slope_y, offset = params
    return slope_x * x + slope_y * y + offset

# Residual between the plane and the data at the same (x, y) coordinates
def error(params, x, y, z):
    """Return plane(x, y, params) - z, the signed residual minimised by the fit."""
    predicted = plane(x, y, params)
    return predicted - z

# Least-squares plane through a 2d map, ignoring NaN pixels
def fit_plane(data, x_unit=1, y_unit=1):
    """Fit z = a*x + b*y + c to data and return (params, x, y).

    data: a 2d array, data to fit the plane
    x_unit: length in x of every pixel
    y_unit: length in y of every pixel

    params is the fitted (a, b, c); x and y are the 2d coordinate grids
    (column index * x_unit and row index * y_unit) with the shape of data.
    """
    n_rows, n_cols = data.shape

    # Physical coordinates of every pixel
    x, y = np.meshgrid(np.arange(n_cols)*x_unit, np.arange(n_rows)*y_unit)

    # Only the non-NaN pixels take part in the fit
    z_all = data.flatten()
    finite = ~np.isnan(z_all)
    samples = (x.flatten()[finite], y.flatten()[finite], z_all[finite])

    # Start from a flat plane at the mean level of the data
    start = [0, 0, np.nanmean(data)]
    params, _ = leastsq(error, start, args=samples)

    return params, x, y


In [None]:
# Find the position angle and size of the velocity gradients for plane z = ax + by +c

def position_angle(a, b):
    """Return the direction, in degrees, in which the plane z = ax + by + c decreases fastest.

    Input: a, b are the parameters of the plane z = ax + by + c
    Output: float, unit in degrees
        range: -90 (inclusive) to 270, with the x-axis being 0 and the angle
        counted in the anticlockwise direction; NaN when a and b are both 0
        (a flat plane has no direction).

    arctan2 is used instead of arctan(b/a) so that a == 0 neither divides by
    zero nor picks the wrong half-plane.
    """
    if a == 0 and b == 0:
        return np.nan
    # Direction of the vector (-a, -b), i.e. pointing towards decreasing z
    angle = 180*np.arctan2(-b, -a)/np.pi
    # arctan2 returns (-180, 180]; shift to the documented [-90, 270) range
    if angle < -90:
        angle += 360
    return angle
    
def slope_plane(a, b):
    """Return the magnitude sqrt(a^2 + b^2) of the gradient of z = ax + by + c."""
    squared_norm = a ** 2 + b ** 2
    return np.sqrt(squared_norm)

In [None]:

# Every nth element of a sequence, as an array
def get_every_nth(myList, n, default=None):
    """Return np.array of myList[0], myList[n], myList[2n], ...

    When nothing is selected (empty myList), default is wrapped in the array instead.
    """
    picked = []
    for position in range(0, len(myList), n):
        picked.append(myList[position])
    if not picked:
        picked = default
    return np.array(picked)

# Every nth element along both axes of a 2d array
def get_every_nth_2d(arr, n):
    """Return arr sub-sampled with step n along rows and columns, starting at [0, 0]."""
    step = slice(None, None, n)
    return arr[step, step]

# x and y components of an arrow given its amplitude and angle
def polar_to_cartesian(amplitude, angle):
    """Return (x, y) for an arrow of the given amplitude.

    amplitude: amplitude of the arrow
    angle: angle of the arrow from the x-axis (in radians)
    """
    return amplitude * np.cos(angle), amplitude * np.sin(angle)

# Amplitude and angle of an arrow given its x and y components
def cartesian_to_polar(x, y):
    """Return (amplitude, angle) of the arrow (x, y); the angle is arctan2(y, x) in radians."""
    return np.sqrt(x**2 + y**2), np.arctan2(y, x)

# Average the arrows over regions of m by m pixels
def average_vectors(amplitude_array, angle_array, m):
    """Vector-average arrows over non-overlapping m x m blocks.

    amplitude_array: 2d n by n square array, amplitude of the arrow at each pixel
    angle_array: 2d n by n square array, angle of the arrow at each pixel, in radians
    m: the length of the square regions over which the arrows are averaged
    Note: n needs to be divisible by m

    Returns (avg_amplitude, avg_angle), each of shape (n // m, n // m). The
    arrows of a block are averaged component-wise in Cartesian form and the
    mean arrow is converted back to amplitude and angle.
    """
    n = amplitude_array.shape[0]
    assert n % m == 0, "n must be divisible by m"

    n_blocks = n // m
    avg_amplitude = np.zeros((n_blocks, n_blocks))
    avg_angle = np.zeros((n_blocks, n_blocks))

    for block_row in range(n_blocks):
        rows = slice(block_row*m, (block_row+1)*m)
        for block_col in range(n_blocks):
            cols = slice(block_col*m, (block_col+1)*m)

            # Cartesian components of every arrow in this block
            x_block, y_block = polar_to_cartesian(amplitude_array[rows, cols], angle_array[rows, cols])

            # Mean arrow of the block, back in polar form
            mean_amp, mean_ang = cartesian_to_polar(np.mean(x_block), np.mean(y_block))
            avg_amplitude[block_row, block_col] = mean_amp
            avg_angle[block_row, block_col] = mean_ang

    return avg_amplitude, avg_angle

# Example usage: random arrows on an n x n grid (black) and their m x m block averages (blue)
n = 6
m = 2
amplitude_array = np.random.rand(n, n)
angle_array = np.random.rand(n, n) * 2 * np.pi

X, Y = np.meshgrid(np.arange(0,n), np.arange(0,n))
plt.quiver(X, Y, np.cos(angle_array)*amplitude_array, np.sin(angle_array)*amplitude_array)

# Averaged arrows are drawn at the first pixel of each block
X_new, Y_new = np.meshgrid(np.arange(0,n,m), np.arange(0,n,m))
avg_amplitude, avg_angle = average_vectors(amplitude_array, angle_array, m)
# The direction comes from avg_angle (the original passed avg_amplitude to cos/sin by mistake)
plt.quiver(X_new, Y_new, np.cos(avg_angle)*avg_amplitude, np.sin(avg_angle)*avg_amplitude, color='blue')

print("Averaged Amplitude:\n", avg_amplitude)
print("Averaged Angle:\n", avg_angle)


In [None]:
## [INPUT] required in this block
## Input the half length of the square region to fit the plane,
##   a plane is fitted to a square with length of 2*halfsize_plane pixels
## Note: usually halfsize_plane should be larger than the ratio of the beam major axis and the length of each pixel for results to make sense

halfsize_plane = 8

# slope_ave: magnitude of the fitted gradient; position_angle_ave: its direction in degrees;
# v_plane_ave: constant term c of the fitted plane
slope_ave = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
position_angle_ave = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')
v_plane_ave = np.zeros(shape=(cube_data.shape[1],cube_data.shape[2]),dtype='float')

# Image size taken from the cube instead of a hard-coded 448
n_rows, n_cols = cube_data.shape[1], cube_data.shape[2]
# A plane is fitted only where more than this many pixels of the window are valid
min_valid_pixels = pow(halfsize_plane, 2)/2

# Pixels closer than halfsize_plane to the image edge are not visited and keep the value 0
for xchan in range(halfsize_plane, n_cols-halfsize_plane):
    for ychan in range(halfsize_plane, n_rows-halfsize_plane):
        data = mu_ave_peak[ychan-halfsize_plane:ychan+halfsize_plane,xchan-halfsize_plane:xchan+halfsize_plane]
        if np.count_nonzero(~np.isnan(data)) > min_valid_pixels:
            # Pixel sizes are converted to parsec, so the slope is per parsec
            params, x, y = fit_plane(data, x_unit=abs(dx)*deg_to_pc, y_unit=abs(dy)*deg_to_pc)
            a, b, c = params

            # Calculate the overall slope (magnitude of the gradient)
            slope_ave[ychan, xchan] = np.sqrt(a**2 + b**2)
            position_angle_ave[ychan, xchan] = position_angle(a, b)
            v_plane_ave[ychan, xchan] = c
        else:
            slope_ave[ychan, xchan] = np.nan
            position_angle_ave[ychan, xchan] = np.nan
            v_plane_ave[ychan, xchan] = np.nan

In [None]:
## [INPUT] required in this block
## Input the half length of region plotted: a square with length of 2*halfsize_plot is plotted
## Input the half length of region averaged: the arrows are averaged for a square region with length sep_pixel
## Note: 2*halfsize_plot needs to be divisible by sep_pixel

halfsize_plot = 100
sep_pixel = 8

# Create a grid of data
# One arrow position per sep_pixel x sep_pixel block, offset by half a block from the block start
X, Y = np.meshgrid(np.arange(xcenter-halfsize_plot+round(sep_pixel/2), xcenter+halfsize_plot+round(sep_pixel/2), sep_pixel), np.arange(ycenter-halfsize_plot+round(sep_pixel/2), ycenter+halfsize_plot+round(sep_pixel/2), sep_pixel))

# Cut-outs of the plotted square: position angle converted from degrees to radians, and gradient magnitude
pa_init = (position_angle_ave[ycenter-halfsize_plot:ycenter+halfsize_plot, xcenter-halfsize_plot:xcenter+halfsize_plot])/180*np.pi
c_init = slope_ave[ycenter-halfsize_plot:ycenter+halfsize_plot, xcenter-halfsize_plot:xcenter+halfsize_plot]

# Vector-average the arrows per block; the colour of each arrow is the velocity sampled at the arrow position
avg_amplitude, avg_angle = average_vectors(c_init, pa_init, sep_pixel)
v_plot = get_every_nth_2d(mu_ave_peak[ycenter-halfsize_plot+round(sep_pixel/2):ycenter+halfsize_plot+round(sep_pixel/2), xcenter-halfsize_plot+round(sep_pixel/2):xcenter+halfsize_plot+round(sep_pixel/2)], sep_pixel)



# Calculate the magnitude and direction of the arrows
# Positive: pointing from higher velocity to lower velocity
print('shape of objects (have to be equal):', X.shape, Y.shape, avg_amplitude.shape, avg_angle.shape, v_plot.shape)

# Plot the arrows
# NOTE(review): arrow lengths use the square root of the averaged slope - presumably to compress
# the dynamic range for display; confirm this is intended.
fig, ax = plt.subplots(figsize=(10,10), subplot_kw={'projection':wcs2d})
im = ax.quiver(X, Y, np.cos(avg_angle)*np.sqrt(avg_amplitude), np.sin(avg_angle)*np.sqrt(avg_amplitude), v_plot, norm=colors.Normalize(vmin=mu_low,vmax=mu_high),cmap='coolwarm', headaxislength=2, headlength=2)
ax.set_aspect('equal')
cbar = fig.colorbar(im, fraction=0.03)
cbar.set_label(label=r'$v_{\text{LSR}}$ [km s$^{-1}$]',size=10)

# Set the axes limits
plt.xlim(xcenter - halfsize_plot,xcenter + halfsize_plot)
plt.ylim(ycenter - halfsize_plot,ycenter + halfsize_plot)

plt.xlabel('RA', size=10)
plt.ylabel('DEC', size=10)

# Show the plot
plt.show()