In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.ndimage import zoom
from scipy.interpolate import interp1d
import datetime as datetime 
import psutil

In [2]:
def interpolate_and_normalize(data, norm_factor, scale_factor):
    """Rescale a 2-D field and resample it onto the output grid.

    The values are multiplied by ``norm_factor`` and then by ``scale_factor``.
    The scaled field is resampled with linear interpolation (``order=1``)
    using zoom factors ``inter_lat / hemisphere_lat`` along axis 0 and
    ``inter_long / max_long`` along axis 1.

    ``inter_lat``, ``hemisphere_lat``, ``inter_long`` and ``max_long`` are
    read from the notebook's global namespace, so the parameter cell must be
    executed before this function is called.

    Parameters
    ----------
    data : array-like, 2-D
        Field to convert (axis 0 is zoomed by the latitude factor, axis 1 by
        the longitude factor).
    norm_factor : float
        Normalisation factor applied first.
    scale_factor : float
        Unit-conversion factor applied second.

    Returns
    -------
    numpy.ndarray
        The scaled and resampled field.
    """
    scaled = (data * norm_factor) * scale_factor
    zoom_factors = (inter_lat / hemisphere_lat, inter_long / max_long)
    return zoom(scaled, zoom_factors, order=1)

In [3]:
def get_latitude_indices(lat_ind, lat_size, table_path='latitude_table.txt'):
    """Resample the first ``lat_ind`` latitudes of the table to ``lat_size`` values.

    The table is read as whitespace-separated text with a header row. Columns
    containing any missing value are dropped, then the first remaining column
    is dropped; exactly two columns must be left, which are named ``rad`` and
    ``colat``. A ``lat`` column is derived as ``colat - 90``.

    The first ``lat_ind`` ``lat`` values are linearly interpolated over their
    row index at ``lat_size`` evenly spaced index positions, so any
    non-uniform spacing present in the table is carried over to the result.

    Parameters
    ----------
    lat_ind : int
        Number of leading table rows to use.
    lat_size : int
        Number of latitude values to return.
    table_path : str, optional
        Path of the latitude table. Defaults to ``'latitude_table.txt'`` in
        the current working directory.

    Returns
    -------
    numpy.ndarray
        ``lat_size`` interpolated latitudes, in reversed table order.
    """
    # Raw string for the regex: '\s' inside a normal string literal is an
    # invalid escape sequence and makes Python emit a warning.
    lat_df = pd.read_csv(table_path, sep=r'\s+', header=0)
    lat_df = lat_df.dropna(axis=1)
    lat_df = lat_df.drop(lat_df.columns[0], axis=1)
    lat_df.columns = ['rad', 'colat']

    # NOTE(review): the usual colatitude -> latitude conversion is
    # ``90 - colat``; ``colat - 90`` gives the opposite sign. Kept as in the
    # original -- confirm against the contents of the latitude table.
    lat_df['lat'] = lat_df['colat'] - 90

    # Interpolate over the row index (not over latitude itself) so that the
    # table's spacing is preserved at the new resolution.
    table_lats = lat_df['lat'].iloc[0:lat_ind].values
    row_positions = np.arange(len(table_lats))
    sample_positions = np.linspace(0, len(table_lats) - 1, lat_size)
    interp_func = interp1d(row_positions, table_lats, kind='linear')

    # The result is returned in reversed table order.
    # NOTE(review): the original comment says this yields ascending order;
    # that depends on the ordering of the table -- confirm.
    interpolated_latitudes = np.flip(interp_func(sample_positions))

    return interpolated_latitudes

  lat_df = pd.read_csv('latitude_table.txt', sep='\s+', header=0)


In [4]:
# PARAMETERS (Change as needed)
# Root of the raw data; formatted with (year, year + month).
folder_path_template = '/run/media/sachin/0fa21ddb-f70c-4238-9cf4-705e0360f1c1/NICT DUMP/idata{}/idata{}/'
inter_lat = 40  # Number of latitude points on the output grid
inter_long = 140  # Number of longitude/MLT points on the output grid (original note: 140 = 10 mins of MLT)
hemisphere_lat = 63  # Number of raw latitude rows kept per hemisphere (check latitude_table.txt)
hemi = 'SH'  # Hemisphere to run: 'NH' or 'SH'
resolution = '2'  # Resampling interval in minutes; options: 1, 2, 5

# CONSTANTS (Change with caution)
max_long = 321  # Longitude points per field in the raw files
max_lat = 221  # Latitude points per field in the raw files
# Interpolated latitudes that keep the spacing given in latitude_table.txt
latitudes = get_latitude_indices(hemisphere_lat, inter_lat)
fac_norm = 3.75e-6  # A/m^2 for FAC
pot_norm = 1.017e7  # V for potential
cond_norm = 15  # S/m for conductivity

# Feature key -> display name with units
feature_map = dict(
    pot='Potential [kV]',
    fac='Field-aligned current [uA/m^2]',
    sxx='Conductivity (xx) [S/m]',
    syy='Conductivity (yy) [S/m]',
    sxy='Conductivity (xy) [S/m]',
)

# Year -> list of zero-padded months to process.
# If more data arrives in the future, extend the month ranges below.
# NOTE(review): 2022 has no '11' entry -- presumably that month is missing
# from the raw data; confirm it is not an omission.
year_month_map = {
    '2020': [f'{m:02d}' for m in range(8, 13)],
    '2021': [f'{m:02d}' for m in range(1, 13)],
    '2022': [f'{m:02d}' for m in range(1, 13) if m != 11],
    '2023': [f'{m:02d}' for m in range(1, 13)],
    '2024': [f'{m:02d}' for m in range(1, 8)],
}


# Loop through the features, then the years and their respective months.
# For every (feature, month) pair: read the selected field from each raw file
# of that month, normalise and regrid it, assemble an xarray Dataset,
# resample it in time and write it to a NetCDF file.

# ---- Loop-invariant setup ----------------------------------------------------

# Output location; formatted with (hemi, year).
output_dir_template = '/run/media/sachin/0fa21ddb-f70c-4238-9cf4-705e0360f1c1/NICT_Data/{}/{}/'

# Raw file layout: one int32 (count), one float32 (time), then eight float32
# fields of max_lat x max_long values each, in the order listed below.
raw_field_order = ['vx', 'vy', 'vz', 'pot', 'fac', 'sxx', 'syy', 'sxy']
n_cells = max_long * max_lat
header_bytes = np.dtype(np.int32).itemsize + np.dtype(np.float32).itemsize
field_bytes = n_cells * np.dtype(np.float32).itemsize

# (normalisation factor, unit conversion) per feature:
# potential V -> kV, FAC A/m^2 -> uA/m^2, conductivities unchanged.
feature_scaling = {
    'pot': (pot_norm, 1e-3),
    'fac': (fac_norm, 1e6),
    'sxx': (cond_norm, 1),
    'syy': (cond_norm, 1),
    'sxy': (cond_norm, 1),
}

# Row range of the selected hemisphere in the raw latitude axis.
# NOTE(review): the NH range stops at max_lat - 1 (exclusive), so the last raw
# row is never used, whereas the SH range starts at row 0. This looks like an
# off-by-one relative to SH; kept as in the original -- confirm before changing.
# NOTE(review): the same `latitudes` coordinate is used for both hemispheres.
if hemi == 'NH':
    s_ind, e_ind = (max_lat - 1) - hemisphere_lat, max_lat - 1
else:
    s_ind, e_ind = 0, hemisphere_lat

process = psutil.Process(os.getpid())  # Current process, for memory tracking (for testing)

# ---- Main loop -----------------------------------------------------------------
for feature in feature_map.keys():
    print(f'Processing feature: {feature_map[feature]}')

    norm_factor, unit_scale = feature_scaling[feature]
    # Byte offset of this feature's field, so only that field is read from disk
    # instead of parsing all ten entries of every file.
    field_offset = header_bytes + raw_field_order.index(feature) * field_bytes

    for year, months in year_month_map.items():
        for month in months:

            folder_path = folder_path_template.format(year, year + month)
            date_str = year + month  # yyyymm, used in the output file name

            feature_list = []
            dt_list = []

            start = datetime.datetime.now()  # track time (for testing)

            # os.listdir returns names in arbitrary order; sort by the datetime
            # suffix so the time axis is chronological before resampling.
            file_names = sorted(os.listdir(folder_path), key=lambda name: name[-12:])
            if not file_names:
                print(f'No files found in {folder_path}, skipping {year}-{month}')
                continue

            for file_name in file_names:

                # The last 12 characters of the file name hold the datetime
                date_obj = pd.to_datetime(file_name[-12:], format='%Y%m%d%H%M')

                # Read just the requested field as raw binary. A truncated file
                # yields fewer values and makes reshape raise ValueError.
                raw_field = np.fromfile(
                    os.path.join(folder_path, file_name),
                    dtype=np.float32,
                    count=n_cells,
                    offset=field_offset,
                ).reshape((max_lat, max_long))

                # Select hemisphere, then apply normalisation and interpolation
                feature_data = interpolate_and_normalize(
                    raw_field[s_ind:e_ind, :], norm_factor, scale_factor=unit_scale
                )

                feature_list.append(feature_data)
                dt_list.append(date_obj)

            # Create xarray dataset with data of shape (time, lat, lon)
            ds = xr.Dataset(
                {
                    feature: (['dt', 'lat', 'lon'], np.stack(feature_list)),
                },
                coords={
                    'dt': dt_list,  # Datetime list
                    'lat': latitudes,  # Interpolated latitude values
                    'lon': np.linspace(0, 360, inter_long)  # Longitude values
                }
            )

            ds = ds.resample(dt=f'{resolution}min').mean()  # Resample to n minute intervals
            ds = ds.interpolate_na(dim='dt')  # Interpolate any missing values

            # Track time and memory
            end = datetime.datetime.now()
            total_time = end - start
            final_mem_usage = process.memory_info().rss / (1024 ** 3)  # Convert to GB
            print(f'Processed: {year}-{month}, Time taken: {total_time}, Memory used: {final_mem_usage:.2f} GB')

            # Save the dataset. The file name reflects the actual resampling
            # interval, and the target directory is created if it is missing.
            output_dir = output_dir_template.format(hemi, year)
            os.makedirs(output_dir, exist_ok=True)
            ds.to_netcdf(
                os.path.join(
                    output_dir,
                    f'{date_str}_{feature}_{inter_long}_{inter_lat}_{resolution}min.nc',
                )
            )


Processing feature: Potential [kV]
Processed: 2020-08, Time taken: 0:13:49.314533, Memory used: 2.30 GB
Processed: 2020-09, Time taken: 0:10:26.033858, Memory used: 2.28 GB
Processed: 2020-10, Time taken: 0:13:03.200045, Memory used: 2.38 GB
Processed: 2020-11, Time taken: 0:14:17.725023, Memory used: 2.28 GB
Processed: 2020-12, Time taken: 0:13:21.852195, Memory used: 2.38 GB
Processed: 2021-01, Time taken: 0:13:06.809613, Memory used: 2.38 GB
Processed: 2021-02, Time taken: 0:11:56.845611, Memory used: 2.26 GB
Processed: 2021-03, Time taken: 0:13:34.153517, Memory used: 2.38 GB
Processed: 2021-04, Time taken: 0:13:25.453709, Memory used: 2.36 GB
Processed: 2021-05, Time taken: 0:14:08.384038, Memory used: 2.39 GB
Processed: 2021-06, Time taken: 0:14:38.771960, Memory used: 2.37 GB


KeyboardInterrupt: 

In [None]:
# Polar map of the time-averaged field.
# Uses `ds` and `feature` as left behind by the processing cell, i.e. the last
# (feature, month) combination that was processed.

# Define common parameters
theta = np.deg2rad(np.linspace(0, 360, inter_long) - 90)
# NOTE(review): the radial axis uses a fixed, evenly spaced 53.1-89.7 range
# rather than the `lat` coordinate of `ds` -- confirm this is intended.
r = 90 - np.linspace(53.1, 89.7, inter_lat)
shrink = 0.4

# Per-feature plot settings. The original cell only handled 'fac', 'pot' and
# 'sxx' and produced an empty plot for the other features.
# NOTE(review): 'syy' and 'sxy' reuse the 'sxx' colour map and limits; adjust
# if their value ranges differ.
plot_settings = {
    'fac': dict(title='Current', cmap='bwr', vmin=-1, vmax=1, label=r'$\mathrm{\mu}$A/m$^2$'),
    'pot': dict(title='Potential', cmap='bwr', vmin=-20, vmax=20, label='kV'),
    'sxx': dict(title='Conductivity (xx)', cmap='viridis', vmin=0, vmax=15, label='S/m'),
    'syy': dict(title='Conductivity (yy)', cmap='viridis', vmin=0, vmax=15, label='S/m'),
    'sxy': dict(title='Conductivity (xy)', cmap='viridis', vmin=0, vmax=15, label='S/m'),
}
if feature not in plot_settings:
    raise ValueError(f'No plot settings defined for feature {feature!r}')
settings = plot_settings[feature]

fig, ax = plt.subplots(1, 1, subplot_kw={'projection': 'polar'}, figsize=(5, 5))

mesh = ax.pcolormesh(
    theta, r, ds[feature].mean('dt'),
    shading='auto', cmap=settings['cmap'], vmin=settings['vmin'], vmax=settings['vmax'],
)
ax.set_title(settings['title'])
fig.colorbar(mesh, ax=ax, label=settings['label'], orientation='horizontal', pad=0.15, shrink=shrink, extend='both')

ax.set_ylim([0, 37])
ax.set_yticks([0, 10, 20, 30])
ax.set_yticklabels(["90°", "80°", "70°", "60° MLAT"])
ax.set_xlim([-np.pi, np.pi])
ax.set_xticks(np.linspace(-np.pi, np.pi, 9)[1:])
ax.set_xticklabels(["21", "0 MLT \nMidnight", "3", "6 \n  Dawn", "9", "12 MLT \nMidday", "15", "18 \nDusk"])
ax.grid(True, linestyle='-', linewidth=0.5, zorder=6)

plt.tight_layout()
#plt.savefig('lon_180.png', dpi=300)
