In [10]:
import numpy as np
import h5py
import netCDF4
from glob import glob
import pandas as pd
import ipywidgets as widgets
import json

In [23]:
# Functions and variables
def Ne_convert(e: float,P: float,T: float) -> float:
    return e*P/(k*T)

def z(p):
    return - 7 * np.log(p/1013.25)

def format_time(hours_float):
    hours = int(hours_float)
    minutes_float = (hours_float - hours) * 60
    minutes = int(minutes_float)
    formatted_time = f'{hours:02}:{minutes:02}:00Z'
    return formatted_time

k = 1.38*10**(-23) # J/K
time_save = np.arange(0,24,0.5)
time_bin = time_save[1]
H_save = np.arange(90,130,4)
h_bin = H_save[1]
bins = len(time_save) * len(H_save)

In [24]:
data_folder_path = '../../DataSorted/*'
data_folders = glob(data_folder_path)
data_folders = sorted(data_folders)
print(len(data_folders), 'folders found.')

dst_index_path = '../../DST_index/global_dst.csv'
dst_index_file = pd.read_csv(dst_index_path)
print(len(dst_index_file), 'days founds.')

hp30_index_path = '../../hp30_index/global_hp30.json'
with open(hp30_index_path, 'r') as file:
    hp30_index_file = json.load(file)
    hp30_date = np.array(hp30_index_file['datetime'])
    hp30_index = np.array(hp30_index_file['Hp30'])
print(int(len(hp30_index_file['datetime'])/48), 'days founds.')

278 folders found.
1097 days founds.
1097 days founds.


In [25]:
Ne_EISCAT_global = np.array([])
dNe_EISCAT_global = np.array([])
Ne_WACCM_global = np.array([])
mag_EISCAT_global = np.array([])
dmag_EISCAT_global = np.array([])
mag_WACCM_global = np.array([])
Date_Global = np.array([])
Geo_Event = np.array([])
Solar_Event = np.array([])
Svalbard_Data = np.array([])
Tromso_Data = np.array([])
Height_Global = np.array([])
Hours_Global = np.array([])
dst_global = np.array([])
ddst_global = np.array([])
hp30_global = np.array([])
dhp30_global = np.array([])

folder_index = 1

progress_bar = widgets.IntProgress(
    value=folder_index,
    min  = 0,
    max  = len(data_folders),
    description = 'Progress:',
    bar_style   = 'info',
    orientation = 'horizontal'
)
display(progress_bar)

for folder in data_folders:
    EISCAT_path = folder + '/MAD*.hdf5'
    WACCM_path = folder + '/*.nc'

    WACCM_files = []

    if len(glob(WACCM_path)) > 1:
        for file in glob(WACCM_path):
            WACCM_files.append(netCDF4.Dataset(file))
    elif len(glob(WACCM_path)) == 1:
        WACCM_files = [netCDF4.Dataset(glob(WACCM_path)[0])]
    else:
        WACCM_files = []

    for file in glob(EISCAT_path): 
        EISCAT_file = h5py.File(file)
        
        data = EISCAT_file['Data']['Table Layout'][:] # Get data from the file
        metadata = EISCAT_file['Metadata']['Data Parameters'][:] # Get data parameters from the file
        parameters = [parameter[0] for parameter in metadata] # Get the name of each parameters

        data = np.array([np.array(tuple.tolist()) for tuple in data])
        dataframe = pd.DataFrame(data, columns=parameters)
    
        h_start = np.array(dataframe[b'GDALT']) # Start Altitude km
        h_end = np.array(dataframe[b'RANGE']) # END Altitude km
        Ne = np.array(dataframe[b'NE']) # Electron density m-3
        dNe = np.array(dataframe[b'DNE']) # Electron density error m-3
        Ti = np.array(dataframe[b"TI"]) # Ion temperature K
        Tr = np.array(dataframe[b'TR']) # Electron to ion temperature ratio
        hours = np.array(dataframe[b'HOUR']) # Hours
        minutes = np.array(dataframe[b'MIN']) # Minutes

        EISCAT_date = int(dataframe[b'YEAR'][0]*10000 + dataframe[b'MONTH'][0]*100 + dataframe[b'DAY'][0])

        h_mask = h_end < 150
        h_start = h_start[h_mask]
        h_end = h_end[h_mask]
        Ne = Ne[h_mask]
        dNe = dNe[h_mask]
        Ti = Ti[h_mask]
        Tr = Tr[h_mask]
        Te = Ti * Tr
        hours = hours[h_mask]
        minutes = minutes[h_mask]
        time = hours + minutes/60

        lat = WACCM_files[0]['instr_lat'][:] # Latitude of the instrument
        lon = WACCM_files[0]['instr_lon'][:] # Longitude of the instrument
        num = WACCM_files[0]['instr_num'][:] # Numerical identifier of the instrument
        date = WACCM_files[0]['obs_date'][:] # Observation date
        time_WACCM = WACCM_files[0]['obs_time'][:] # Observation time
        lev = WACCM_files[0]['lev'][:] # 88 levels
        e = WACCM_files[0]['e'][:] # Mixing ratio
        T = WACCM_files[0]['T'] [:]# Temperature

        for WACCM_file in WACCM_files[1:]:
            temp_lat = WACCM_file['instr_lat'][:] # Latitude of the instrument
            temp_lon = WACCM_file['instr_lon'][:] # Longitude of the instrument
            temp_num = WACCM_file['instr_num'][:] # Numerical identifier of the instrument
            temp_date = WACCM_file['obs_date'][:] # Observation date
            temp_time = WACCM_file['obs_time'][:] # Observation time
            temp_e = WACCM_file['e'][:] # Mixing ratio 
            temp_T = WACCM_file['T'][:] # Temperature

            lat = np.concatenate((lat, temp_lat))
            lon = np.concatenate((lon, temp_lon))
            num = np.concatenate((num, temp_num))
            date = np.concatenate((date, temp_date))
            time_WACCM = np.concatenate((time_WACCM, temp_time))
            e = np.concatenate((e, temp_e))
            T = np.concatenate((T, temp_T))
        
         # Date Restriction
        date_mask = date == EISCAT_date
        lat = lat[date_mask]
        lon = lon[date_mask]
        time_WACCM = time_WACCM[date_mask]
        e = e[date_mask]
        T = T[date_mask]

        # Coordinate Restrictions for Tromso and Svalbard
        mask_tromso = (lat > 69.5) & (lat < 69.7)
        mask_svalbard = (lat > 78.8) & (lat < 79.0)

        if 'uhf' in file:
            mask_location = mask_tromso
            Tromso_Data = np.concatenate((Tromso_Data, np.repeat(True, bins)))
            Svalbard_Data = np.concatenate((Svalbard_Data, np.repeat(False, bins)))
        elif '42m' in file:
            mask_location = mask_svalbard
            Tromso_Data = np.concatenate((Tromso_Data, np.repeat(False, bins)))
            Svalbard_Data = np.concatenate((Svalbard_Data, np.repeat(True, bins)))

        lev_mask = lev < 0.01
        P = lev[lev_mask]
        H = z(P)
        NeWACCM = []
        for array in e[mask_location]:
            NeWACCM.append(array[lev_mask])

        NeWACCM = np.array(NeWACCM)

        hours = time_WACCM[mask_location]/3600
        T = T[mask_location]

        date = folder.split('/')[-1]

        if 'GEO' in date and 'SOL' in date:
            date = date.replace('-GEO', '')
            date = date.replace('-SOL', '')
            Geo_Event = np.concatenate((Geo_Event, np.repeat(True, bins)))
            Solar_Event = np.concatenate((Solar_Event, np.repeat(True, bins)))
        elif 'GEO' in date and 'SOL' not in date:
            date = date.replace('-GEO', '')
            Geo_Event = np.concatenate((Geo_Event, np.repeat(True, bins)))
            Solar_Event = np.concatenate((Solar_Event, np.repeat(False, bins)))
        elif 'GEO' not in date and 'SOL' in date:
            date = date.replace('-SOL', '')
            Geo_Event = np.concatenate((Geo_Event, np.repeat(False, bins)))
            Solar_Event = np.concatenate((Solar_Event, np.repeat(True, bins)))
        else:
            Geo_Event = np.concatenate((Geo_Event, np.repeat(False, bins)))
            Solar_Event = np.concatenate((Solar_Event, np.repeat(False, bins)))

        Ne_EISCAT_list = []
        dNe_EISCAT_list = []
        Ne_WACCM_list = []
        dst_list = []
        ddst_list = []
        hp30_list = []
        dhp30_list = []
        
        for height_slot in H_save:
            h_start_mask = (h_start - height_slot <= 4) & (h_start - height_slot > 0)
            h_end_mask = (h_end - height_slot <= 4) & (h_end - height_slot > 0)
            h_mask_EISCAT = h_start_mask + h_end_mask
            h_mask_WACCM = (H - height_slot <= 4) & (H - height_slot > 0)
            for time_slot in time_save:
                t_mask_EISCAT = (time[h_mask_EISCAT] - time_slot <= 0.5) & (time[h_mask_EISCAT] - time_slot > 0)
                t_mask_WACCM = (hours - time_slot <= 0.5) & (hours - time_slot > 0)
                dst_mask = np.array(dst_index_file['DATE'] == date)
                dst_shifted = np.roll(dst_mask,1)
                if sum(t_mask_EISCAT) == 0:
                    Ne_EISCAT_list.append(0)
                    dNe_EISCAT_list.append(0)
                    Ne_WACCM_list.append(0)
                    dst_list.append(0)
                    ddst_list.append(0)
                    hp30_list.append(0)
                    dhp30_list.append(0)
                elif sum(t_mask_WACCM) == 0:
                    Ne_EISCAT_list.append(0)
                    dNe_EISCAT_list.append(0)
                    Ne_WACCM_list.append(0)
                    dst_list.append(0)
                    ddst_list.append(0)
                    hp30_list.append(0)
                    dhp30_list.append(0)
                else:
                    Ne2_EISCAT = np.mean(Ne[h_mask_EISCAT][t_mask_EISCAT])
                    Ne_EISCAT_list.append(Ne2_EISCAT)
                    dNe_EISCAT = np.sqrt(np.sum(dNe[h_mask_EISCAT][t_mask_EISCAT]**2))/len(dNe[h_mask_EISCAT][t_mask_EISCAT])
                    dNe_EISCAT_list.append(dNe_EISCAT)
                    Ne2_WACCM_array = np.array([])
                    index = 0
                    for index in range(len(NeWACCM[t_mask_WACCM])):
                        Ne_array = NeWACCM[t_mask_WACCM][index][h_mask_WACCM]
                        T_array = T[t_mask_WACCM][index][lev_mask][h_mask_WACCM]
                        Ne2_WACCM_array = np.concatenate((Ne2_WACCM_array, Ne_convert(Ne_array,P[h_mask_WACCM]*100,T_array)))
                    Ne2 =  np.mean(Ne2_WACCM_array)
                    Ne_WACCM_list.append(Ne2)
                    int_hour_str = str(int(time_slot) + 1)
                    next_int_hour_str = str(int(time_slot + time_bin) + 1)
                    dst = dst_index_file[int_hour_str][dst_mask].values[0]
                    if int_hour_str != next_int_hour_str and int_hour_str != '24':
                        next_dst = dst_index_file[next_int_hour_str][dst_mask].values[0]
                        dst_list.append((dst + next_dst)/2)
                        ddst_list.append((next_dst - dst)/(2*time_bin))
                    elif int_hour_str == next_int_hour_str and int_hour_str != '24':
                        dst_list.append(dst)
                        next_dst = dst_index_file[next_int_hour_str][dst_mask].values[0]
                        ddst_list.append((next_dst - dst)/time_bin)
                    else:
                        dst_list.append(dst)
                        next_dst = dst_index_file['1'][dst_shifted].values[0]
                        ddst_list.append((next_dst - dst)/time_bin)
                    hp30_mask = hp30_date == f'{date}T{format_time(time_slot)}'
                    hp30_shifted = np.roll(hp30_mask, 1)
                    hp30 = hp30_index[hp30_mask][0]
                    next_hp30 = hp30_index[hp30_shifted][0]
                    dhp30 = (next_hp30 - hp30)/time_bin
                    hp30_list.append(hp30)
                    dhp30_list.append(int(dhp30*100)/100)
                    
            Hours_Global = np.concatenate((Hours_Global, time_save))
            Height_Global = np.concatenate((Height_Global, np.repeat(height_slot, len(time_save))))

        Ne_EISCAT = np.array(Ne_EISCAT_list).astype(int)
        dNe_EISCAT = np.array(dNe_EISCAT_list).astype(int)
        Ne_WACCM = np.array(Ne_WACCM_list).astype(int)
        dst_array = np.array(dst_list)
        ddst_array = np.array(ddst_list)
        hp30_array = np.array(hp30_list)
        dhp30_array = np.array(dhp30_list)

        nan_mask = np.isnan(Ne_EISCAT)
        Ne_EISCAT[nan_mask] = 0
        Ne_WACCM[nan_mask] = 0

        mag_EISCAT = np.zeros((len(H_save) * len(time_save)))
        dmag_EISCAT = np.zeros((len(H_save) * len(time_save)))
        mag_WACCM = np.zeros((len(H_save) * len(time_save)))
        
        mask_zero = Ne_EISCAT != 0
        mag_EISCAT[mask_zero] = np.log10(Ne_EISCAT[mask_zero])
        dmag_EISCAT[mask_zero] = np.log10(dNe_EISCAT[mask_zero])
        mag_WACCM[mask_zero] = np.log10(Ne_WACCM[mask_zero])

        mag_EISCAT = (mag_EISCAT*100).astype(int)/100
        dmag_EISCAT = (dmag_EISCAT*100).astype(int)/100
        mag_WACCM = (mag_WACCM*100).astype(int)/100

        Ne_EISCAT_global = np.concatenate((Ne_EISCAT_global, Ne_EISCAT))
        dNe_EISCAT_global = np.concatenate((dNe_EISCAT_global, dNe_EISCAT))
        Ne_WACCM_global = np.concatenate((Ne_WACCM_global, Ne_WACCM))
        
        mag_EISCAT_global = np.concatenate((mag_EISCAT_global, mag_EISCAT))
        dmag_EISCAT_global = np.concatenate((dmag_EISCAT_global, dmag_EISCAT))
        mag_WACCM_global = np.concatenate((mag_WACCM_global, mag_WACCM))

        dst_global = np.concatenate((dst_global, dst_array))
        ddst_global = np.concatenate((ddst_global, ddst_array))
        hp30_global = np.concatenate((hp30_global, hp30_array))
        dhp30_global = np.concatenate((dhp30_global, dhp30_array))

        Date_Global = np.concatenate((Date_Global, np.repeat(date, bins)))

    folder_index += 1
    progress_bar.value = folder_index

IntProgress(value=1, bar_style='info', description='Progress:', max=278)

  Ne_EISCAT = np.array(Ne_EISCAT_list).astype(int)
  dNe_EISCAT = np.array(dNe_EISCAT_list).astype(int)


In [26]:
data_dict = {'Date' : Date_Global,
             'Hours' : Hours_Global,
             'Height' : Height_Global.astype(int),
             'Svalbard' : Svalbard_Data.astype(int),
             'Tromso' : Tromso_Data.astype(int),
             'Geomagnetic Event' : Geo_Event.astype(int),
             'Solar Proton Event' : Solar_Event.astype(int),
             'EISCAT Density' : Ne_EISCAT_global.astype(int),
             'EISCAT Density Error' : dNe_EISCAT_global.astype(int),
             'WACCM Density' : Ne_WACCM_global.astype(int),
             'EISCAT Magnitude' : mag_EISCAT_global,
             'EISCAT Magnitude Error' : dmag_EISCAT_global, 
             'WACCM Magnitude' : mag_WACCM_global,
             'DST Index' : dst_global,
             'DST Index Gradient' : ddst_global,
             'Hp30 Index' : hp30_global,
             'Hp30 Index Gradient' : dhp30_global,
            }

data_df = pd.DataFrame(data_dict)
file = f'global_data_{len(H_save)}_{len(time_save)}'
data_df.to_csv('../../Results/' + file + '.csv', index=False)

In [27]:
file = f'global_data_{len(H_save)}_{len(time_save)}'