# process_rnx

**Author:** Marilyn Braojos Gutierrez\
**Purpose:** This program aims to process the local RINEX files with GPS satellite broadcast information and create files for each satellite.\
**PhD Milestone:** #1: *Leverage deep learning models for GPS satellite clock bias corrections.*\
**Project:** This program is Step (1) in this PhD milestone. Obtaining the data is the first critical step.\
**References:**\
[1] https://cddis.nasa.gov/Data_and_Derived_Products/GNSS/broadcast_ephemeris_data.html#GPShourly

# Import Libraries

In [1]:
import chardet
import matplotlib.pyplot as plt                               # https://matplotlib.org/3.5.3/api/_as_gen/matplotlib.pyplot.html
import numpy as np                                            # https://numpy.org/
import os                                                     # https://docs.python.org/3/library/os.html
import time                                                   # https://docs.python.org/3/library/time.html

# Processing RINEX Broadcast Files and Extracting Clock Bias Data

In [None]:
# One entry per stored column:
#   (key in the output npz, key in the parsed record dict, label used in the progress messages)
_RNX_COLUMNS = (
    ('satellite', 'SV Name', 'SV Names'),
    ('yyyy', 'Epoch Year', 'Epoch Years'),
    ('mm', 'Epoch Month', 'Epoch Months'),
    ('dd', 'Epoch Day', 'Epoch Days'),
    ('hh', 'Epoch Hour', 'Epoch Hours'),
    ('mi', 'Epoch Minute', 'Epoch Minutes'),
    ('ss', 'Epoch Second', 'Epoch Seconds'),
    ('coeff_clock_bias', 'Clock Bias Coefficient (seconds)',
     'Clock bias coefficients (seconds)'),
    ('coeff_clock_bias_drift', 'Clock Bias Drift Coefficient (seconds/second)',
     'Clock drift coefficients (seconds)'),
    ('coeff_clock_bias_drift_rate', 'Clock Bias Drift Rate Coefficient (seconds/second^2)',
     'Clock drift rates (seconds)'),
    ('delta_mean_motion', 'Mean Motion Difference (semi-circles/sec)',
     'Mean Motion Differences (semi-circles/sec)'),
    ('ref_mean_anomaly', 'Mean Anomaly at Reference (semi-circles)',
     'Mean Anomaly Reference (semi-circles)'),
    ('eccentricity', 'Eccentricity (unitless)', 'Eccentricity (unitless)'),
    ('semi_major_ax', 'SQRT(Semi-Major Axis) (SQRT(meters))',
     'SQRT(Semi-Major Axis) (SQRT(m))'),
    ('t_oe', 'Time of Ephemeris (seconds)', 'Time of Ephemeris (seconds)'),
    ('sv_health', 'SV Health (0=OK)', 'SV Health (0=OK)'),
    ('files', 'File', 'Filepaths'),
)


def _rnx_float(field):
    """Convert a fixed-width numeric field to float.

    The field may use a Fortran-style 'D' exponent (e.g. ``1.0D-05``), which
    ``float`` rejects, so 'D'/'d' is rewritten to 'e' before conversion.
    """
    return float(field.lower().replace('d', 'e'))


def _iter_gps_records(file, file_path):
    """Yield one dict per GPS ('G...') record found after the header of an open file.

    Parameters
    ----------
    file : text file object positioned at the start of the file.
    file_path : path stored in each record under 'File' and used in messages.

    Rows are yielded one at a time so that records parsed before a malformed
    line are still kept by the caller when a later line raises.
    """
    first_line = file.readline()
    # The file is treated as version 3 when a '3' appears in the first 21 characters.
    is_version_3 = '3' in first_line[0:21]
    end_header = False

    for line in file:
        # Skip everything up to and including the 'END OF HEADER' line.
        if not end_header:
            if 'END OF HEADER' in line:
                end_header = True
            continue

        if not line.startswith('G'):
            continue

        # https://files.igs.org/pub/data/format/rinex_clock300.txt
        if not is_version_3:
            if line[3:13].startswith('G'):
                print("File is an unreadable version for now")
            continue

        # The six lines following the epoch line carry the orbit/health fields used below.
        subs_lines = [file.readline() for _ in range(6)]
        if not all(subs_lines):
            # readline() returns '' at end of file: the record is truncated, so
            # stop instead of reusing values left over from the previous record.
            print(f"Incomplete record for {line[0:3]} at end of file {file_path}. Skipping this record.")
            break
        orbit1, orbit2, orbit3, _, _, orbit6 = subs_lines

        yield {
            'SV Name': line[0:3],
            'Epoch Year': int(line[3:8]),
            'Epoch Month': int(line[8:11]),
            'Epoch Day': int(line[11:14]),
            'Epoch Hour': int(line[14:17]),
            'Epoch Minute': int(line[17:20]),
            'Epoch Second': int(line[20:23]),
            'Clock Bias Coefficient (seconds)': _rnx_float(line[23:42]),
            'Clock Bias Drift Coefficient (seconds/second)': _rnx_float(line[42:61]),
            'Clock Bias Drift Rate Coefficient (seconds/second^2)': _rnx_float(line[61:80]),

            'Mean Motion Difference (semi-circles/sec)': _rnx_float(orbit1[42:61]),
            'Mean Anomaly at Reference (semi-circles)': _rnx_float(orbit1[61:80]),

            'Eccentricity (unitless)': _rnx_float(orbit2[23:42]),
            'SQRT(Semi-Major Axis) (SQRT(meters))': _rnx_float(orbit2[61:80]),

            'Time of Ephemeris (seconds)': _rnx_float(orbit3[4:23]),

            'SV Health (0=OK)': _rnx_float(orbit6[23:42]),

            'File': file_path,
        }


def process_rnx(yr, dd):
    """Parse every ``.rnx`` file of one day and store the GPS records in one npz file.

    Reads ``rnx/gps_rnx_daily_{yr}{dd}/*.rnx``, collects the GPS records of all
    readable files, sorts them by ascending epoch and writes them column-wise to
    ``rnx_npz/gps_rnx_daily_{yr}{dd}.npz``.

    Parameters
    ----------
    yr : year as used in the folder name (e.g. ``'2018'``).
    dd : day as used in the folder name (e.g. ``'001'``).

    Returns
    -------
    None. Problems are reported with ``print`` rather than raised: a file that
    cannot be read is skipped, and a missing folder or a day without any
    record ends the call without writing an npz file.
    """
    # Bound up front so the final error message can always be formatted, even
    # when the failure happens before the first file is opened.
    file_path = None
    try:
        base_dir = 'rnx'
        target_dir = os.path.join(base_dir, f"gps_rnx_daily_{yr}{dd}")

        if not os.path.isdir(target_dir):
            raise FileNotFoundError(f"Directory {target_dir} does not exist.")

        rnx_files = [name for name in os.listdir(target_dir) if name.endswith('.rnx')]  # case-sensitive

        data = []
        encoding_errors = []  # (file path, error text) of files that could not be decoded

        for i, rnx_file in enumerate(rnx_files):
            try:
                file_path = os.path.join(target_dir, rnx_file)

                # Guess the text encoding from the raw bytes.
                with open(file_path, 'rb') as raw_file:
                    result = chardet.detect(raw_file.read())
                encoding = result['encoding']
                confidence = result['confidence']

                # Use fallback encoding if detection fails
                if encoding is None:
                    encoding = 'utf-8'
                    print(f"Using fallback encoding {encoding} for file {file_path}")
                else:
                    print(f"Detected encoding {encoding} with confidence {confidence} for file {file_path}")

                try:
                    with open(file_path, 'r', encoding=encoding) as file:
                        for row in _iter_gps_records(file, file_path):
                            data.append(row)

                    print(f"Done reading file {i + 1} out of {len(rnx_files)}. Filepath: {file_path}")

                except (UnicodeDecodeError, FileNotFoundError) as decode_err:
                    encoding_errors.append((file_path, str(decode_err)))
                    print(f"Encoding error for file {file_path}: {decode_err}. Skipping this file.")

            except Exception as e:
                # Best effort: one bad file must not stop the rest of the day.
                print(f"Error processing file {file_path}: {e}. Skipping this file.")
                continue

        if encoding_errors:
            print("Files with encoding errors:")
            for bad_path, error in encoding_errors:
                print(f"{bad_path}: {error}")

        if not data:
            # Nothing to sort or store; an npz without rows would be useless downstream.
            print(f"No GPS records found for day {dd} of year {yr}. No npz file was written.")
            return

        for _, _, label in _RNX_COLUMNS:
            print(f"{label} for GPS day {dd} of year {yr} have been saved.")

        # sorted() is stable, so records sharing an epoch keep their read order.
        data_sorted = sorted(
            data,
            key=lambda row: (
                row['Epoch Year'],
                row['Epoch Month'],
                row['Epoch Day'],
                row['Epoch Hour'],
                row['Epoch Minute'],
                row['Epoch Second'],
            ),
        )

        print(f"Data is sorted by ascending epoch for GPS day {dd} of year {yr} has been completed.")

        columns = {
            npz_key: [row[row_key] for row in data_sorted]
            for npz_key, row_key, _ in _RNX_COLUMNS
        }

        npz_dir = 'rnx_npz'
        os.makedirs(npz_dir, exist_ok=True)

        npz_filename = os.path.join(npz_dir, f'gps_rnx_daily_{yr}{dd}.npz')

        np.savez(npz_filename, **columns)

        print(f"npz file for GPS day {dd} of year {yr} has been created and stored as {npz_filename}.")

        print(f"Data collection for GPS day {dd} of year {yr} has been completed.")

    except Exception as e:
        print(f"Error fetching data for day {dd} of year {yr}: {e}. Filepath: {file_path}.")

In [None]:
%%time
# Day-of-year labels '001' .. '365', zero-padded to three characters to match
# the gps_rnx_daily_<year><day> folder names.
days = [str(day_number).zfill(3) for day_number in range(1, 366)]

# Build the daily npz file for each day of 2018.
for dd in days:
    process_rnx('2018', dd)

# Separate and Store Data Based on SV PRN

In [None]:
def isolate_rnx(satellite_name, yr, dd):
    """Copy the records of one satellite from a daily npz file into its own npz file.

    Reads ``rnx_npz/gps_rnx_daily_{yr}{dd}.npz``, keeps the rows whose
    ``satellite`` entry starts with ``satellite_name`` and writes them to
    ``rnx_npz_sat/{satellite_name}/gps_rnx_{yr}{dd}_{satellite_name}.npz``.

    Parameters
    ----------
    satellite_name : prefix matched against the ``satellite`` column (e.g. ``'G01'``).
    yr : year as used in the file name (e.g. ``'2018'``).
    dd : day as used in the file name (e.g. ``'001'``).

    Raises
    ------
    FileNotFoundError
        If the daily npz file does not exist.
    KeyError
        If the daily npz file lacks one of the expected arrays.
    """
    input_folder = "rnx_npz"
    input_filename = os.path.join(input_folder, f"gps_rnx_daily_{yr}{dd}.npz")

    if not os.path.exists(input_filename):
        raise FileNotFoundError(f"The file {input_filename} does not exist.")

    # (array name in the daily file, array name in the per-satellite file)
    key_map = (
        ('satellite', 'satellite'),
        ('yyyy', 'yyyy'),
        ('mm', 'mm'),
        ('dd', 'dd'),
        ('hh', 'hh'),
        ('mi', 'mi'),
        ('ss', 'ss'),
        ('coeff_clock_bias', 'clock_bias_coefficients'),
        ('coeff_clock_bias_drift', 'clock_bias_drift_coefficients'),
        ('coeff_clock_bias_drift_rate', 'clock_bias_drift_rate_coefficients'),
        ('delta_mean_motion', 'diff_mean_motion'),
        ('ref_mean_anomaly', 'ref_mean_anomaly'),
        ('eccentricity', 'eccen'),
        ('semi_major_ax', 'sqrt_semi'),
        ('t_oe', 't_eph'),
        ('sv_health', 'health_sv'),
        ('files', 'filename'),
    )

    # np.load keeps the npz file open and reads arrays on access; the with-block
    # closes it as soon as the needed rows have been copied out.
    with np.load(input_filename) as data:
        satellite_names = data['satellite']
        matching_indices = [
            i for i, name in enumerate(satellite_names)
            if name.startswith(satellite_name)
        ]
        selected = {
            out_key: data[in_key][matching_indices]
            for in_key, out_key in key_map
        }

    output_folder = "rnx_npz_sat"
    satellite_folder = os.path.join(output_folder, satellite_name)
    # makedirs also creates the parent output folder when it is missing.
    os.makedirs(satellite_folder, exist_ok=True)

    output_filename = os.path.join(satellite_folder, f"gps_rnx_{yr}{dd}_{satellite_name}.npz")

    np.savez(output_filename, **selected)

    print(f"Data has been saved to {output_filename}")

In [None]:
%%time
# Day-of-year labels '001' .. '365', matching the daily npz file names.
days = [f'{day:03d}' for day in range(1, 366)]

# GPS satellites are numbered G01 through G32; range(1, 33) includes G32,
# which the previous upper bound of 32 left out.
for satellite_id in range(1, 33):
    sat_id_str = f'G{satellite_id:02d}'
    for dd in days:
        try:
            isolate_rnx(sat_id_str, '2018', dd)
        except Exception as e:
            # A missing or unreadable daily file must not stop the remaining days/satellites.
            print(f"Exception for satellite {sat_id_str} on day {dd}: {e}")