# process_clk

**Author:** Marilyn Braojos Gutierrez\
**Purpose:** This program processes the local RINEX CLK files containing final GPS satellite clock bias information and creates a separate file for each satellite.\
**PhD Milestone:** #1: *Leverage deep learning models for GPS satellite clock bias corrections.*\
**Project:** This program is Step (1) in this PhD milestone. Obtaining the data is the first critical step.\
**References:**\
[1] https://cddis.nasa.gov/Data_and_Derived_Products/GNSS/broadcast_ephemeris_data.html#GPShourly

# Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Processing RINEX Final Clock Files and Extracting Clock Bias Data by Station

In [15]:
# Column slices of an 'AS' clock record, in the order:
# (SV name, year, month, day, hour, minute, second, number of values, clock bias).
# Layout used when the first header line carries the version string '3.04'.
_V304_COLUMNS = (slice(3, 13), slice(13, 18), slice(18, 21), slice(21, 24),
                 slice(24, 27), slice(27, 30), slice(30, 40), slice(40, 45),
                 slice(45, 66))
# Layout used for every other version.
# https://files.igs.org/pub/data/format/rinex_clock300.txt
_LEGACY_COLUMNS = (slice(3, 8), slice(8, 12), slice(12, 15), slice(15, 18),
                   slice(18, 21), slice(21, 24), slice(24, 34), slice(34, 40),
                   slice(40, 59))

# (npz key, parsed-row key, label used in the progress message)
_NPZ_FIELDS = (
    ('satellite', 'SV Name', 'SV Names'),
    ('yyyy', 'Epoch Year', 'Epoch Years'),
    ('mm', 'Epoch Month', 'Epoch Months'),
    ('dd', 'Epoch Day', 'Epoch Days'),
    ('hh', 'Epoch Hour', 'Epoch Hours'),
    ('mi', 'Epoch Minute', 'Epoch Minutes'),
    ('ss', 'Epoch Second', 'Epoch Seconds'),
    ('clock_bias', 'Clock Bias (seconds)', 'Clock biases (seconds)'),
    ('ver', 'Version', 'Versions'),
    ('files', 'File', 'Filepaths'),
)


def _parse_as_record(line, is_version_3, file_path):
    """Parse one 'AS' record line into a dict, or return None for non-GPS SVs.

    Only records whose SV name starts with 'G' are kept. The numeric fields
    are converted only after that check, so malformed records of other
    constellations never raise.
    """
    columns = _V304_COLUMNS if is_version_3 else _LEGACY_COLUMNS
    sv, year, month, day, hour, minute, second, n_values, bias = (
        line[col] for col in columns
    )
    if not sv.startswith('G'):
        return None
    return {
        'Clock Data Type': line[0:3],
        'SV Name': sv,
        'Epoch Year': int(year),
        'Epoch Month': int(month),
        'Epoch Day': int(day),
        'Epoch Hour': int(hour),
        'Epoch Minute': int(minute),
        # Fractional seconds are truncated to whole seconds.
        'Epoch Second': int(float(second)),
        'Number of Data Values to Follow': int(n_values),
        # Accept Fortran-style 'D' exponents as well as 'E'.
        'Clock Bias (seconds)': float(bias.lower().replace('d', 'e')),
        'Version': "Version 3.04" if is_version_3 else "NOT version 3.04",
        'File': file_path,
        # Clock bias std-dev, clock rate and clock acceleration fields that
        # may follow on the record are not extracted.
    }


def _read_clk_file(file_path):
    """Return the parsed GPS 'AS' records of one CLK file (empty if not CNES)."""
    rows = []
    with open(file_path, 'r') as file:
        first_line = file.readline()
        second_line = file.readline()

        is_version_3 = '3.04' in first_line[0:21]

        # The station/agency is read from the second header line; its column
        # position differs between the two layouts. Both spellings are
        # accepted because the comparison is case-sensitive.
        station_field = second_line[21:42] if is_version_3 else second_line[20:40]
        if not station_field.startswith(('CNES', 'cnes')):
            return rows

        end_header = False
        for line in file:
            if not end_header:
                # Skip header lines, including the 'END OF HEADER' line itself.
                end_header = 'END OF HEADER' in line
                continue
            if line.startswith('AS'):
                row = _parse_as_record(line, is_version_3, file_path)
                if row is not None:
                    rows.append(row)
    return rows


def process_clk(week,
                base_dir='/Volumes/MARI/ssdl_gps/clk',
                npz_dir='/Volumes/MARI/ssdl_gps/clk_npz'):
    """Collect the CNES GPS clock biases of one GPS week into a single .npz file.

    Every ``.CLK``/``.clk`` file in ``<base_dir>/gps_<week>`` is read, the GPS
    ('G...') 'AS' records of files whose header names CNES are parsed, the
    records are sorted by ascending epoch and the columns are written to
    ``<npz_dir>/grg_gps_clk_<week>.npz`` under the keys ``satellite``, ``yyyy``,
    ``mm``, ``dd``, ``hh``, ``mi``, ``ss``, ``clock_bias``, ``ver`` and ``files``.

    Parameters
    ----------
    week : int or str
        GPS week; selects the ``gps_<week>`` sub-folder and names the output.
    base_dir : str, optional
        Folder holding the per-week ``gps_<week>`` directories.
    npz_dir : str, optional
        Folder receiving the output file; created if it does not exist.

    Returns
    -------
    None
        Any failure (missing directory, unparsable record, no matching
        records, ...) is reported with ``print`` and not re-raised, so a loop
        over many weeks keeps going.
    """
    try:
        target_dir = os.path.join(base_dir, f"gps_{week}")

        if not os.path.isdir(target_dir):
            raise FileNotFoundError(f"Directory {target_dir} does not exist.")

        # The extension check is case-sensitive, so both spellings are listed.
        # Sorting makes the processing order independent of os.listdir().
        clk_files = sorted(name for name in os.listdir(target_dir)
                           if name.endswith(('.CLK', '.clk')))

        data = []
        for i, clk_file in enumerate(clk_files):
            data.extend(_read_clk_file(os.path.join(target_dir, clk_file)))
            print(f"Done reading file {i + 1} out of {len(clk_files)}")

        if not data:
            # Without this guard the failure would surface later as an
            # unrelated-looking unpacking error.
            raise ValueError(f"No CNES GPS 'AS' records found in {target_dir}.")

        # Stable sort: records sharing an epoch keep their read order.
        data.sort(key=lambda row: (row['Epoch Year'], row['Epoch Month'],
                                   row['Epoch Day'], row['Epoch Hour'],
                                   row['Epoch Minute'], row['Epoch Second']))

        columns = {}
        for npz_key, row_key, label in _NPZ_FIELDS:
            columns[npz_key] = [row[row_key] for row in data]
            print(f"{label} for GPS week {week} have been saved.")

        print(f"Data is sorted by ascending epoch for GPS week {week} has been completed.")

        os.makedirs(npz_dir, exist_ok=True)

        npz_filename = os.path.join(npz_dir, f'grg_gps_clk_{week}.npz') # specify station

        np.savez(npz_filename, **columns)

        print(f"npz file for GPS week {week} has been created and stored as {npz_filename}.")

        print(f"Data final product collection for GPS week {week} has been completed.")

    except Exception as e:
        print(f"Error fetching data for week {week}: {e}")

In [16]:
%%time 

# GPS calendar, presumably the source of the week numbers below:
# https://beta.ngs.noaa.gov/CORS/Gpscal.shtml
# range() excludes its end value, so this covers weeks 1982 through 2033.
weeks = range(1982,2034)

# Process the weeks one at a time; process_clk is called once per week.
for week in weeks: 
    process_clk(week)

Done reading file 1 out of 103
Done reading file 2 out of 103
Done reading file 3 out of 103
Done reading file 4 out of 103
Done reading file 5 out of 103
Done reading file 6 out of 103
Done reading file 7 out of 103
Done reading file 8 out of 103
Done reading file 9 out of 103
Done reading file 10 out of 103
Done reading file 11 out of 103
Done reading file 12 out of 103
Done reading file 13 out of 103
Done reading file 14 out of 103
Done reading file 15 out of 103
Done reading file 16 out of 103
Done reading file 17 out of 103
Done reading file 18 out of 103
Done reading file 19 out of 103
Done reading file 20 out of 103
Done reading file 21 out of 103
Done reading file 22 out of 103
Done reading file 23 out of 103
Done reading file 24 out of 103
Done reading file 25 out of 103
Done reading file 26 out of 103
Done reading file 27 out of 103
Done reading file 28 out of 103
Done reading file 29 out of 103
Done reading file 30 out of 103
Done reading file 31 out of 103
Done reading file

# Separate and Store Data Based on SV PRN

In [4]:
def isolate_clk(satellite_name, week, station,
                input_folder="/Volumes/MARI/ssdl_gps/clk_npz",
                output_folder="/Volumes/MARI/ssdl_gps/clk_npz_sat"):
    """Extract one satellite's records from a weekly .npz file into its own file.

    Reads ``<input_folder>/<station>_gps_clk_<week>.npz``, keeps the entries
    whose ``satellite`` value starts with ``satellite_name`` and writes them to
    ``<output_folder>/<satellite_name>/<station>_gps_clk_<week>_<satellite_name>.npz``.

    Parameters
    ----------
    satellite_name : str
        Prefix matched against the stored SV names (e.g. ``'G21'``); also used
        as the name of the per-satellite output sub-folder.
    week : int or str
        GPS week used in the input and output file names.
    station : str
        Station prefix used in the input and output file names.
    input_folder : str, optional
        Folder containing the weekly .npz files.
    output_folder : str, optional
        Root folder of the per-satellite output; created if missing.

    Raises
    ------
    FileNotFoundError
        If the weekly input file does not exist.

    Notes
    -----
    Some output keys differ from the input keys: ``clock_bias`` is written as
    ``clock_bias_vals``, ``ver`` as ``vers`` and ``files`` as ``filename``.
    A satellite with no matching entries still produces a file with empty
    arrays.
    """
    input_filename = os.path.join(input_folder, f'{station}_gps_clk_{week}.npz')

    if not os.path.exists(input_filename):
        raise FileNotFoundError(f"The file {input_filename} does not exist.")

    # (key in the weekly file, key in the per-satellite file)
    key_map = (
        ('satellite', 'satellite'),
        ('yyyy', 'yyyy'),
        ('mm', 'mm'),
        ('dd', 'dd'),
        ('hh', 'hh'),
        ('mi', 'mi'),
        ('ss', 'ss'),
        ('clock_bias', 'clock_bias_vals'),
        ('ver', 'vers'),
        ('files', 'filename'),
    )

    # np.load returns a lazily-read archive that holds the file open; the
    # context manager closes it once all arrays have been extracted.
    with np.load(input_filename) as data:
        satellite_names = data['satellite']
        # Boolean mask built element-wise so it also works for an empty array.
        matching = np.array(
            [name.startswith(satellite_name) for name in satellite_names],
            dtype=bool,
        )
        selected = {out_key: data[in_key][matching] for in_key, out_key in key_map}

    # Creates output_folder as well when it does not exist yet.
    satellite_folder = os.path.join(output_folder, satellite_name)
    os.makedirs(satellite_folder, exist_ok=True)

    output_filename = os.path.join(satellite_folder, f'{station}_gps_clk_{week}_{satellite_name}.npz')

    np.savez(output_filename, **selected)

    print(f"Data has been saved to {output_filename} for satellite: {satellite_name} on week: {week} from station: {station}")

In [17]:
%%time
# GPS weeks to split (range() excludes its end value: 1982 through 2033).
weeks = range(1982,2034)
# Satellite PRN numbers to extract; range(21, 22) selects PRN 21 only.
satellites = range(21, 22)
# Station prefix of the weekly .npz files to read.
station = 'grg'

for satellite_id in satellites:
    # Zero-padded SV name, e.g. 21 -> 'G21'.
    sat_id_str = f'G{satellite_id:02d}'
    for week in weeks:
        try:
            isolate_clk(sat_id_str, week, station)
        except Exception as e:
            # Report the failing week and keep going with the remaining ones.
            print(f"Exception for satellite {sat_id_str} on week {week}: {e}")

Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1982_G21.npz for satellite: G21 on week: 1982 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1983_G21.npz for satellite: G21 on week: 1983 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1984_G21.npz for satellite: G21 on week: 1984 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1985_G21.npz for satellite: G21 on week: 1985 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1986_G21.npz for satellite: G21 on week: 1986 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1987_G21.npz for satellite: G21 on week: 1987 from station: grg
Data has been saved to /Volumes/MARI/ssdl_gps/clk_npz_sat/G21/grg_gps_clk_1988_G21.npz for satellite: G21 on week: 1988 from station: grg
Data has been saved to /Volumes/MA