# Import

In [2]:
import csv
import gzip
import os
import posixpath
import shutil
from ftplib import FTP_TLS

import matplotlib.pyplot as plt
import numpy as np
from unlzw import unlzw

# FTP Config

In [None]:
# Connection settings for the anonymous FTP-over-TLS login performed when the
# connection is opened.
host, port = "gdc.cddis.eosdis.nasa.gov", 21  # 21 is the standard FTP control port
username, password = "anonymous", ""

In [None]:
# Open the control connection, authenticate, then switch the data channel to
# TLS before any listing or transfer is requested.
ftp = FTP_TLS()
ftp.connect(host=host, port=port)
ftp.login(user=username, passwd=password)
ftp.prot_p()
ftp.retrlines("LIST")  # print the server's root directory listing

# Download Data Locally

In [None]:
def fetch_data(yr, dd, ftp_client=None):
    """Download and decompress one day of hourly ``*GN.rnx.gz`` files.

    For each of the 24 hour directories under
    ``/pub/gps/data/hourly/<yr>/<dd>/<hh>`` on the FTP server, every file whose
    name ends in ``GN.rnx.gz`` (case-sensitive) is downloaded into
    ``rnx/gps_rnx_<yr><dd>/`` and decompressed next to the download with the
    trailing ``.gz`` removed.

    Parameters
    ----------
    yr : str
        Year directory name on the server, e.g. ``'2019'``.
    dd : str
        Day directory name on the server, e.g. ``'008'``.
    ftp_client : object, optional
        Connected FTP client exposing ``cwd``, ``retrlines`` and
        ``retrbinary``. When omitted, the notebook-level ``ftp`` connection
        is used.

    Returns
    -------
    None
        Files are written to disk and progress is printed. Any failure is
        reported with a printed message instead of being raised, so a partial
        download may remain on disk.
    """
    try:
        conn = ftp if ftp_client is None else ftp_client

        base_dir = f'/pub/gps/data/hourly/{yr}'
        conn.cwd(base_dir)

        new_dir = os.path.join('rnx', f'gps_rnx_{yr}{dd}')
        os.makedirs(new_dir, exist_ok=True)

        # Remote paths always use forward slashes, whatever the local OS is.
        dd_dir = posixpath.join(base_dir, dd)
        conn.cwd(dd_dir)

        for hr in range(24):
            hour_dir = posixpath.join(dd_dir, f'{hr:02d}')
            conn.cwd(hour_dir)

            listing = []
            conn.retrlines('NLST', listing.append)
            rnx_files = [name for name in listing if name.endswith('GN.rnx.gz')]  # case-sensitive

            for i, rnx_file in enumerate(rnx_files):
                local_filename = os.path.join(new_dir, rnx_file)
                with open(local_filename, 'wb') as f:
                    conn.retrbinary(f"RETR {rnx_file}", f.write)

                local_uncompressed = local_filename[:-3]  # drop the '.gz' suffix
                try:
                    # Stream the decompression so the file is never held in memory whole.
                    with gzip.open(local_filename, 'rb') as gzipped_file, open(local_uncompressed, 'wb') as decompressed_file:
                        shutil.copyfileobj(gzipped_file, decompressed_file)
                except Exception as e:
                    # Fall back to UNIX `compress` (LZW) decoding when gzip
                    # cannot read the file; the output file is rewritten from scratch.
                    print(f"Error uncompressing file {local_filename}: {e}. Will begin uncompressing file as a UNIX compressed file")
                    with open(local_filename, 'rb') as compressed_file, open(local_uncompressed, 'wb') as decompressed_file:
                        decompressed_file.write(unlzw(compressed_file.read()))
                    print(f"Successfully uncompressed and saved file {local_filename} as a UNIX compressed file")

                print(f"Done saving file {i + 1} out of {len(rnx_files)} for Hour: {hr} Day/Year: {dd}/{yr}")

        # Reported once, after every hour of the day has been processed.
        print(f"Done uncompressing and saving files for Day/Year: {dd}/{yr} in folder: {new_dir}")

    except Exception as e:
        print(f"Error fetching data: {e}")

In [None]:
%%time 
# Download and decompress the hourly files for day '008' of year '2019';
# %%time reports how long the whole cell took.
fetch_data('2019', '008')

# Navigate to IGS Products

## Functions

In [4]:
def fetch_rnx(yr, dd):
    """Collect GPS records from a day's local ``.rnx`` files into one ``.npz``.

    Every ``*.rnx`` file in ``rnx/gps_rnx_<yr><dd>/`` is read in sorted name
    order. Files whose header does not declare RINEX major version 3 are
    skipped with a notice. From each record line starting with ``G`` and the
    three lines after it, the satellite name, epoch, three clock coefficients,
    mean-motion difference, reference mean anomaly, eccentricity,
    sqrt(semi-major axis) and time of ephemeris are extracted. Records are
    sorted by ascending epoch and written to ``gps_rnx_<yr><dd>.npz`` in the
    current working directory.

    Parameters
    ----------
    yr : str
        Year part of the folder name, e.g. ``'2019'``.
    dd : str
        Day part of the folder name, e.g. ``'008'``.

    Returns
    -------
    None
        Progress is printed. Any failure (missing folder, malformed record,
        no records at all) is reported with a printed message instead of
        being raised, and no ``.npz`` file is written in that case.
    """
    try:
        target_dir = os.path.join('rnx', f"gps_rnx_{yr}{dd}")
        if not os.path.isdir(target_dir):
            raise FileNotFoundError(f"Directory {target_dir} does not exist.")

        # os.listdir order is arbitrary; sorting makes the order of records
        # that share an epoch reproducible between runs.
        rnx_files = sorted(name for name in os.listdir(target_dir) if name.endswith('.rnx'))  # case-sensitive

        rows = []
        for i, rnx_file in enumerate(rnx_files):
            file_path = os.path.join(target_dir, rnx_file)
            file_rows = _read_rinex3_gps_records(file_path)
            if file_rows is None:
                print("File is an unreadable version for now")
            else:
                rows.extend(file_rows)
            print(f"Done reading file {i + 1} out of {len(rnx_files)}")

        if not rows:
            raise ValueError(f"No GPS navigation records were found in {target_dir}.")

        for _, label in _RNX_COLUMNS:
            print(f"{label} for GPS day {dd} of year {yr} have been saved.")

        # Stable sort on (year, month, day, hour, minute, second).
        rows.sort(key=lambda row: row[1:7])
        print(f"Data is sorted by ascending epoch for GPS day {dd} of year {yr} has been completed.")

        # Transpose the row tuples into one list per output column.
        columns = {
            key: list(values)
            for (key, _), values in zip(_RNX_COLUMNS, zip(*rows))
        }

        npz_filename = f'gps_rnx_{yr}{dd}.npz'
        np.savez(npz_filename, **columns)

        print(f"npz file for GPS day {dd} of year {yr} has been created and stored as {npz_filename}.")

        print(f"Data collection for GPS day {dd} of year {yr} has been completed.")

    except Exception as e:
        print(f"Error fetching data for day {dd} of year {yr}: {e}")


# (npz array name, label used in the progress messages), in row-tuple order.
# NOTE(review): the RINEX 3 specification appears to give the mean-motion
# difference and mean anomaly in radians, not semi-circles - confirm before
# relying on the units quoted in these labels.
_RNX_COLUMNS = (
    ('satellite', 'SV Names'),
    ('yyyy', 'Epoch Years'),
    ('mm', 'Epoch Months'),
    ('dd', 'Epoch Days'),
    ('hh', 'Epoch Hours'),
    ('mi', 'Epoch Minutes'),
    ('ss', 'Epoch Seconds'),
    ('coeff_clock_bias', 'Clock bias coefficients (seconds)'),
    ('coeff_clock_bias_drift', 'Clock drift coefficients (seconds)'),
    ('coeff_clock_bias_drift_rate', 'Clock drift rates (seconds)'),
    ('delta_mean_motion', 'Mean Motion Differences (semi-circles/sec)'),
    ('ref_mean_anomaly', 'Mean Anomaly Reference (semi-circles)'),
    ('eccentricity', 'Eccentricity (unitless)'),
    ('semi_major_ax', 'SQRT(Semi-Major Axis) (SQRT(m))'),
    ('t_oe', 'Time of Ephemeris (seconds)'),
    ('files', 'Filepaths'),
)


def _rinex_float(field):
    """Convert a fixed-width numeric field to float, accepting a 'D' exponent marker."""
    return float(field.lower().replace('d', 'e'))


def _is_rinex_version_3(first_line):
    """Return True when the version field (first nine columns) has major version 3."""
    try:
        return int(float(first_line[0:9])) == 3
    except (ValueError, OverflowError):
        return False


def _read_rinex3_gps_records(file_path):
    """Parse every 'G' record of one file; return None when it is not RINEX 3.

    Each returned row is a tuple ordered like ``_RNX_COLUMNS``.
    """
    with open(file_path, 'r') as handle:
        if not _is_rinex_version_3(handle.readline()):
            return None

        lines = iter(handle)

        # Skip the remainder of the header.
        for line in lines:
            if 'END OF HEADER' in line:
                break

        rows = []
        for line in lines:
            if not line.startswith('G'):
                continue

            # The three lines after the record line hold the orbit fields.
            # An empty string at end-of-file makes the float conversion below
            # fail, which the caller reports.
            orbit_1 = next(lines, '')
            orbit_2 = next(lines, '')
            orbit_3 = next(lines, '')

            rows.append((
                line[0:3],
                int(line[3:8]),
                int(line[8:11]),
                int(line[11:14]),
                int(line[14:17]),
                int(line[17:20]),
                int(line[20:23]),
                _rinex_float(line[23:42]),
                _rinex_float(line[42:61]),
                _rinex_float(line[61:80]),
                _rinex_float(orbit_1[42:61]),
                _rinex_float(orbit_1[61:80]),
                _rinex_float(orbit_2[23:42]),
                _rinex_float(orbit_2[61:80]),
                _rinex_float(orbit_3[4:23]),
                file_path,
            ))
        return rows

In [6]:
%%time
# Parse the local files in rnx/gps_rnx_2019008 and write gps_rnx_2019008.npz;
# one progress line is printed per file read.
fetch_rnx('2019', '008')

Done reading file 1 out of 1882
Done reading file 2 out of 1882
Done reading file 3 out of 1882
Done reading file 4 out of 1882
Done reading file 5 out of 1882
Done reading file 6 out of 1882
Done reading file 7 out of 1882
Done reading file 8 out of 1882
Done reading file 9 out of 1882
Done reading file 10 out of 1882
Done reading file 11 out of 1882
Done reading file 12 out of 1882
Done reading file 13 out of 1882
Done reading file 14 out of 1882
Done reading file 15 out of 1882
Done reading file 16 out of 1882
Done reading file 17 out of 1882
Done reading file 18 out of 1882
Done reading file 19 out of 1882
Done reading file 20 out of 1882
Done reading file 21 out of 1882
Done reading file 22 out of 1882
Done reading file 23 out of 1882
Done reading file 24 out of 1882
Done reading file 25 out of 1882
Done reading file 26 out of 1882
Done reading file 27 out of 1882
Done reading file 28 out of 1882
Done reading file 29 out of 1882
Done reading file 30 out of 1882
Done reading file 3