**This program is complete**

**Author:** Marilyn Braojos 

**Purpose:** This program connects to the CDDIS server to obtain GPS final clock products. It is organized into three main parts:

* Connecting to CDDIS servers
* Downloading data for a given GPS week
* Saving data for that week as an npz file

# Import

In [4]:
from ftplib import FTP_TLS
import gzip
import matplotlib.pyplot as plt
import numpy as np
import os
import csv

# FTP Config

In [6]:
# Connection settings consumed by the FTP-over-TLS cell below.
host = "gdc.cddis.eosdis.nasa.gov"
port = 21  # FTP control port; NOTE(review): the port number itself does not select passive mode
username = "anonymous"  # anonymous login
password = ""  # sent as-is to ftp.login(); left empty for the anonymous account

In [7]:
# Open the control connection, authenticate, then protect the data channel
# with TLS before any directory listing or file transfer takes place.
ftp = FTP_TLS()
ftp.connect(host=host, port=port)
ftp.login(user=username, passwd=password)
ftp.prot_p()
# Print the server's root directory listing as a quick connectivity check.
ftp.retrlines('LIST')

lrwxrwxrwx    1 ftp      ftp             8 Sep 06  2022 GSAC -> pub/GSAC
lrwxrwxrwx    1 ftp      ftp             9 Sep 06  2022 doris -> pub/doris
lrwxrwxrwx    1 ftp      ftp            11 Sep 06  2022 glonass -> pub/glonass
lrwxrwxrwx    1 ftp      ftp             7 Sep 06  2022 gnss -> pub/gps
lrwxrwxrwx    1 ftp      ftp             7 Sep 06  2022 gps -> pub/gps
drwxrwxr-x   26 ftp      ftp          4096 Jan 01 00:16 highrate
lrwxrwxrwx    1 ftp      ftp             8 Sep 06  2022 misc -> pub/misc
lrwxrwxrwx    1 ftp      ftp            12 Sep 06  2022 products -> pub/products
drwxrwxr-x   13 ftp      ftp          4096 Feb 21  2023 pub
lrwxrwxrwx    1 ftp      ftp            11 Sep 06  2022 reports -> pub/reports
lrwxrwxrwx    1 ftp      ftp             7 Sep 06  2022 slr -> pub/slr
lrwxrwxrwx    1 ftp      ftp             8 Sep 06  2022 vlbi -> pub/vlbi


'226 Directory send OK.'

# Download and Save Data Locally

In [9]:
def fetch_data(week):
    """Download and decompress every ``.CLK.gz`` product of one GPS week.

    Uses the module-level ``ftp`` connection. The function changes into
    ``/pub/gps/products/<week>`` on the server, downloads each file whose
    name ends in ``.CLK.gz`` into the local folder ``clk/gps_<week>/``
    (created if missing) and writes a decompressed copy next to it under
    the same name without the ``.gz`` suffix. Progress is printed after
    each file.

    Args:
        week: GPS week number; it is formatted into both the remote
            directory name and the local folder name.

    Returns:
        None. Any exception is caught and reported with ``print`` instead
        of being propagated to the caller.
    """
    import shutil  # local import: only this function needs it

    try:
        ftp.cwd('/pub/gps/products')
        ftp.cwd(f"{week}")

        # Collect the bare file names of the remote week directory.
        remote_names = []
        ftp.retrlines('NLST', remote_names.append)

        # The suffix match is case-sensitive on purpose ("CLK", not "clk").
        clk_files = [name for name in remote_names if name.endswith('.CLK.gz')]

        new_folder = os.path.join('clk', f'gps_{week}')
        os.makedirs(new_folder, exist_ok=True)

        total = len(clk_files)
        for i, clk_file in enumerate(clk_files):
            local_filename = os.path.join(new_folder, clk_file)

            with open(local_filename, 'wb') as f:
                ftp.retrbinary(f"RETR {clk_file}", f.write)

            # Every selected name ends in ".gz", so dropping the last three
            # characters yields the decompressed file name.
            local_uncompressed = local_filename[:-3]
            with gzip.open(local_filename, 'rb') as gzipped_file, \
                    open(local_uncompressed, 'wb') as decompressed_file:
                # Stream in chunks rather than loading the whole
                # decompressed product into memory at once.
                shutil.copyfileobj(gzipped_file, decompressed_file)

            print(f"Done saving file {i + 1} out of {total}")

    except Exception as e:
        # Best-effort notebook helper: report the failure, do not raise.
        print(f"Error fetching data for week {week}: {e}")

In [10]:
%%time 
# Download and decompress the .CLK.gz products of GPS week 2035 into clk/gps_2035/.
fetch_data(2035)

Done saving file 1 out of 25
Done saving file 2 out of 25
Done saving file 3 out of 25
Done saving file 4 out of 25
Done saving file 5 out of 25
Done saving file 6 out of 25
Done saving file 7 out of 25
Done saving file 8 out of 25
Done saving file 9 out of 25
Done saving file 10 out of 25
Done saving file 11 out of 25
Done saving file 12 out of 25
Done saving file 13 out of 25
Done saving file 14 out of 25
Done saving file 15 out of 25
Done saving file 16 out of 25
Done saving file 17 out of 25
Done saving file 18 out of 25
Done saving file 19 out of 25
Done saving file 20 out of 25
Done saving file 21 out of 25
Done saving file 22 out of 25
Done saving file 23 out of 25
Done saving file 24 out of 25
Done saving file 25 out of 25
CPU times: user 2.68 s, sys: 1.19 s, total: 3.87 s
Wall time: 53.3 s


# Save Relevant IGS Data Products

## Functions

In [13]:
# Column positions (half-open slices) of the fields read from an "AS"
# data record. Files whose first line carries version 3.04 use a wider,
# 10-column name field, which shifts every following field.
_CLK_COLUMNS_V304 = {
    'sv': slice(3, 13),
    'year': slice(13, 18),
    'month': slice(18, 21),
    'day': slice(21, 24),
    'hour': slice(24, 27),
    'minute': slice(27, 30),
    'second': slice(30, 40),
    'count': slice(40, 45),
    'bias': slice(45, 66),
}

# Layout applied to every file that is not version 3.04, see
# https://files.igs.org/pub/data/format/rinex_clock300.txt
_CLK_COLUMNS_OTHER = {
    'sv': slice(3, 8),
    'year': slice(8, 12),
    'month': slice(12, 15),
    'day': slice(15, 18),
    'hour': slice(18, 21),
    'minute': slice(21, 24),
    'second': slice(24, 34),
    'count': slice(34, 40),
    'bias': slice(40, 59),
}


def _parse_clk_record(line, is_version_3, file_path):
    """Parse one post-header line into a row dict, or return None.

    Only records that start with ``AS`` and whose name field starts with
    ``G`` are kept. Only the clock bias is extracted; any further values
    on the line are ignored.

    Raises:
        ValueError: if a numeric field of a kept record cannot be parsed.
    """
    if not line.startswith('AS'):
        return None

    cols = _CLK_COLUMNS_V304 if is_version_3 else _CLK_COLUMNS_OTHER
    sv_name = line[cols['sv']]  # kept unstripped, including padding
    if not sv_name.startswith('G'):
        return None

    return {
        'Clock Data Type': line[0:3],
        'SV Name': sv_name,
        'Epoch Year': int(line[cols['year']]),
        'Epoch Month': int(line[cols['month']]),
        'Epoch Day': int(line[cols['day']]),
        'Epoch Hour': int(line[cols['hour']]),
        'Epoch Minute': int(line[cols['minute']]),
        'Epoch Second': float(line[cols['second']]),
        'Number of Data Values to Follow': int(line[cols['count']]),
        # Accept Fortran-style "D" exponents as well as "E".
        'Clock Bias (seconds)': float(line[cols['bias']].lower().replace('d', 'e')),
        'Version': "Version 3.04" if is_version_3 else "NOT version 3.04",
        'File': file_path,
    }


def fetch_clks(week):
    """Collect the clock biases of one GPS week into ``gps_<week>.npz``.

    Reads every file ending in ``.CLK`` (case-sensitive) from
    ``clk/gps_<week>/``, keeps the ``AS`` records whose name starts with
    ``G``, sorts them by ascending epoch and saves them with ``np.savez``
    to ``gps_<week>.npz`` in the current working directory. The archive
    holds the arrays ``satellite``, ``yyyy``, ``mm``, ``dd``, ``hh``,
    ``mi``, ``ss``, ``clock_bias``, ``ver`` and ``files``.

    Files are processed in sorted name order so that records sharing the
    same epoch always end up in the same order, whatever order the
    operating system lists the directory in.

    Args:
        week: GPS week number; it is formatted into the input folder name
            and the output file name.

    Returns:
        None. Any exception (missing folder, unparsable record, no
        matching records) is caught and reported with ``print`` instead of
        being propagated; in that case no ``.npz`` file is written.
    """
    try:
        target_dir = os.path.join('clk', f"gps_{week}")

        if not os.path.isdir(target_dir):
            raise FileNotFoundError(f"Directory {target_dir} does not exist.")

        # CLK is case-sensitive.
        clk_files = sorted(
            name for name in os.listdir(target_dir) if name.endswith('.CLK')
        )

        data = []
        for i, clk_file in enumerate(clk_files):
            file_path = os.path.join(target_dir, clk_file)

            with open(file_path, 'r') as file:
                # The version is looked up at the start of the first line.
                is_version_3 = '3.04' in file.readline()[0:21]

                end_header = False
                for line in file:
                    if not end_header:
                        # Skip header lines, including the marker itself.
                        end_header = 'END OF HEADER' in line
                        continue

                    row = _parse_clk_record(line, is_version_3, file_path)
                    if row is not None:
                        data.append(row)

            print(f"Done reading file {i + 1} out of {len(clk_files)}")

        if not data:
            # Fail with a clear message instead of a tuple-unpacking error.
            raise ValueError(
                f"No GPS satellite clock records found in {target_dir}."
            )

        for label in (
            "SV Names",
            "Epoch Years",
            "Epoch Months",
            "Epoch Days",
            "Epoch Hours",
            "Epoch Minutes",
            "Epoch Seconds",
            "Clock biases (seconds)",
            "Versions",
            "Filepaths",
        ):
            print(f"{label} for GPS week {week} have been saved.")

        # Stable sort: records with equal epochs keep their reading order.
        data.sort(
            key=lambda row: (
                row['Epoch Year'],
                row['Epoch Month'],
                row['Epoch Day'],
                row['Epoch Hour'],
                row['Epoch Minute'],
                row['Epoch Second'],
            )
        )

        print(f"Data is sorted by ascending epoch for GPS week {week} has been completed.")

        np.savez(
            f'gps_{week}.npz',
            satellite=[row['SV Name'] for row in data],
            yyyy=[row['Epoch Year'] for row in data],
            mm=[row['Epoch Month'] for row in data],
            dd=[row['Epoch Day'] for row in data],
            hh=[row['Epoch Hour'] for row in data],
            mi=[row['Epoch Minute'] for row in data],
            ss=[row['Epoch Second'] for row in data],
            clock_bias=[row['Clock Bias (seconds)'] for row in data],
            ver=[row['Version'] for row in data],
            files=[row['File'] for row in data],
        )
        print(f"npz file for GPS week {week} has been created.")

        print(f"Data collection for GPS week {week} has been completed.")

    except Exception as e:
        # Best-effort notebook helper: report the failure, do not raise.
        print(f"Error fetching data for week {week}: {e}")

In [14]:
%%time
# Parse the .CLK files in clk/gps_2035/ and write the sorted records to gps_2035.npz.
fetch_clks(2035)

Done reading file 1 out of 25
Done reading file 2 out of 25
Done reading file 3 out of 25
Done reading file 4 out of 25
Done reading file 5 out of 25
Done reading file 6 out of 25
Done reading file 7 out of 25
Done reading file 8 out of 25
Done reading file 9 out of 25
Done reading file 10 out of 25
Done reading file 11 out of 25
Done reading file 12 out of 25
Done reading file 13 out of 25
Done reading file 14 out of 25
Done reading file 15 out of 25
Done reading file 16 out of 25
Done reading file 17 out of 25
Done reading file 18 out of 25
Done reading file 19 out of 25
Done reading file 20 out of 25
Done reading file 21 out of 25
Done reading file 22 out of 25
Done reading file 23 out of 25
Done reading file 24 out of 25
Done reading file 25 out of 25
SV Names for GPS week 2035 have been saved.
Epoch Years for GPS week 2035 have been saved.
Epoch Months for GPS week 2035 have been saved.
Epoch Days for GPS week 2035 have been saved.
Epoch Hours for GPS week 2035 have been saved.
Ep