# Get NetCDF Files
This notebook will retrieve random vipir data from ftp site: 'ftp.ngdc.noaa.gov/ionosonde/mids11'

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ftplib import FTP
import re

from netCDF4 import Dataset

In [2]:
%matplotlib inline


## FTP site params

In [3]:
ftpsite ='ftp.ngdc.noaa.gov'
ftpdir = '/ionosonde/mids11/WI937/individual/2019/306/image'
ftpstndir = '/ionosonde/mids11'

## Station List

In [7]:
#ftp://ftp.ngdc.noaa.gov//ionosonde/mids11/SAA0K/individual/2019/252/image
#ftp://ftp.ngdc.noaa.gov/ionosonde/mids11/SAA0K/individual/2019/252/ionogram/

def get_station_list(ftpserver = 'ftp.ngdc.noaa.gov',rootdir = '/ionosonde/data'):
    
    with FTP(ftpserver) as ftp:
        ftp.login()
        ftp.cwd(rootdir)
        stn_list = [f for f in ftp.nlst() if re.search('[0-9A-Z]{5}',f)]        
    
    return stn_list

In [8]:
stations = get_station_list()

In [9]:
len(stations)

156

## Get the last ionograms from each station

In [19]:
# TODO: make sure the directory lists are sorted in order to take the most recent entry
def get_ionogram_ngi(ftp, stn, rootdir = '/ionosonde/data', datadir='individual'):
    
    try:
        file_list={}
        ftp.cwd(f'{rootdir}/{stn}/{datadir}')
        yrs = ftp.nlst()
        
        year = yrs[-1] # year of most recent data
    
        daysdir = f'{rootdir}/{stn}/{datadir}/{year}'
        ftp.cwd(daysdir)
        days = [d for d in ftp.nlst() if re.search('[0-9]{3}',d)]
        if len(days) == 0: #no observation days
            day = '999' #invalid day
        else:
            day = days[-1] # most recent days' data (assume nlst returns them that way)

        daydir = f'{daysdir}/{day}/ionogram'
        print(f'daydir: {daydir}')
        
 
        ftp.cwd(daydir)
        file_list[f'{year}-{day}'] = ftp.nlst()
    except:
        file_list[f'9999-999'] = []
    
    return file_list

In [20]:
with FTP(ftpsite) as ftp:
    ftp.login()
    vipir_list = {}
    for stn in stations:
        vipir_list[stn] = get_ionogram_ngi(ftp, stn)

daydir: /ionosonde/data/09429/individual/2020/999/ionogram
daydir: /ionosonde/data/AA109/individual/2014/210/ionogram
daydir: /ionosonde/data/AC843/individual/2013/999/ionogram
daydir: /ionosonde/data/AD930/individual/2020/999/ionogram
daydir: /ionosonde/data/AFJ49/individual/2012/999/ionogram
daydir: /ionosonde/data/AH223/individual/2018/999/ionogram
daydir: /ionosonde/data/AL945/individual/2020/019/ionogram
daydir: /ionosonde/data/AM269/individual/2018/999/ionogram
daydir: /ionosonde/data/AN438/individual/2012/999/ionogram
daydir: /ionosonde/data/AS00Q/individual/2020/018/ionogram
daydir: /ionosonde/data/AS237/individual/2020/999/ionogram
daydir: /ionosonde/data/AT138/individual/2020/019/ionogram
daydir: /ionosonde/data/AU930/individual/2020/019/ionogram
daydir: /ionosonde/data/BBJ3R/individual/2020/019/ionogram
daydir: /ionosonde/data/BC840/individual/2020/019/ionogram
daydir: /ionosonde/data/BD840/individual/2018/999/ionogram
daydir: /ionosonde/data/BJJ32/individual/2012/999/ionogr

## Summarize the Station Info

In [21]:
def get_obs_type(fname):
    """
    returns the extension from the file name supplied
    """
    comps = fname.split('.')

    ftype = 'Unknown' if len(comps)==1 or comps[1] == '' else comps[1]
        
    return ftype

In [22]:
def get_summary(vipir, stn):
    last_obs_date = list(vipir[stn].keys())[-1]
    n_data_files =  len(vipir[stn][last_obs_date])
    f_types = ','.join(set([get_obs_type(fn) for fn in vipir[stn][last_obs_date]]))
    
    return stn, last_obs_date, n_data_files, f_types
    

In [23]:
station_summary = pd.DataFrame([get_summary(vipir_list,stn) for stn in stations],
                               columns=['StationName','LastObsDate','NObservations','ObsTypes'])

In [24]:
station_summary

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
0,09429,9999-999,0,
1,AA109,9999-999,0,
2,AC843,9999-999,0,
3,AD651,9999-999,0,
4,AD930,9999-999,0,
...,...,...,...,...
151,WI937,2020-019,13,NGI
152,WK546,9999-999,0,
153,WP937,2020-103,1,MMM
154,YA462,9999-999,0,


## Stations with NGI files

In [25]:
station_summary.query('ObsTypes == \'NGI\'')

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
71,IV437,2020-019,17,NGI
77,JV433,2020-019,11,NGI
127,SJJ18,2020-019,9,NGI
151,WI937,2020-019,13,NGI


## Other available File Types

In [26]:
station_summary.ObsTypes.value_counts()

       135
RSF     11
MMM      5
NGI      4
SBF      1
Name: ObsTypes, dtype: int64

## Get Some NGI files

want to get a ranom mix of 2019 and 2020 files. We'll assume that the stations have data for those years

In [29]:
stations_ngi = station_summary.query('ObsTypes==\'NGI\'').StationName

In [30]:
stations_ngi

71     IV437
77     JV433
127    SJJ18
151    WI937
Name: StationName, dtype: object