# Get NetCDF Files
This notebook retrieves random VIPIR data from the FTP site `ftp.ngdc.noaa.gov/ionosonde/mids11`.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ftplib import FTP
import re

from netCDF4 import Dataset

In [2]:
%matplotlib inline


## FTP site params

In [3]:
# Host name of the FTP server queried in this notebook.
ftpsite ='ftp.ngdc.noaa.gov'
# Image directory of a single station/day (WI937, 2019, day 306).
# NOTE(review): not referenced by the cells visible here — confirm it is still needed.
ftpdir = '/ionosonde/mids11/WI937/individual/2019/306/image'
# Same path as the default `rootdir` of the listing helpers below.
# NOTE(review): the helpers hard-code this default instead of reading this variable.
ftpstndir = '/ionosonde/mids11'

## Station List

In [4]:
#ftp://ftp.ngdc.noaa.gov//ionosonde/mids11/SAA0K/individual/2019/252/image
#ftp://ftp.ngdc.noaa.gov/ionosonde/mids11/SAA0K/individual/2019/252/ionogram/

def get_station_list(ftpserver = 'ftp.ngdc.noaa.gov',rootdir = '/ionosonde/mids11'):
    """
    Return the entries of ``rootdir`` on ``ftpserver`` that look like station names.

    A connection is opened, ``login()`` is called without credentials, and
    the directory is listed.  An entry is kept when it contains a run of
    five characters from ``0-9`` / ``A-Z``; the order of the server's
    listing is preserved.
    """
    station_pattern = re.compile('[0-9A-Z]{5}')

    with FTP(ftpserver) as conn:
        conn.login()
        conn.cwd(rootdir)
        entries = conn.nlst()

    return [name for name in entries if station_pattern.search(name)]

In [5]:
# Fetch the station names with the default server and root directory
# (get_station_list opens an FTP connection, so this needs network access).
stations = get_station_list()

In [6]:
# Number of station entries returned by the listing.
len(stations)

98

## Get the last ionograms from each station

In [7]:
def get_ionogram_ngi(ftp, stn, rootdir = '/ionosonde/mids11', datadir='individual'):
    """
    List the ionogram files of the most recent observation day of a station.

    Parameters
    ----------
    ftp : ftplib.FTP
        Connected, logged-in FTP session.  Only ``cwd`` and ``nlst`` are used.
    stn : str
        Station directory name.
    rootdir : str
        Server directory that contains the station directories.
    datadir : str
        Sub-directory of the station that contains the per-year directories.

    Returns
    -------
    dict
        Single-entry mapping ``{'<year>-<day>': [file names]}``.
        * The list is empty when the day's ``ionogram`` directory cannot be
          entered or listed.
        * ``<day>`` is the sentinel ``'999'`` when the year has no day
          directories.
        * The key is the sentinel ``'9999-999'`` when the station has no
          year directories.

    Notes
    -----
    Errors raised by ``ftp.cwd`` / ``ftp.nlst`` for the station or year
    directory are not caught and propagate to the caller.
    """
    from ftplib import all_errors  # local import: the notebook only imports FTP

    stn_dir = f'{rootdir}/{stn}/{datadir}'
    ftp.cwd(stn_dir)
    # The order of an NLST listing is up to the server, so sort explicitly.
    # Zero-padded numeric names sort chronologically as plain strings.
    years = sorted(y for y in ftp.nlst() if re.fullmatch('[0-9]{4}', y))
    if not years:
        return {'9999-999': []}  # no year directories at all
    year = years[-1]  # most recent year

    daysdir = f'{stn_dir}/{year}'
    ftp.cwd(daysdir)
    # fullmatch: only exact three-digit day-of-year names count as days.
    days = sorted(d for d in ftp.nlst() if re.fullmatch('[0-9]{3}', d))
    if not days:
        return {f'{year}-999': []}  # no observation days -> invalid-day sentinel
    day = days[-1]  # most recent day

    daydir = f'{daysdir}/{day}/ionogram'
    try:
        ftp.cwd(daydir)
        files = ftp.nlst()
    except all_errors:
        # Best effort: a missing or unreadable ionogram directory means "no files".
        files = []

    return {f'{year}-{day}': files}

In [8]:
# Reuse a single FTP session for all stations.  vipir_list maps each station
# name to the {'<year>-<day>': [file names]} dict returned by get_ionogram_ngi.
# NOTE(review): an FTP error for one station's data/year directory is not
# caught here and aborts the whole loop.
with FTP(ftpsite) as ftp:
    ftp.login()
    vipir_list = {}
    for stn in stations:
        vipir_list[stn] = get_ionogram_ngi(ftp, stn)

## Summarize the Station Info

In [9]:
def get_obs_type(fname):
    """
    Return the extension of the supplied file name.

    The extension is the text after the last '.' in ``fname``, so a name
    such as ``'a.tar.NGI'`` yields ``'NGI'``.  ``'Unknown'`` is returned when
    the name contains no '.' or ends with one.
    """
    # rpartition splits on the LAST dot; `dot` is '' when there is none.
    _, dot, ext = fname.rpartition('.')
    return ext if dot and ext else 'Unknown'

In [10]:
def get_summary(vipir, stn):
    """
    Summarize the most recent observation entry of one station.

    Parameters
    ----------
    vipir : dict
        Mapping ``station -> {obs_date: [file names]}``.
    stn : str
        Station to summarize; must be a key of ``vipir``.

    Returns
    -------
    tuple
        ``(stn, last_obs_date, n_data_files, f_types)`` where ``last_obs_date``
        is the last key of ``vipir[stn]`` and ``f_types`` is a comma-separated
        list of the distinct file types found by ``get_obs_type``, in sorted
        order (``''`` when there are no files).
    """
    last_obs_date = list(vipir[stn])[-1]
    files = vipir[stn][last_obs_date]
    # Sort the distinct types: set iteration order is not stable between
    # runs, which would make the joined string non-reproducible.
    f_types = ','.join(sorted({get_obs_type(fn) for fn in files}))

    return stn, last_obs_date, len(files), f_types
    

In [11]:
# One row per station, built from the tuples returned by get_summary.
station_summary = pd.DataFrame(
    data=[get_summary(vipir_list, station) for station in stations],
    columns=['StationName', 'LastObsDate', 'NObservations', 'ObsTypes'],
)

In [12]:
# Show the summary table (the rendered output below is truncated in the middle).
station_summary

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
0,09429,2019-365,0,
1,AD930,2019-999,0,
2,AL945,2019-365,0,
3,AS00Q,2019-365,96,RSF
4,AS237,2019-264,0,
...,...,...,...,...
93,VT139,2019-365,96,RSF
94,WI937,2019-365,720,NGI
95,WK546,2019-365,0,
96,WP937,2019-365,288,MMM


## Stations with NGI files

In [13]:
# ObsTypes is a comma-separated list of file types, so test for membership:
# an equality check would miss stations that have NGI plus another type.
station_summary[station_summary['ObsTypes'].str.split(',').map(lambda types: 'NGI' in types)]

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
43,IV437,2019-365,960,NGI
48,JV433,2019-365,576,NGI
82,SJJ18,2019-365,482,NGI
94,WI937,2019-365,720,NGI


## Other available File Types

In [14]:
# Count stations per ObsTypes string.  The blank label in the output is the
# empty string that get_summary produces for stations with no files.
station_summary.ObsTypes.value_counts()

       69
RSF    18
MMM     6
NGI     4
SBF     1
Name: ObsTypes, dtype: int64