# Get NetCDF Files
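This notebook retrieves a random sample of VIPIR ionogram data (`.NGI` files) from the FTP site 'ftp.ngdc.noaa.gov/ionosonde/mids11'. Note that the helper functions below default to the `/ionosonde/data` tree on the same server.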

In [1]:
import os
import re
from ftplib import FTP

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from netCDF4 import Dataset

In [2]:
%matplotlib inline

## FTP site params

In [3]:
# Host name handed to FTP(...) by the cells below.
ftpsite ='ftp.ngdc.noaa.gov'
# NOTE(review): ftpdir and ftpstndir are not referenced by any cell visible in this
# notebook; the helper functions default to rootdir='/ionosonde/data' instead of the
# mids11 tree -- confirm which tree is intended before relying on these two values.
ftpdir = '/ionosonde/mids11/WI937/individual/2019/306/image'
ftpstndir = '/ionosonde/mids11'

## Station List

In [4]:
#ftp://ftp.ngdc.noaa.gov//ionosonde/mids11/SAA0K/individual/2019/252/image
#ftp://ftp.ngdc.noaa.gov/ionosonde/mids11/SAA0K/individual/2019/252/ionogram/

def get_station_list(ftpserver = 'ftp.ngdc.noaa.gov',rootdir = '/ionosonde/data'):
    """
    Return the station directories found under `rootdir` on the FTP server.

    Parameters
    ----------
    ftpserver : str
        Host name of the FTP server; login() is called without credentials.
    rootdir : str
        Directory on the server whose entries are listed.

    Returns
    -------
    list of str
        The entries of `rootdir`, in the order the server lists them, whose
        whole name is a five-character code of digits and upper-case letters.
    """
    # fullmatch (not search): a longer name that merely contains five such
    # characters in a row, e.g. 'README.TXT', must not be taken for a station
    station_code = re.compile(r'[0-9A-Z]{5}')

    with FTP(ftpserver) as ftp:
        ftp.login()
        ftp.cwd(rootdir)
        stn_list = [f for f in ftp.nlst() if station_code.fullmatch(f)]

    return stn_list

In [5]:
stations = get_station_list()

In [6]:
len(stations)

156

## Get the last ionograms from each station

In [7]:
def get_ionogram_ngi(ftp, stn, rootdir = '/ionosonde/data', datadir='individual'):
    """
    List the ionogram files of the most recent observation day of a station.

    Parameters
    ----------
    ftp : ftplib.FTP
        An already logged-in FTP session; its working directory is changed.
    stn : str
        Station directory name under `rootdir`.
    rootdir : str
        Root directory holding one sub-directory per station.
    datadir : str
        Sub-directory of the station that holds the per-year directories.

    Returns
    -------
    dict
        A single entry mapping '<year>-<day>' to the file names listed in
        '<rootdir>/<stn>/<datadir>/<year>/<day>/ionogram'.  When the station
        has no year or day directory, or any step fails, the placeholder
        {'9999-999': []} is returned instead.
    """
    no_data = {'9999-999': []}

    try:
        stndir = f'{rootdir}/{stn}/{datadir}'
        ftp.cwd(stndir)
        # The order of nlst() is up to the server, so choose the latest entry
        # explicitly; fixed-width zero-padded names compare correctly as strings.
        yrs = [y for y in ftp.nlst() if re.fullmatch(r'[0-9]{4}', y)]
        if not yrs:
            return no_data
        year = max(yrs) # year of most recent data

        daysdir = f'{stndir}/{year}'
        ftp.cwd(daysdir)
        days = [d for d in ftp.nlst() if re.fullmatch(r'[0-9]{3}', d)]
        if not days: # no observation days
            return no_data
        day = max(days) # most recent day's data

        ftp.cwd(f'{daysdir}/{day}/ionogram')
        return {f'{year}-{day}': ftp.nlst()}
    except Exception:
        # Best effort: a station that cannot be listed is reported as having no
        # data.  Unlike a bare `except:`, KeyboardInterrupt still propagates, so
        # a long scan over many stations can be interrupted.
        return no_data

In [8]:
# One FTP session is shared by all stations; each station maps to the
# dictionary returned by get_ionogram_ngi for it.
with FTP(ftpsite) as ftp:
    ftp.login()
    vipir_list = {stn: get_ionogram_ngi(ftp, stn) for stn in stations}

## Summarize the Station Info

In [10]:
def get_obs_type(fname):
    """
    Return the extension of the file name supplied.

    The extension is the text after the last '.' in `fname`, so a name with
    several dots ('a.tar.gz') yields its final component ('gz').  'Unknown'
    is returned when the name has no dot or nothing follows the last dot.
    """
    _, dot, ext = fname.rpartition('.')

    # rpartition leaves `dot` empty when the name contains no '.' at all
    ftype = ext if dot and ext else 'Unknown'

    return ftype

In [11]:
def get_summary(vipir, stn):
    """
    Summarise the listing stored for one station.

    Parameters
    ----------
    vipir : dict
        Maps a station name to a dict of {observation date: list of file names}.
    stn : str
        Station to summarise; must be a key of `vipir`.

    Returns
    -------
    tuple
        (station name, last observation date key, number of files listed for
        that date, comma-separated file types found among those files).
    """
    last_obs_date = list(vipir[stn])[-1]
    obs_files = vipir[stn][last_obs_date]
    # sorted: set iteration order is arbitrary, and the joined string is later
    # compared and counted, so it has to be the same on every run
    f_types = ','.join(sorted({get_obs_type(fn) for fn in obs_files}))

    return stn, last_obs_date, len(obs_files), f_types
    

In [12]:
# One row per station, in the order of `stations`.
summary_columns = ['StationName', 'LastObsDate', 'NObservations', 'ObsTypes']
summary_rows = [get_summary(vipir_list, stn) for stn in stations]
station_summary = pd.DataFrame(summary_rows, columns=summary_columns)

In [13]:
station_summary

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
0,09429,9999-999,0,
1,AA109,9999-999,0,
2,AC843,9999-999,0,
3,AD651,9999-999,0,
4,AD930,9999-999,0,
...,...,...,...,...
151,WI937,2020-158,597,NGI
152,WK546,9999-999,0,
153,WP937,9999-999,0,
154,YA462,9999-999,0,


## Stations with NGI files

In [14]:
station_summary.query("ObsTypes == 'NGI'")

Unnamed: 0,StationName,LastObsDate,NObservations,ObsTypes
71,IV437,2020-158,399,NGI
77,JV433,2020-158,479,NGI
127,SJJ18,2020-158,459,NGI
151,WI937,2020-158,597,NGI


## Other available File Types

In [15]:
station_summary.ObsTypes.value_counts()

       127
RSF     20
MMM      4
NGI      4
SBF      1
Name: ObsTypes, dtype: int64

## Get Some NGI files

We want a random mix of 2019 and 2020 files. We'll assume that the stations have data for those years.

In [16]:
# Names of the stations whose latest listing contains only NGI files.
stations_ngi = station_summary.query("ObsTypes=='NGI'")['StationName']

In [17]:
stations_ngi

71     IV437
77     JV433
127    SJJ18
151    WI937
Name: StationName, dtype: object

In [18]:
def get_station_daylist(ftp, stn, years=('2019', '2020'), rootdir = '/ionosonde/data', datadir='individual'):
    """
    Return the days of a station that have at least one *.NGI ionogram file.

    Parameters
    ----------
    ftp : ftplib.FTP
        An already logged-in FTP session; its working directory is changed to
        '<rootdir>/<stn>/<datadir>'.
    stn : str
        Station directory name under `rootdir`.
    years : iterable of str
        Year directories to scan, in the order given.
    rootdir : str
        Root directory holding one sub-directory per station.
    datadir : str
        Sub-directory of the station that holds the per-year directories.

    Returns
    -------
    list of str
        '<year>/<day>' strings (day being a three-digit directory name) for
        which '<rootdir>/<stn>/<datadir>/<year>/<day>/ionogram' lists at least
        one name ending in '.NGI'.  Years and days whose listing is refused by
        the server are skipped.
    """
    # local import keeps this cell runnable without touching the import cell
    from ftplib import error_perm

    stndir = f'{rootdir}/{stn}/{datadir}'
    ftp.cwd(stndir)
    flist = []
    for yr in years:
        try:
            entries = ftp.nlst(yr)
        except error_perm:
            # the station has no directory for this year
            continue
        # go through each of the days to see if there are any *.NGI files in the day directory
        for entry in entries:
            # servers differ in whether nlst(<dir>) returns bare names or
            # paths; keep only the last component and rebuild '<year>/<day>'
            day = entry.rsplit('/', 1)[-1]
            if not re.fullmatch(r'[0-9]{3}', day):
                continue
            d = f'{yr}/{day}'
            ddir = f'{stndir}/{d}/ionogram'
            try:
                names = ftp.nlst(ddir)
            except error_perm:
                # no ionogram directory (or nothing listable) for this day
                continue
            if any(re.search(r'\.NGI$', f) for f in names):
                flist.append(d)

    return flist


In [19]:
# One FTP session is shared by all NGI stations; each station maps to the
# list returned by get_station_daylist for it.
with FTP(ftpsite) as ftp:
    ftp.login()
    flist = {stn: get_station_daylist(ftp, stn) for stn in stations_ngi}

In [24]:
# number of days for which each station has data
for stn, stn_days in flist.items():
    print(f"Station: {stn}, number of days' data: {len(stn_days)}")

Station: IV437, number of days' data: 72
Station: JV433, number of days' data: 57
Station: SJJ18, number of days' data: 70
Station: WI937, number of days' data: 99


In [74]:
np.random.seed(1234)

In [75]:
#ftp://ftp.ngdc.noaa.gov/ionosonde/data/WI937/individual/2019/356/ionogram/
# Download one randomly chosen *.NGI file from each of up to `n_each` randomly
# chosen days per station into the local `outdir` directory.
rootdir = '/ionosonde/data'
datadir='individual'
outdir = 'netcdf'
n_each = 50

# open(..., 'wb') does not create missing directories
os.makedirs(outdir, exist_ok=True)

with FTP(ftpsite) as ftp:
    ftp.login()
    for stn in flist:
        # sampling without replacement raises when more days are requested
        # than the station has, so cap the request
        n_days = min(n_each, len(flist[stn]))
        if n_days == 0:
            continue
        daylist = np.random.choice(flist[stn], n_days, replace=False)
        for d in daylist:
            ddir = f'{rootdir}/{stn}/{datadir}/{d}/ionogram'
            ftp.cwd(ddir)
            cdfs = [f for f in ftp.nlst() if re.search(r'.*\.NGI$',f)]
            if len(cdfs) == 0:
                # nothing to fetch: report it and move on rather than creating
                # an empty 'No File' and requesting a file that does not exist
                print(f'Directory: {ddir}, File: No File')
                continue
            cdf = np.random.choice(cdfs, 1)[0]
            print(f'Directory: {ddir}, File: {cdf}')
            with open(os.path.join(outdir, cdf),'wb') as fout:
                ftp.retrbinary(f'RETR {ddir}/{cdf}', fout.write)




Directory: /ionosonde/data/IV437/individual/2019/308/ionogram, File: IV437_2019308194615.NGI
Directory: /ionosonde/data/IV437/individual/2019/316/ionogram, File: IV437_2019316181500.NGI
Directory: /ionosonde/data/IV437/individual/2019/344/ionogram, File: IV437_2019344150000.NGI
Directory: /ionosonde/data/IV437/individual/2019/336/ionogram, File: IV437_2019336100115.NGI
Directory: /ionosonde/data/IV437/individual/2019/287/ionogram, File: IV437_2019287214500.NGI
Directory: /ionosonde/data/IV437/individual/2020/015/ionogram, File: IV437_2020015170115.NGI
Directory: /ionosonde/data/IV437/individual/2020/018/ionogram, File: IV437_2020018124730.NGI
Directory: /ionosonde/data/IV437/individual/2019/352/ionogram, File: IV437_2019352114605.NGI
Directory: /ionosonde/data/IV437/individual/2019/132/ionogram, File: IV437_2019132233000.NGI
Directory: /ionosonde/data/IV437/individual/2019/357/ionogram, File: IV437_2019357071605.NGI
Directory: /ionosonde/data/IV437/individual/2019/323/ionogram, File: I

In [73]:
!ls -l netcdf


total 131124
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659636 Jan 21 00:59 IV437_2019068040840.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659636 Jan 21 00:59 IV437_2019093173858.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659636 Jan 21 00:59 IV437_2020017073000.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659632 Jan 21 00:59 JV433_2019349051730.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659632 Jan 21 00:59 JV433_2020007195845.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus 17659632 Jan 21 00:59 JV433_2020016174845.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus  4087004 Jan 21 00:59 SJJ18_2019208095801.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus  4087004 Jan 21 00:59 SJJ18_2019209193801.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus  4087004 Jan 21 00:59 SJJ18_2019315203601.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_leo_mcmanus  8036440 Jan 21 00:59 WI937_2019192024902.NGI
-rw-rw-r-- 1 kevin_leo_mcmanus kevin_