# Python code to download the NOAA NCEP NCAR daily reanalysis 

In [None]:
# run mode: 'download' fetches every matching file of the dataset,
# 'update' only (re-)fetches the last file of the server listing
mode = 'download'
# mode = 'update'

In [None]:
import sys

In [None]:
# show the path of the Python interpreter running this code
print(sys.executable)

In [None]:
import pathlib
import ftplib

In [None]:
import numpy as np 
import pandas as pd 
import xarray as xr

### set the local path where the data will be downloaded 

### output directory 

In [None]:
# current working directory of the process, used as the anchor for the output path
CWD = pathlib.Path.cwd()

In [None]:
# output directory: data/NCEP_NCAR/daily, located two directory levels above the working directory
opath = CWD.parents[1] / 'data' / 'NCEP_NCAR' / 'daily'

In [None]:
# create the output directory together with any missing parents;
# exist_ok makes this a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation
opath.mkdir(parents=True, exist_ok=True)

### set the URL and the location of the datasets on the NOAA server 

In [None]:
# host name of the FTP server (handed directly to ftplib.FTP, hence no 'ftp://' scheme)
url = 'ftp2.psl.noaa.gov'

In [None]:
# directory on the server that the FTP session changes into before listing the files
folder = 'Datasets/ncep.reanalysis.dailyavgs/pressure'

### set the domain for extraction 

In [None]:
# bounding box used to subset the 'lat' and 'lon' coordinates of each file
# (presumably degrees, with longitudes on a 0-360 grid -- TODO confirm against the files)
latN = 15
latS = -60
lonW = 155
lonE = 235

# single value selected on the 'level' coordinate (presumably hPa -- TODO confirm)
level = 1000

### Now connect, get the list of files, and download either the whole dataset or the last file 

In [None]:
def _fetch_and_extract(ftp, filename):
    """Download `filename` from the FTP server and store only the regional subset.

    The raw file is downloaded under a temporary name, the domain
    (lonW-lonE, latN-latS) and the pressure `level` are extracted, and the
    result is written to ``opath / filename``. The final file only appears
    once the extraction has fully succeeded, so an interrupted run never
    leaves a truncated file that a later 'download' run would skip.

    Parameters
    ----------
    ftp : ftplib.FTP
        Connected session, already positioned in the remote directory.
    filename : str
        Name of the remote file, also used as the local file name.

    Returns
    -------
    bool
        True if the subset was written, False if the download failed.
    """
    target = opath.joinpath(filename)

    # temporary names keep the original suffix so that xarray can still
    # pick its backend from the file extension
    raw = target.with_name(f"{target.stem}.raw{target.suffix}")
    staged = target.with_name(f"{target.stem}.subset{target.suffix}")

    try:
        try:
            with open(raw, 'wb') as f:
                ftp.retrbinary('RETR ' + filename, f.write)
        except ftplib.all_errors as err:
            print(f"download failed for {filename}: {err}")
            return False

        print(f"{filename} successfully downloaded in {str(opath)}, now extracting domain")

        with xr.open_dataset(raw) as dset:
            # extract the domain and the level, latN first as goes from N to S
            subset = dset.sel(lon=slice(lonW, lonE), lat=slice(latN, latS), level=level)

            # remove singleton dimension
            subset = subset.squeeze()

            # flip the latitudes to go from S to N
            subset = subset.sortby('lat')

            # read the values into memory so that the raw file can be
            # closed (and deleted) before the subset is written
            subset = subset.load()

        # write next to the final location, then rename in one step
        subset.to_netcdf(staged)
        staged.replace(target)
    finally:
        # never leave temporary files behind, whatever happened above
        for leftover in (raw, staged):
            if leftover.exists():
                leftover.unlink()

    print(f"successfully extracted domain for {str(target)}")
    return True


with ftplib.FTP(url) as ftp:

    # login (no credentials given, i.e. ftplib's default anonymous login)
    ftp.login()

    # move into the datasets directory
    ftp.cwd(folder)

    # select the geopotential files; sorted because the order returned by
    # NLST is server dependent and 'update' relies on the last entry
    filenames = sorted(f for f in ftp.nlst() if 'hgt' in f)

    # if download mode, we download everything (the whole dataset)
    if mode == 'download':
        for filename in filenames:
            if opath.joinpath(filename).exists():
                print(f"{filename} already downloaded and extracted in {str(opath)}, skipping to next file")
            else:
                _fetch_and_extract(ftp, filename)

    # if update mode, we only download the last file ...
    elif mode == 'update':
        if not filenames:
            raise RuntimeError(f"no 'hgt' file found in {folder} on {url}")
        filename = filenames[-1]
        _fetch_and_extract(ftp, filename)

    else:
        raise ValueError(f"unknown mode {mode!r}, expected 'download' or 'update'")
                
        