# Scrape GOCCP data from FTP site
- Get 2D phase and cloud cover data for the period 06-09 - 05.13

In [224]:
import sys
import time
# Add common resources folder to path
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/Common/")
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/")

from imports import (
    pd, np, xr, mpl, plt, sns, os, 
    datetime, sys, crt, gridspec,
    polyfit, ccrs, LinearRegression, metrics
    )

from functions import (
    masked_average, interpretNS, plot_slf_isotherms, 
    add_weights, process_caliop, process_for_slf,
    noresm_slf_to_df, regress_1d
    )

from ftplib import FTP # note this import here

%matplotlib inline

In [252]:
host = os.uname()[1]
if 'jupyter' in host.split('-'): # Check if running on NIRD through the Jupyter Hub
    print('Running through MC2 Jupyter Hub')
    model_dir = '/mnt/mcc-ns9600k/jonahks/'
    os.chdir(model_dir)

else:  # Assume that we're running on a local machine and mounting NIRD
    print('Running on %s, attempting to mount ns9600k/jonahks/ from NIRD' % str(host))
    os.system('fusermount -zu ~/drivemount/')  # unmount first
    os.system('sshfs jonahks@login.nird.sigma2.no:"p/jonahks/" ~/drivemount/')    # Calling mountnird from .bashrc doesn't work
    os.chdir('/home/jonahks/drivemount/')
    save_dir = '~/DATAOUT/'
    save_to = os.path.expanduser(save_dir)

output_dir = 'GOCCP_data/2Ddata'
case_dir = 'mnth15runs/'   # inconsistent label compared to jupy_test
mods_dir = 'inp_validation/'

# Check that each important directory can be accessed:    
access_paths = os.path.exists(mods_dir) and os.path.exists(output_dir) and os.path.exists(model_dir)
print('Can access all directory paths:', access_paths)

os.chdir(output_dir) # Move to where files will be saved and organized

Running through MC2 Jupyter Hub
Can access all directory paths: True


## Function for getting the month of a file on this server

In [227]:
def getmonth(filename, year):
    parts = filename.split('_')
    date = [x for x in parts if year in x] # grab date regardless of position
    
    if len(date) > 1:
        print('Ambiguous naming of file: %s \n Cannot determine month.' % filename)
        return None
    else: 
        stamp = date[0]
        month = stamp[-2:]
        
        return month

In [269]:
# FTP download function from https://stackoverflow.com/questions/11573817/how-to-download-a-file-via-ftp-with-python-ftplib

# This function will redownload and replace if there is a previously existing file of the same name.
def download(filename):
    print("Downloading=> %s" % filename)
    
    with open(filename,'wb') as f:
        ftp.retrbinary('RETR ' + filename,f.write)
    
    #fhandle = open(filename, 'wb')
    #ftp.retrbinary('RETR ' + filename, fhandle.write)
    fhandle.close()
    
    print("Download Succesful")
    

In [276]:
ftp.retrlines('LIST')

drwxr-xr-x   3 ftp      ftp          4096 Nov 29 16:21 .
drwxr-xr-x   5 ftp      ftp          4096 Oct 26  2018 ..
drwxr-xr-x   2 ftp      ftp        159744 Nov 29 16:14 daily
-rw-r--r--   1 ftp      ftp        201376 Jan 10  2019 MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201024 Jan 10  2019 MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201376 Jan 10  2019 MapHigh330m_201803_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201376 Nov 29 15:54 MapHigh330m_201804_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201256 Nov 29 15:59 MapHigh330m_201805_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201376 Nov 29 16:02 MapHigh330m_201806_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201492 Nov 29 16:06 MapHigh330m_201807_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201492 Nov 29 16:10 MapHigh330m_201808_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        19997

'226 TÃ©lÃ©chargement terminÃ©'

In [275]:
ftp.cwd('avg')

'250 Commande CWD exÃ©cutÃ©e avec succÃ¨s'

In [277]:
download('MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc')

Downloading=> MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc


NameError: name 'fhandle' is not defined

In [248]:
# ftp.retrlines('LIST') # print files and directories in current directory, useful for navigating around with cwd

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [228]:
# Site is: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

#ftp.retrlines('LIST')     # list directory contents 

'230 User anonymous logged in.'

## Specify dates and strings for identifying file types.

In [265]:
#startdate = '06-2009'
startdate = '06-2006'
#enddate = '05-2013'
enddate = '08-2007'

# these strings will be used to select files, so make sure they are unique to your data
filetypes = ['MapLowMidHigh330m', 'MapLowMidHigh_Phase330m']

start_sep = np.array(startdate.split('-')).astype('int')
end_sep = np.array(enddate.split('-')).astype('int')

years = np.arange(start_sep[-1], end_sep[-1]+1) # Add one to the end to make the interval inclusive
first_month = start_sep[0]
last_month = end_sep[0]

In [261]:
os.listdir()
yr = '2020'
if not os.path.exists(yr):
    os.makedirs(yr)
    
os.chdir(yr)
print(os.listdir())
os.chdir('../')
print(os.listdir())


[]
['2020']


# Main "for" loop

Current issue. Permissions are not changed to the user when the file is downloaded...

In [267]:
# Navigate to the base folder for use. Here I'm just getting 2D data for CLDTOT variables.

ftp.cwd("/cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40")

allscrapes = []

for i, yr in enumerate(years):
    
    # store by year to reduce clutter, create directory if it doesn't already exist and move into it
    if not os.path.exists(str(yr)): 
        os.makedirs(str(yr))
    os.chdir(str(yr))    
    
    try:
        ftp.cwd("%s/avg/" % str(yr)) # go to where monthly averages are
        files = ftp.nlst() # list files
        
        toscrape = [] # list of files to grab
        for var in filetypes:
            _temp = [x for x in files if var in x] # select only files of the variable type of interest
            toscrape = toscrape + _temp

        toscrape.sort()

        # check edge cases to remove months before the startdate
        if i == 0: # first year
            _temp_scrape = []
            for j in toscrape:
                _mnth = np.int(getmonth(j, str(yr)))
                if (_mnth >= first_month): # remove months outside of the intended period
                    _temp_scrape.append(j)
            
            toscrape = _temp_scrape
            
        if i == len(years)-1:  # last year
            _temp_scrape = []
            for j in toscrape:
                _mnth = np.int(getmonth(j, str(yr)))
                if (_mnth <= last_month): # remove months outside of the intended period
                    _temp_scrape.append(j)
        
            toscrape = _temp_scrape
        
        toscrape.sort()
        for selected in toscrape:
            download(selected)
        
        allscrapes = allscrapes + toscrape # not necessary
        
        ftp.cwd("../../") # return to the outer directory
        
        
    except:
        print("Didn't work for %s" % i)
    
    # Back out to the nesting dir so that next the loop is not buried in successive layers...
    os.chdir('../')    

Downloading=> MapLowMidHigh330m_200606_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200607_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200608_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200609_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200610_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200611_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200612_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200606_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200607_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200608_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200609_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200610_avg_CFMIP2_sat_3.1.2.nc
Downl

In [77]:
files = ftp.nlst()

files

['..', 'night', '.', 'avg', 'day']