# Scrape GOCCP data from FTP site
- Get 2D phase and cloud cover data for the period 06-2009 through 05-2013 (inclusive)

In [7]:
import sys
import time
# Add common resources folder to path
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/Common/")
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/Common/")

from imports import (
    pd, np, xr, mpl, plt, sns, os, 
    datetime, sys, crt, gridspec,
    polyfit, ccrs, LinearRegression, metrics
    )

from functions import (
    masked_average, interpretNS, plot_slf_isotherms, 
    add_weights, process_caliop, process_for_slf,
    noresm_slf_to_df, regress_1d
    )

from ftplib import FTP # note this import here

%matplotlib inline

In [10]:
# Work out where the notebook is running and make the project storage the working directory.
host = os.uname()[1]  # second field of os.uname() is the node (host) name
if 'jupyter' in host.split('-'): # Check if running on NIRD through the Jupyter Hub
    print('Running through MC2 Jupyter Hub')
    model_dir = '/mnt/mcc-ns9600k/jonahks/'
    os.chdir(model_dir)

else:  # Assume that we're running on a local machine and mounting NIRD
    print('Running on %s, attempting to mount ns9600k/jonahks/ from NIRD' % str(host))
    # NOTE(review): the exit status of both shell commands is ignored, so a failed
    # mount only shows up later through the path check below.
    os.system('fusermount -zu ~/drivemount/')  # unmount first
    os.system('sshfs jonahks@login.nird.sigma2.no:"p/jonahks/" ~/drivemount/')    # Calling mountnird from .bashrc doesn't work
    model_dir = '/home/jonahks/drivemount/'
    os.chdir(model_dir)
    # NOTE(review): save_dir / save_to are only assigned in this local-machine branch.
    save_dir = '~/DATAOUT/'
    save_to = os.path.expanduser(save_dir)

# Paths below are relative; they resolve against model_dir because of the chdir above.
output_dir = 'GOCCP_data/2Ddata'
case_dir = 'mnth15runs/'   # inconsistent label compared to jupy_test
mods_dir = 'inp_validation/'

# Check that each important directory can be accessed:    
access_paths = os.path.exists(mods_dir) and os.path.exists(output_dir) and os.path.exists(model_dir)
print('Can access all directory paths:', access_paths)

# The result of the check is only printed; the chdir below runs either way.
os.chdir(output_dir) # Move to where files will be saved and organized

Running on storm.uio.no, attempting to mount ns9600k/jonahks/ from NIRD
Can access all directory paths: True


## Function for getting the month of a file on this server

In [11]:
def getmonth(filename, year):
    """Return the two-character month code embedded in a file name.

    The name is split on underscores and the single field that contains
    ``year`` is treated as the date stamp (for example ``'200906'``). The
    last two characters of that stamp are returned.

    Parameters
    ----------
    filename : str
        File name whose underscore-separated fields include a date stamp.
    year : str or int
        Year to look for. It is converted with ``str`` so integer years
        work as well as strings.

    Returns
    -------
    str or None
        The last two characters of the date stamp (e.g. ``'06'``), or
        ``None`` when zero or more than one field contains the year. A
        message is printed in both ``None`` cases.
    """
    year = str(year)
    date = [x for x in filename.split('_') if year in x]  # grab date regardless of position

    if len(date) > 1:
        print('Ambiguous naming of file: %s \n Cannot determine month.' % filename)
        return None

    if not date:
        # Without this guard the indexing below raises IndexError.
        print('No field containing %s in file: %s \n Cannot determine month.' % (year, filename))
        return None

    return date[0][-2:]

## FTP download function from:
https://stackoverflow.com/questions/11573817/how-to-download-a-file-via-ftp-with-python-ftplib


In [12]:
# This function will redownload and replace if there is a previously existing file of the same name.
def download(filename, connection=None):
    """Fetch ``filename`` over FTP and write it to a local file of the same name.

    An existing local file with that name is overwritten. If the transfer
    fails part-way, the partially written file is removed so that it cannot
    be mistaken for a finished download, and the error is re-raised.

    Parameters
    ----------
    filename : str
        Name passed to the server's ``RETR`` command and also used as the
        local path to write.
    connection : object, optional
        Object providing ``retrbinary(command, callback)``. Defaults to the
        notebook-level ``ftp`` session, which must already be connected.

    Raises
    ------
    Exception
        Whatever ``open`` or ``retrbinary`` raises is propagated unchanged.
    """
    if connection is None:
        connection = ftp  # notebook-level FTP session

    print("Downloading=> %s" % filename)

    # Opening happens outside the try block: if the file cannot be opened,
    # nothing was written and nothing should be deleted.
    fhandle = open(filename, 'wb')
    try:
        with fhandle:  # closes the handle even when the transfer raises
            connection.retrbinary('RETR ' + filename, fhandle.write)
    except Exception:
        try:
            os.remove(filename)  # drop the truncated file
        except OSError:
            pass  # cleanup is best effort; the transfer error matters more
        raise

    print("Download Succesful")
    

## Specify dates and strings for identifying file types.

In [44]:
# Inclusive download window, each end written as 'MM-YYYY'.
startdate, enddate = '06-2009', '05-2013'

# Substrings used to pick files out of the server listing,
# so make sure they are unique to your data.
filetypes = [
    'MapLowMidHigh330m',
    'MapLowMidHigh_Phase330m',
]

# Break each stamp into an integer array: leading field is the month, trailing field the year.
start_sep = np.array(startdate.split('-')).astype('int')
end_sep = np.array(enddate.split('-')).astype('int')

# Month bounds for the first and the last year of the window.
first_month, last_month = start_sep[0], end_sep[0]

# Every year of the window; np.arange excludes its stop value, hence the extra one.
years = np.arange(start_sep[-1], 1 + end_sep[-1])

# Main "for" loop

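Known issue: when a file is downloaded, its permissions are not set so that the current user can modify it, so later operations on the file (for example deleting it) can fail with a `PermissionError`.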

In [45]:
# Download every monthly-average file of the requested types for the configured period.
#
# Needs a connected `ftp` session plus `years`, `first_month`, `last_month`,
# `filetypes`, `model_dir`, `getmonth` and `download` from the other cells.

# Base folder on the server. Here I'm just getting 2D data for CLDTOT variables.
# For 3D data: "/cfmip/GOCCP_v3/3D_CloudFraction/grid_2x2xL40/"
remote_base = "/cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40"
ftp.cwd(remote_base)

# Local folder that receives the files, one sub-directory per year.
local_base = os.path.join(model_dir, "GOCCP_data", "2Ddata")
os.chdir(local_base)

allscrapes = []
last_index = len(years) - 1

for i, yr in enumerate(years):

    # Store by year to reduce clutter. Always move into the year folder, whether
    # or not it already existed, so re-runs write to the same place.
    # The mode of a newly created directory is subject to the process umask.
    year_dir = os.path.join(local_base, str(yr))
    os.makedirs(year_dir, exist_ok=True)
    os.chdir(year_dir)

    try:
        # Absolute remote path: a failure in one year cannot leave the session
        # in the wrong directory for the next one.
        ftp.cwd("%s/%s/avg/" % (remote_base, yr))  # go to where monthly averages are
        files = ftp.nlst()  # list files

        # Keep only files whose name contains one of the requested type strings.
        # NOTE(review): this substring match also selects the `_tmp` / `_tmp1`
        # variants seen in the recorded output - confirm that is wanted.
        toscrape = sorted(x for x in files if any(var in x for var in filetypes))

        # Only the first and last year can contain months outside the period.
        if i == 0 or i == last_index:
            lowest = first_month if i == 0 else 1
            highest = last_month if i == last_index else 12

            in_period = []
            for name in toscrape:
                month = getmonth(name, str(yr))
                if month is None:
                    continue  # month could not be determined; skip this file
                if lowest <= int(month) <= highest:  # builtin int: np.int no longer exists in NumPy
                    in_period.append(name)
            toscrape = in_period

        for selected in toscrape:
            download(selected)

        allscrapes = allscrapes + toscrape  # record of what was fetched

        ftp.cwd(remote_base)  # return to the outer directory

    except Exception as err:  # not a bare except, so Ctrl-C still interrupts the run
        print("Didn't work for %s: %s" % (yr, err))

    finally:
        # Back out to the nesting dir so the next iteration is not buried in successive layers.
        os.chdir(local_base)

Downloading=> MapLowMidHigh330m_200906_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200906_avg_CFMIP2_sat_3.1.2_tmp.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200906_avg_CFMIP2_sat_3.1.2_tmp1.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200907_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200907_avg_CFMIP2_sat_3.1.2_tmp.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200907_avg_CFMIP2_sat_3.1.2_tmp1.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200908_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200908_avg_CFMIP2_sat_3.1.2_tmp.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200908_avg_CFMIP2_sat_3.1.2_tmp1.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200909_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200909_avg_CFMIP2_sat_3.1.2_tmp.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200909_avg_CFMIP2_sat_3.1.2_tmp1.nc

In [33]:
# Scratch check: list the entries of the current working directory.
os.listdir()

['safe', '2014', '2010', '2011', '2012', '2013', '2020', '2009']

In [35]:
# Scratch cell: try to delete one downloaded file. The recorded output shows this
# call raising PermissionError.
os.remove('2014/MapLowMidHigh_Phase330m_201402_avg_CFMIP2_sat_3.1.2.nc')

PermissionError: [Errno 13] Permission denied: '2014/MapLowMidHigh_Phase330m_201402_avg_CFMIP2_sat_3.1.2.nc'

In [38]:
# Scratch cell: create a directory in the current working directory.
# NOTE(review): presumably a write-permission probe - confirm, and remove the directory afterwards.
os.mkdir('hiJohannes')

In [77]:
# Scratch check: list the names in the FTP session's current remote directory and display them.
files = ftp.nlst()

files

['..', 'night', '.', 'avg', 'day']

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [13]:
# Open the FTP session used by `download` and by the main loop.
# This cell defines `ftp`, so it has to be executed before the cells above that use it.
# Site is: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

# Start in the 2018 monthly-average folder; the value displayed below is the server's reply to this cwd.
ftp.cwd('cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40/2018/avg')
#ftp.retrlines('LIST')     # list directory contents 

'250 Commande CWD exécutée avec succès'

In [14]:
# Print the server's long-format listing of the current remote directory.
ftp.retrlines('LIST')     # list directory contents 

drwxr-xr-x   3 ftp      ftp         20480 Mar  2 19:12 .
drwxr-xr-x   5 ftp      ftp          4096 Oct 26  2018 ..
drwxr-xr-x   2 ftp      ftp        159744 Nov 29 16:14 daily
-rw-r--r--   1 ftp      ftp        201376 Jan 10  2019 MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201472 Mar  2 18:17 MapHigh330m_201801_avg_CFMIP2_sat_3.1.2_tmp1.nc
-rw-r--r--   1 ftp      ftp        201472 Mar  2 18:17 MapHigh330m_201801_avg_CFMIP2_sat_3.1.2_tmp.nc
-rw-r--r--   1 ftp      ftp        201024 Jan 10  2019 MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201120 Mar  2 18:21 MapHigh330m_201802_avg_CFMIP2_sat_3.1.2_tmp1.nc
-rw-r--r--   1 ftp      ftp        201120 Mar  2 18:21 MapHigh330m_201802_avg_CFMIP2_sat_3.1.2_tmp.nc
-rw-r--r--   1 ftp      ftp        201376 Jan 10  2019 MapHigh330m_201803_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201472 Mar  2 18:26 MapHigh330m_201803_avg_CFMIP2_sat_3.1.2_tmp1.nc
-rw-r--r--   1 ft

'226 Téléchargement terminé'

In [18]:
# Manual check of `download` on a single file from the current remote directory;
# the file is written to the current local working directory.
download('MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc')

Downloading=> MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc
Download Succesful


In [298]:
# Add the "others may write" permission bit to one downloaded file, keeping its existing mode bits.
# NOTE(review): this makes the file world-writable - confirm that is intended.
import stat
path= 'MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc'
st = os.stat(path)  # current mode bits of the file
os.chmod(path, st.st_mode | stat.S_IWOTH)

In [248]:
# ftp.retrlines('LIST') # print files and directories in current directory, useful for navigating around with cwd

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [40]:
# Re-open the FTP session (same commands as the earlier "FTP initializing" cell).
# Running this replaces the existing `ftp` object with a fresh connection.
# Site is: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

# Start in the 2018 monthly-average folder; the value displayed below is the server's reply to this cwd.
ftp.cwd('cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40/2018/avg')
#ftp.retrlines('LIST')     # list directory contents 

'250 Commande CWD exécutée avec succès'

In [7]:
# Print the server's long-format listing of the current remote directory.
ftp.retrlines('LIST')

drwxr-xr-x   4 ftp      ftp          4096 Jan 14  2019 .
drwxrwxr-x   5 ftp      ftp          4096 Feb  7  2018 ..
drwxr-xr-x   3 ftp      ftp        167936 Jan 14  2019 daily
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201001_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201184 Jan  9  2019 MapHigh330m_201002_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201003_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201428 Jan  9  2019 MapHigh330m_201004_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201005_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201428 Jan  9  2019 MapHigh330m_201006_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201007_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201184 Jan  9  2019 MapHigh330m_201008_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        20142

'226 Téléchargement terminé'