# Scrape GOCCP data from FTP site
- Get 2D phase and cloud cover data for the period 06-09 - 05.13

In [25]:
import sys
import time
# Add common resources folder to path
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/Common/")
sys.path.append("/mnt/mcc-ns9600k/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/")
sys.path.append("/home/jonahks/git_repos/netcdf_analysis/Common/")
sys.path.append("/Users/jonahshaw/git_repos/netcdf_analysis/")
sys.path.append("/Users/jonahshaw/git_repos/netcdf_analysis/Common/")

from imports import (
    pd, np, xr, mpl, plt, sns, os, 
    datetime, sys, crt, gridspec,
    polyfit, ccrs, LinearRegression, metrics
    )

from functions import (
    masked_average, interpretNS, plot_slf_isotherms, 
    add_weights, process_caliop, process_for_slf,
    noresm_slf_to_df, regress_1d
    )

from ftplib import FTP # note this import here

%matplotlib inline

In [2]:
# Work out where the notebook is running from the machine's node name, move into
# the matching data root, and then into the directory where downloads are stored.
host = os.uname()[1]
if 'jupyter' in host.split('-'): # Check if running on NIRD through the Jupyter Hub
    print('Running through MC2 Jupyter Hub')
    model_dir = '/mnt/mcc-ns9600k/jonahks/'
    os.chdir(model_dir)

else:  # Assume that we're running on a local machine and mounting NIRD
    print('Running on %s, attempting to mount ns9600k/jonahks/ from NIRD' % str(host))
    # The exit status of both shell commands is ignored, so a failed mount is only
    # noticed later, when os.chdir(model_dir) cannot find the directory.
    os.system('fusermount -zu ~/drivemount/')  # unmount first
    os.system('sshfs jonahks@login.nird.sigma2.no:"p/jonahks/" ~/drivemount/')    # Calling mountnird from .bashrc doesn't work
    # NOTE(review): the mount point is '~/drivemount/' but model_dir is the absolute
    # '/home/jonahks/drivemount/'; the two only coincide when the home directory is
    # /home/jonahks (the recorded run on a MacBook raised FileNotFoundError here).
    model_dir = '/home/jonahks/drivemount/'
    os.chdir(model_dir)
    # NOTE(review): save_dir / save_to are assigned only in this branch, so they are
    # undefined when running through the Jupyter Hub.
    save_dir = '~/DATAOUT/'
    save_to = os.path.expanduser(save_dir)

# Paths below are relative to model_dir (the current working directory at this point).
output_dir = 'GOCCP_data/2Ddata'
case_dir = 'mnth15runs/'   # inconsistent label compared to jupy_test
mods_dir = 'inp_validation/'

# Check that each important directory can be accessed (case_dir is not checked).
access_paths = os.path.exists(mods_dir) and os.path.exists(output_dir) and os.path.exists(model_dir)
print('Can access all directory paths:', access_paths)

# The result of the check above is only printed; the chdir is attempted regardless.
os.chdir(output_dir) # Move to where files will be saved and organized

Running on Jonahs-MacBook-Pro.local, attempting to mount ns9600k/jonahks/ from NIRD


FileNotFoundError: [Errno 2] No such file or directory: '/home/jonahks/drivemount/'

In [3]:
# Switch the working directory to a hard-coded local path on this machine.
os.chdir("/Users/jonahshaw/obs")

In [27]:
# Show the entries of the current local working directory.
os.listdir()

['CALIOP', 'CLOUDSAT']

## Function for getting the month of a file on this server

In [12]:
def getmonth(filename, year):
    """Return the two-character month code embedded in a server filename.

    The filename is split on underscores and the single token that contains
    ``year`` is treated as the date stamp, wherever it sits in the name. The
    month is taken to be the last two characters of that token
    (e.g. ``'200606'`` -> ``'06'``), so a ``YYYYMM`` stamp is assumed.

    Parameters
    ----------
    filename : str
        Name of the file on the server.
    year : str or int
        Year used to locate the date stamp; converted with ``str``.

    Returns
    -------
    str or None
        The month as a two-character string, or ``None`` (after printing a
        message) when no token or more than one token contains the year.
    """
    year = str(year)  # allow callers to pass the year as a number
    stamps = [part for part in filename.split('_') if year in part]  # grab date regardless of position

    if not stamps:
        # Previously this case fell through to an IndexError on an empty list.
        print('No date stamp containing %s in file: %s \n Cannot determine month.' % (year, filename))
        return None

    if len(stamps) > 1:
        print('Ambiguous naming of file: %s \n Cannot determine month.' % filename)
        return None

    return stamps[0][-2:]

## FTP download function from:
https://stackoverflow.com/questions/11573817/how-to-download-a-file-via-ftp-with-python-ftplib


In [13]:
def download(filename, connection=None):
    """Download ``filename`` from the FTP server's current directory.

    The data are written in binary mode to a local file with the same name,
    so an existing local file of that name is overwritten (re-downloaded).

    Parameters
    ----------
    filename : str
        Name of the remote file; also used as the local output path.
    connection : ftplib.FTP-like object, optional
        Session to retrieve from. Defaults to the module-level ``ftp`` object
        created in the "FTP initializing" cell.

    Raises
    ------
    Any error raised by ``open`` or ``retrbinary`` propagates to the caller;
    the local file handle is closed in either case.
    """
    if connection is None:
        connection = ftp  # fall back to the notebook's global session

    print("Downloading=> %s" % filename)

    # The context manager closes the handle even if the transfer fails part-way.
    with open(filename, 'wb') as fhandle:
        connection.retrbinary('RETR ' + filename, fhandle.write)

    print("Download Succesful")
    

## Specify dates and strings for identifying file types.

In [14]:
# Period to fetch, written as 'MM-YYYY'.
startdate = '01-2006'
enddate = '12-2015'

# Substrings used to pick files out of the server listing; they must be unique to the wanted data.
filetypes = ['MapLowMidHigh330m', 'MapLowMidHigh_Phase330m']


def _month_year(stamp):
    """Split an 'MM-YYYY' string into an integer array [month, year]."""
    return np.array([int(token) for token in stamp.split('-')])


start_sep = _month_year(startdate)
end_sep = _month_year(enddate)

# Months bounding the period in its first and last year.
first_month = start_sep[0]
last_month = end_sep[0]

# Every year of the period; the stop value is pushed one past the end year so it is included.
years = np.arange(start_sep[1], end_sep[1] + 1)

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [15]:
# Open an anonymous FTP session on the IPSL ClimServ server and move into the
# CloudSat reflectivity directory.
# GOCCP site:    ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/
# CloudSat site: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/CloudSat/CloudSat_Reflectivity/version_5/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

# Alternative starting directory (GOCCP 2D maps, 2018 monthly averages):
# ftp.cwd('cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40/2018/avg') # could use a different directory here
ftp.cwd('cfmip/CloudSat/CloudSat_Reflectivity/version_5/') # could use a different directory here
#ftp.retrlines('LIST')     # list directory contents 

'250 Commande CWD exécutée avec succès'

In [10]:
# List the entry names under the 'Monthly' subdirectory of the current remote directory.
ftp.nlst('Monthly')

['Monthly/.',
 'Monthly/..',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20150201_20150228.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20160401_20160430.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20101201_20101231.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20140701_20140731.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20100401_20100430.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20160201_20160229.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20150401_20150430.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20060901_20060930.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20110101_20110131.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20120301_20120331.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20061101_20061130.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20160801_20160831.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20120501_20120531.nc.gz',
 'Monthly/cfadDbze94_obs4MIPs_CloudSat_R05_20100601_20100630.nc.gz',
 'Mon

In [23]:
# Print a long-format listing (permissions, size, date, name) of the current remote directory.
ftp.retrlines('LIST')

drwxr-xr-x   2 ftp      ftp         20480 May 21  2019 .
drwxr-xr-x   3 ftp      ftp          4096 May 27  2019 ..
-rw-r--r--   1 ftp      ftp      18390040 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20060901_20060930.nc.gz
-rw-r--r--   1 ftp      ftp      19846839 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20061001_20061031.nc.gz
-rw-r--r--   1 ftp      ftp      19346403 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20061101_20061130.nc.gz
-rw-r--r--   1 ftp      ftp      19820846 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20061201_20061231.nc.gz
-rw-r--r--   1 ftp      ftp      19109556 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20070101_20070131.nc.gz
-rw-r--r--   1 ftp      ftp      18385829 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20070201_20070228.nc.gz
-rw-r--r--   1 ftp      ftp      18269365 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20070301_20070331.nc.gz
-rw-r--r--   1 ftp      ftp      18551293 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20070401_2007

'226 Téléchargement terminé'

In [22]:
# Move the FTP session into the 'Monthly' subdirectory (relative to the current remote directory).
ftp.cwd('Monthly')

'250 Commande CWD exécutée avec succès'

In [24]:
# Fetch a single monthly file by name with the download() helper defined above;
# it is written to the current local working directory.
download('cfadDbze94_obs4MIPs_CloudSat_R05_20150201_20150228.nc.gz')

Downloading=> cfadDbze94_obs4MIPs_CloudSat_R05_20150201_20150228.nc.gz
Download Succesful


In [19]:
# Show the entries of the current local working directory.
os.listdir()

['2013',
 '2014',
 '2015',
 '2012',
 'cfadDbze94_obs4MIPs_CloudSat_R05_20060901_20160930.nc.gz',
 '2008',
 '2006',
 '2007',
 '2009',
 '2010',
 'cfadDbze94_obs4MIPs_CloudSat_R05_20100101_20100131.nc.gz',
 '2011']

__CloudSat:__  
ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/CloudSat/CloudSat_Reflectivity/version_5/

# Main "for" loop

Current issue: file permissions are not set for the user when a file is downloaded.

In [18]:
# Download the monthly-average 2D GOCCP maps year by year. Each year's files are
# stored in a local sub-directory named after the year.
# For 3D data the remote base would be "/cfmip/GOCCP_v3/3D_CloudFraction/grid_2x2xL40/".
remote_base = "/cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40"
ftp.cwd(remote_base)

# Local directory that receives the per-year folders.
# os.chdir("/%s/GOCCP_data/2Ddata" % model_dir)
local_base = "/Users/jonahshaw/obs"
os.chdir(local_base)

allscrapes = []  # every filename downloaded across all years


def _month_or_none(filename, year):
    """Return the month of ``filename`` as an int, or None if it cannot be determined."""
    try:
        # Built-in int() replaces np.int, which was removed in NumPy 1.24.
        return int(getmonth(filename, year))
    except (IndexError, TypeError, ValueError):
        return None


for i, yr in enumerate(years):
    year_dir = os.path.join(local_base, str(yr))

    # Store by year to reduce clutter. The folder is entered whether or not it had to
    # be created: entering it only on creation sent re-runs to the wrong directory.
    # (Directory permissions are left at the os default, as with os.mkdir.)
    os.makedirs(year_dir, exist_ok=True)
    os.chdir(year_dir)

    try:
        # An absolute remote path means a failure in one year cannot leave the
        # session stranded in the wrong directory for the following years.
        ftp.cwd("%s/%s/avg/" % (remote_base, yr))  # go to where monthly averages are
        files = ftp.nlst()

        # Keep only files of the variable types of interest, each listed once.
        toscrape = sorted({x for x in files if any(var in x for var in filetypes)})

        # Only the first and last year can contain months outside the requested period.
        earliest = first_month if i == 0 else None
        latest = last_month if i == len(years) - 1 else None

        if earliest is not None or latest is not None:
            in_period = []
            for candidate in toscrape:
                month = _month_or_none(candidate, str(yr))
                if month is None:
                    print("Skipping %s: month could not be determined" % candidate)
                    continue
                if earliest is not None and month < earliest:
                    continue
                if latest is not None and month > latest:
                    continue
                in_period.append(candidate)
            toscrape = in_period

        for selected in toscrape:
            download(selected)

        allscrapes = allscrapes + toscrape  # not necessary

        ftp.cwd(remote_base)  # return to the outer directory

    except Exception as err:
        # Report the year and the actual error; KeyboardInterrupt is no longer swallowed.
        print("Didn't work for %s: %r" % (yr, err))

    finally:
        # Back out to the nesting dir so that the next iteration is not buried in successive layers.
        os.chdir(local_base)

Downloading=> MapLowMidHigh330m_200606_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200607_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200608_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200609_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200610_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200611_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh330m_200612_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200606_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200607_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200608_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200609_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_200610_avg_CFMIP2_sat_3.1.2.nc
Downl

Download Succesful
Downloading=> MapLowMidHigh330m_201012_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201001_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201002_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201003_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201004_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201005_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201006_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201007_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201008_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201009_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201010_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLo

Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201401_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201402_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201403_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201404_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201405_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201406_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201407_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201408_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201409_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201410_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=> MapLowMidHigh_Phase330m_201411_avg_CFMIP2_sat_3.1.2.nc
Download Succesful
Downloading=>

In [7]:
# Show the entries of the current local working directory.
os.listdir()

['efficiency_testbed.ipynb',
 'mfdataset_test.ipynb',
 'Untitled.ipynb',
 'regression_testbed.ipynb',
 '.ipynb_checkpoints',
 'ganglia_issues_tests.ipynb',
 'write_permissions.ipynb',
 'download_goccp.ipynb',
 'nudging_tests.ipynb']

In [77]:
# Names of the entries in the current remote directory.
files = ftp.nlst()

# Display the list as the cell output.
files

['..', 'night', '.', 'avg', 'day']

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [8]:
# Open a new anonymous FTP session (replacing any existing `ftp` object) and move
# into the GOCCP 2D-map monthly averages for 2018.
# Site is: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

ftp.cwd('cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40/2018/avg')
#ftp.retrlines('LIST')     # list directory contents 

'250 Commande CWD exécutée avec succès'

In [17]:
# Print a long-format listing (permissions, size, date, name) of the current remote directory.
ftp.retrlines('LIST')

drwxr-xr-x   3 ftp      ftp          4096 May 27  2019 .
drwxr-xr-x   6 ftp      ftp         20480 May 27  2019 ..
-rw-r--r--   1 ftp      ftp      1600227621 May 21  2019 cfadDbze94_obs4MIPs_CloudSat_R05_20060901_20160930.nc.gz
-rw-r--r--   1 ftp      ftp       2038172 May 21  2019 CloudSat_Reflectivity_Height_Histogram_cfadDbze94_Quality_Statement.pdf
drwxr-xr-x   2 ftp      ftp         20480 May 21  2019 Monthly


'226 Téléchargement terminé'

In [9]:
# Fetch a single monthly file by name with the download() helper defined above;
# it is written to the current local working directory.
download('MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc')

Downloading=> MapHigh330m_201802_avg_CFMIP2_sat_3.1.2.nc
Download Succesful


In [298]:
# Add write permission for "others" to a downloaded file, keeping its existing mode bits.
import stat
# NOTE(review): the download cell above fetches the 201802 file, whereas this path
# names the 201801 file — confirm which file is intended.
path= 'MapHigh330m_201801_avg_CFMIP2_sat_3.1.2.nc'
st = os.stat(path)  # current mode of the file (path is relative to the working directory)
os.chmod(path, st.st_mode | stat.S_IWOTH)  # OR-in the write-by-others bit

In [248]:
# ftp.retrlines('LIST') # print files and directories in current directory, useful for navigating around with cwd

## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [40]:
# Open a new anonymous FTP session (replacing any existing `ftp` object) and move
# into the GOCCP 2D-map monthly averages for 2018.
# Site is: ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/GOCCP_v3/

ftp = FTP('ftp.climserv.ipsl.polytechnique.fr')   # connect to host, default port

ftp.login()               # user anonymous, passwd anonymous@

ftp.cwd('cfmip/GOCCP_v3/2D_Maps/grid_2x2xL40/2018/avg')
#ftp.retrlines('LIST')     # list directory contents 

'250 Commande CWD exécutée avec succès'

In [7]:
# Print a long-format listing (permissions, size, date, name) of the current remote directory.
ftp.retrlines('LIST')

drwxr-xr-x   4 ftp      ftp          4096 Jan 14  2019 .
drwxrwxr-x   5 ftp      ftp          4096 Feb  7  2018 ..
drwxr-xr-x   3 ftp      ftp        167936 Jan 14  2019 daily
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201001_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201184 Jan  9  2019 MapHigh330m_201002_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201003_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201428 Jan  9  2019 MapHigh330m_201004_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201005_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201428 Jan  9  2019 MapHigh330m_201006_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201552 Jan  9  2019 MapHigh330m_201007_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        201184 Jan  9  2019 MapHigh330m_201008_avg_CFMIP2_sat_3.1.2.nc
-rw-r--r--   1 ftp      ftp        20142

'226 Téléchargement terminé'