# Scrape MISR data from FTP site
- New data has a time coordinate so it is easier to combine

In [1]:
import sys
# Add common resources folder to path
sys.path.append('/glade/u/home/jonahshaw/Scripts/git_repos/CESM2_analysis')
sys.path.append('/glade/u/home/jonahshaw/Scripts/git_repos/CESM2_analysis/Common/')
# sys.path.append("/home/jonahks/git_repos/netcdf_analysis/Common/")

from imports import (
    pd, np, xr, mpl, plt, sns, os, 
    datetime, sys, crt, gridspec,
    ccrs, metrics, Iterable
    )

from functions import (
    masked_average, add_weights, sp_map,
    season_mean, get_dpm, leap_year, share_ylims,
    to_png
    )

from classes import SatComp_Metric, CT_SLF_Metric
from collections import deque
from ftplib import FTP # note this import here

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Choose the working directory from the machine this notebook is running on.
# The branches are mutually exclusive: exactly one of them runs.
host = os.uname()[1]
if 'jupyter' in host.split('-'):  # hostname contains a 'jupyter' token -> Jupyter Hub
    print('Running through MC2 Jupyter Hub')
    model_dir = '/mnt/mcc-ns9600k/jonahks/'
    os.chdir(model_dir)

# `elif`, not a second `if`: otherwise a Jupyter Hub host that matched above
# would also fall through to the final `else` and attempt the sshfs mount.
elif 'casper' in host:  # running on CISL Casper
    print('Running through CISL Casper')
    model_dir = '/glade/p/cesm/pcwg/jenkay/COSP/'
    os.chdir(model_dir)

elif 'r6i6n31' in host:  # matches one specific node name, reported as Cheyenne
    print('Running through Cheyenne')
    model_dir = '/glade/p/cesm/pcwg/jenkay/COSP/'
    os.chdir(model_dir)

else:  # Assume that we're running on a local machine and mounting NIRD
    print('Running on %s, attempting to mount ns9600k/jonahks/ from NIRD' % str(host))
    os.system('fusermount -zu ~/drivemount/')  # unmount first
    os.system('sshfs jonahks@login.nird.sigma2.no:"p/jonahks/" ~/drivemount/')    # Calling mountnird from .bashrc doesn't work
    os.chdir('/home/jonahks/drivemount/')
    # NOTE: save_dir is reassigned unconditionally below, and save_to is only
    # defined when this branch runs.
    save_dir = '~/DATAOUT/'
    save_to = os.path.expanduser(save_dir)

# Directories whose existence is verified below.
# NOTE(review): save_dir points at a CALIPSO/GOCCP folder although this notebook
# downloads MISR data -- confirm this is intended.
output_dir = '/glade/u/home/jonahshaw/figures/'
case_dir = '/glade/p/cesm/pcwg/jenkay/COSP/cesm21/'
save_dir = '/glade/u/home/jonahshaw/w/obs/CALIPSO/GOCCP/'

# Check that each important directory can be accessed:
access_paths = os.path.exists(output_dir) and os.path.exists(case_dir) and os.path.exists(save_dir)
print('Can access all directory paths:', access_paths)

Running through CISL Casper
Can access all directory paths: True


## FTP initializing

Following: https://www.pythonforbeginners.com/code-snippets-source-code/how-to-use-ftp-in-python

In [14]:
# Remote location used below:
#   ftp://ftp.climserv.ipsl.polytechnique.fr/cfmip/MISR/CMOR/clMISR
MISR_FTP_HOST = 'ftp.climserv.ipsl.polytechnique.fr'
MISR_REMOTE_DIR = 'cfmip/MISR/CMOR/clMISR'

# Connect on the default port and log in anonymously (no user/password given).
ftp = FTP(MISR_FTP_HOST)
ftp.login()

# Move into the remote data directory; the server's reply string is the cell output.
# (Run ftp.retrlines('LIST') to inspect the directory contents.)
ftp.cwd(MISR_REMOTE_DIR)

'250 Commande CWD exécutée avec succès'

## FTP download function from:
https://stackoverflow.com/questions/11573817/how-to-download-a-file-via-ftp-with-python-ftplib


In [5]:
# This function will redownload and replace if there is a previously existing file of the same name.
def download(filename, connection=None):
    """Download one remote file into the current working directory.

    The local copy has the same name as the remote file and overwrites any
    existing file of that name.

    Parameters
    ----------
    filename : str
        Name of the file in the server's current remote directory.
    connection : object, optional
        Object providing ``retrbinary(command, callback)``. When omitted, the
        notebook-level ``ftp`` connection is used.

    Raises
    ------
    Exception
        Whatever ``retrbinary`` or the file write raises is re-raised after
        the partially written local file has been removed.
    """
    conn = ftp if connection is None else connection
    print("Downloading=> %s" % filename)

    # Open outside the try block so a failure to open never deletes a file
    # this call did not create or truncate.
    fhandle = open(filename, 'wb')
    try:
        # The context manager closes the handle even if the transfer fails.
        with fhandle:
            conn.retrbinary('RETR ' + filename, fhandle.write)
    except BaseException:
        # Also covers KeyboardInterrupt: do not leave a truncated file behind
        # that could later be mistaken for a complete download.
        os.remove(filename)
        raise

    print("Download Successful")
    

In [21]:
# Switch to the local MISR directory: download() opens files by bare name,
# so they are written to the current working directory.
os.chdir('/glade/work/jonahshaw/obs/MISR/')

In [None]:
# Ask the server for a LIST of the current remote directory (manual inspection only).
ftp.retrlines('LIST')     # list directory contents

## Download all the files!

#### This is probably well suited to parallelizing.

In [18]:
# Entry names in the current remote directory; the download loop below
# keeps only those ending in '.gz'.
files = ftp.nlst()

In [26]:
# Fetch every gzip-compressed entry from the remote listing, one at a time.
gz_names = [name for name in files if name.endswith('.gz')]
for gz_name in gz_names:
    download(gz_name)

Downloading=> clMISR_obs4MIPs_MISR_V7_20090901_20090930.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20011001_20011031.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20140801_20140831.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20121101_20121130.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20071001_20071031.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20100801_20100831.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20170801_20170831.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20130601_20130630.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20040601_20040630.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20190601_20190630.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20120701_20120731.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIPs_MISR_V7_20040701_20040731.nc.gz
Download Succesful
Downloading=> clMISR_obs4MIP

In [17]:
# NOTE(review): this cell's execution count (17) is lower than the listing and
# download cells even though it sits last in the notebook. The connection cell
# already changes into 'cfmip/MISR/CMOR/clMISR', so on a fresh top-to-bottom run
# this relative cwd would look for a nested 'clMISR' directory -- confirm
# whether this cell is still needed.
ftp.cwd('clMISR')

'250 Commande CWD exécutée avec succès'