<a href="https://colab.research.google.com/github/marvande/master-thesis/blob/main/ProcessingNcFiles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Processing the yearly NetCDF files on Google Cloud into one Zarr store per decade

Authenticate

In [1]:
# Authenticate this Colab runtime with the user's Google account
from google.colab import auth
auth.authenticate_user()
# Select the Google Cloud project used by the gcloud / gsutil commands below
project_id = 'ee-iceshelf-gee4geo'
!gcloud config set project {project_id}
# Smoke test: download a small text file from the Google Cloud Storage bucket.
!gsutil cp gs://ee-downscalingclimatemodels/test.txt /tmp/gsutil_download.txt
# Print the downloaded file to make sure the transfer worked.
!cat /tmp/gsutil_download.txt 

Are you sure you wish to set property [core/project] to ee-iceshelf-gee4geo?

Do you want to continue (Y/n)?  y

Updated property [core/project].
Copying gs://ee-downscalingclimatemodels/test.txt...
/ [1 files][   13.0 B/   13.0 B]                                                
Operation completed over 1 objects/13.0 B.                                       
AIAIAIAIAIAIA

## Imports

In [2]:
! pip install zarr xarray fsspec gcsfs

Collecting zarr
  Downloading zarr-2.11.1-py3-none-any.whl (153 kB)
[?25l[K     |██▏                             | 10 kB 21.0 MB/s eta 0:00:01[K     |████▎                           | 20 kB 25.3 MB/s eta 0:00:01[K     |██████▍                         | 30 kB 30.6 MB/s eta 0:00:01[K     |████████▌                       | 40 kB 15.4 MB/s eta 0:00:01[K     |██████████▊                     | 51 kB 12.2 MB/s eta 0:00:01[K     |████████████▉                   | 61 kB 13.7 MB/s eta 0:00:01[K     |███████████████                 | 71 kB 13.4 MB/s eta 0:00:01[K     |█████████████████               | 81 kB 14.6 MB/s eta 0:00:01[K     |███████████████████▎            | 92 kB 16.0 MB/s eta 0:00:01[K     |█████████████████████▍          | 102 kB 14.6 MB/s eta 0:00:01[K     |███████████████████████▌        | 112 kB 14.6 MB/s eta 0:00:01[K     |█████████████████████████▋      | 122 kB 14.6 MB/s eta 0:00:01[K     |███████████████████████████▉    | 133 kB 14.6 MB/s eta 0:00:0

In [3]:
import xarray as xr
import zarr
import os
import pandas as pd
from os import listdir
from os.path import isfile, join

import ftplib
import sys
from re import search
from tqdm import tqdm 
import glob

import fsspec
import gcsfs

## Setting up

In [4]:
# Google Cloud project ID and name of the storage bucket holding the data
PROJECT = 'ee-iceshelf-gee4geo'
BUCKET = "ee-downscalingclimatemodels"

In [5]:
# Expose the project ID through the environment
# NOTE(review): presumably read by the Google Cloud client created below - confirm
os.environ["GCLOUD_PROJECT"] = PROJECT

In [6]:
# Google cloud: module-level client and bucket handle shared by the helper functions
from google.cloud import storage
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET)

In [7]:
def empty_dir(pathLocal):
  """
     Delete every regular file located directly inside a directory.
     Sub-directories and their content are left untouched.

     @param pathLocal: directory to clean (string), with or without
                       a trailing path separator
  """
  for file_name in os.listdir(pathLocal):
      # os.path.join instead of string concatenation: with a path given
      # without trailing '/', concatenation pointed outside the directory
      # and nothing was deleted
      file = os.path.join(pathLocal, file_name)
      if os.path.isfile(file):
          os.remove(file)

def create_dir(path):
  """
     Make sure a directory exists, creating it (and any missing parent) if needed.

     @param path: directory to create (string)
  """
  already_there = os.path.exists(path)
  if already_there:
      return
  os.makedirs(path)

def listFilesGC(path, VAR):
  """
     List the file names of one variable that are already stored in the bucket.

     Uses the module-level `storage_client` and `bucket`.

     @param path: sub-folder of 'Chris_data/RawData/MAR-ACCESS1.3/' to look into (string)
     @param VAR:  file-name prefix of the variable, e.g. 'RH_' (string)

     Returned Value:
        - List of file names of the form '<VAR>ant....nc' (without the folder part)
  """
  # local import: the notebook only imports `search` from re at the top
  from re import escape

  # VAR is taken literally and the dot of the '.nc' extension is escaped
  pattern = rf"{escape(VAR)}ant(.*?)\.nc"
  filesGC = []
  for blob in storage_client.list_blobs(bucket, prefix=f'Chris_data/RawData/MAR-ACCESS1.3/{path}/'):
    # match on the object name itself rather than on the repr of the blob
    match = search(pattern, blob.name)
    # objects that carry the prefix but not the full pattern are skipped
    # instead of raising AttributeError on a missing match
    if match:
      filesGC.append(match.group(0))
  return filesGC

def downloadFromGC(destName, pathLocal, filesGC):
  """
     Download a list of files from the bucket into a local directory,
     showing a progress bar. Uses the module-level `bucket`.

     @param destName:  folder of the files inside the bucket, ending with '/' (string)
     @param pathLocal: local destination directory, ending with '/' (string)
     @param filesGC:   names of the files to download (list of strings)
  """
  for file_name in tqdm(filesGC, total=len(filesGC)):
    # remote object name and local target are both built by plain concatenation
    source_blob = bucket.blob(destName + file_name)
    source_blob.download_to_filename(pathLocal + file_name)

def filesInDir(pathLocal):
  """
     Return the sorted names of the regular files located directly in a directory.

     @param pathLocal: directory to list (string)
  """
  names = []
  for entry in listdir(pathLocal):
    if isfile(join(pathLocal, entry)):
      names.append(entry)
  names.sort()
  return names

def ZarrPerDecade(onlyfiles, NumDecades, pathLocal, pathLocalZarr, pathGC, m = 10, prefix = None):
  """
     Merge local NetCDF files in consecutive groups of `m` files and write
     one Zarr store per group to Google Cloud Storage.

     Group j (0-based) holds files j*m .. (j+1)*m - 1; the last group also
     takes every remaining file. Stores are written to
     'gs://<pathGC><prefix>_decade_<j+1>.zarr' and overwrite existing ones.

     @param onlyfiles:     sorted file names inside pathLocal (list of strings)
     @param NumDecades:    number of groups / Zarr stores to write (int)
     @param pathLocal:     local directory holding the NetCDF files (string)
     @param pathLocalZarr: unused, kept so existing calls keep working
     @param pathGC:        '<bucket>/<folder>/' destination, ending with '/' (string)
     @param m:             number of files per group (int)
     @param prefix:        first part of the store name; defaults to the name of
                           the pathLocal directory (e.g. '/tmp/RH/' -> 'RH')
  """
  if prefix is None:
    # derive the name from the arguments instead of reading a global `path`,
    # which is undefined or stale unless a cell happened to set it
    prefix = os.path.basename(os.path.normpath(pathLocal))
  for j in tqdm(range(NumDecades)):
    start = j*m
    # the last group is open-ended so left-over files are not dropped
    stop = m*(j+1) if j < NumDecades-1 else None
    opened = []
    try:
      for f in onlyfiles[start:stop]:
        opened.append(xr.open_dataset(os.path.join(pathLocal, f)))
      # start from the first file of the group and merge the others into it
      df = opened[0]
      for df2 in opened[1:]:
        df = df.merge(df2)
      # upload to GC
      df.to_zarr('gs://'+pathGC+f'{prefix}_decade_{j+1}.zarr', mode = 'w', consolidated = True)
    finally:
      # release the file handles of this group before opening the next one
      for ds in opened:
        ds.close()

# Download files from google cloud

Copy all files from the google cloud bucket:

Alternatively, copy a whole folder from the bucket in one go with `gsutil`:

`! gsutil -m cp gs://ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/RH/* /tmp/RH/`

and upload the resulting Zarr stores to Google Cloud with:
` !gsutil -m cp -r /tmp/CC_zarr/CC*.zarr gs://ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/CC_zarr/`

In [8]:
def CreateDecadeData(path, VAR, m = 10):
  """
     Download all files of one variable from the bucket, merge them in groups
     of `m` files and write one Zarr store per group back to the bucket.

     Local working directories are '/tmp/<path>/' and '/tmp/<path>_zarr/';
     the files they contain are deleted before and after the run.

     @param path: variable name, also the sub-folder name in the bucket (string)
     @param VAR:  file-name prefix of the variable, e.g. 'RH_' (string)
     @param m:    number of files per Zarr store (int)
  """
  pathLocal = f'/tmp/{path}/'
  pathLocalZarr = f'/tmp/{path}_zarr/'

  # create the working directories and empty them out of precaution
  for directory in (pathLocal, pathLocalZarr):
    create_dir(directory)
    empty_dir(directory)

  try:
    # list files on GC
    filesGC = listFilesGC(path, VAR)
    print(f'Number of files already on GC: {len(filesGC)}')

    # Download files from GC
    destName = f'Chris_data/RawData/MAR-ACCESS1.3/{path}/'
    print('Downloading files from GC')
    downloadFromGC(destName, pathLocal, filesGC)

    # Get all file names locally
    onlyfiles = filesInDir(pathLocal)
    print(f'Number of files downloaded from GC: {len(onlyfiles)}')
    if not onlyfiles:
      # nothing to merge: stop here instead of failing on onlyfiles[0]
      print('No files to process: nothing written.')
      return
    print('Examples of files: \n {}'.format(onlyfiles[0]))

    # Create a zarr file per decade (bucket name taken from the BUCKET constant):
    pathZarr = f'{BUCKET}/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/{path}_zarr/'
    # at least one group, so fewer than m files still end up in a store
    NumDecades = max(1, len(onlyfiles) // m)
    print(f'Number of decades: {NumDecades}, number of files: {len(onlyfiles)}')
    print(f'Saving zarr files at: {pathZarr}')
    ZarrPerDecade(onlyfiles, NumDecades, pathLocal, pathLocalZarr, pathZarr, m)
  finally:
    # free the local disk even when a step above failed
    empty_dir(pathLocal)
    empty_dir(pathLocalZarr)

## Variables:

In [None]:
# RH (DONE)
path = 'RH'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR)  # download, merge in groups of m files, write Zarr to the bucket

In [None]:
# CC (DONE)
path = 'CC'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR)  # download, merge in groups of m files, write Zarr to the bucket

In [None]:
# RU (DONE)
path = 'RU'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR)  # download, merge in groups of m files, write Zarr to the bucket

Number of files already on GC: 121


100%|██████████| 121/121 [01:15<00:00,  1.61it/s]


Number of files downloaded from GC: 121
Examples of files: 
 RU_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/RU_zarr/
Number of decades: 12, number of files: 121


100%|██████████| 12/12 [09:43<00:00, 48.65s/it]


In [9]:
# VVP
path = 'VVP'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR, m = 10)  # m = number of files merged into one Zarr store

Number of files already on GC: 121
Downloading files from GC


100%|██████████| 121/121 [06:50<00:00,  3.39s/it]


Number of files downloaded from GC: 121
Examples of files: 
 VVP_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
Number of decades: 12, number of files: 121
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/VVP_zarr/


100%|██████████| 12/12 [27:19<00:00, 136.64s/it]


In [None]:
# LWD
path = 'LWD'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR, m = 10)  # m = number of files merged into one Zarr store

Number of files already on GC: 120
Downloading files from GC


100%|██████████| 120/120 [01:06<00:00,  1.81it/s]


Number of files downloaded from GC: 120
Examples of files: 
 LWD_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
Number of decades: 12, number of files: 120
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/LWD_zarr/


100%|██████████| 12/12 [03:26<00:00, 17.19s/it]


In [None]:
# ME
path = 'ME'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR, m = 10)  # m = number of files merged into one Zarr store

Number of files already on GC: 121
Downloading files from GC


100%|██████████| 121/121 [01:03<00:00,  1.91it/s]


Number of files downloaded from GC: 121
Examples of files: 
 ME_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
Number of decades: 12, number of files: 121
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/ME_zarr/


100%|██████████| 12/12 [03:31<00:00, 17.64s/it]


In [10]:
# UUP
path = 'UUP'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR, m = 10)  # m = number of files merged into one Zarr store

Number of files already on GC: 96
Downloading files from GC


100%|██████████| 96/96 [06:27<00:00,  4.04s/it]


Number of files downloaded from GC: 96
Examples of files: 
 UUP_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19820101-19821231.nc
Number of decades: 9, number of files: 96
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/UUP_zarr/


100%|██████████| 9/9 [21:38<00:00, 144.30s/it]


In [11]:
# QQP
path = 'QQP'  # variable name, also the sub-folder name in the bucket
VAR = path+'_'  # file-name prefix used to pick this variable's files
CreateDecadeData(path, VAR, m = 10)  # m = number of files merged into one Zarr store

Number of files already on GC: 121
Downloading files from GC


100%|██████████| 121/121 [06:12<00:00,  3.08s/it]


Number of files downloaded from GC: 121
Examples of files: 
 QQP_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
Number of decades: 12, number of files: 121
Saving zarr files at: ee-downscalingclimatemodels/Chris_data/RawData/MAR-ACCESS1.3/zarr_data/QQP_zarr/


100%|██████████| 12/12 [27:04<00:00, 135.41s/it]


# FTP download files and move to google cloud

## Prepare ftp session:

In [10]:
# test connection to server: connect, log in without credentials, disconnect
# (ftp_server is reused by the cells below)
ftp_server = 'ftp.climato.be'
ftp_session= ftplib.FTP(ftp_server)
ftp_session.login()
ftp_session.quit()

'221 Goodbye.'

In [11]:
import time
import random
# Whole number of seconds between 0 and 3, drawn once when this cell is run
randSleep = random.randint(0, 3)

# Write a function that initiates a FTP session
def open_ftp_session(ftp_server):
    """
       Open a ftp session given the server ftp address.
       No user ID or password is passed, i.e. the login is anonymous.

       @param ftp_server: name of the ftp server (string)

       Returned Value:
          - ftp session object, already logged in
    """
    session = ftplib.FTP(ftp_server)
    session.login()
    return session


def ftp_dir_content(ftp_session, dir_name=None):
    """
       List the content of a directory in a ftp server.
       If the directory is not provided, the current directory
       of the session is listed.

       Note: when dir_name is given the session changes to that
       directory and stays there after the call.

       @param ftp_session: ftp session object
       @param dir_name:    name of the directory you want to access (string)

       Returned Value:
          - List of directories and files, one listing line per entry
           (similar to the Unix command 'ls -l')
    """
    if dir_name is not None:
        # Move the session into the requested directory first
        ftp_session.cwd(dir_name)
    listing = []
    # Each line of the listing is handed to the callback
    ftp_session.dir(listing.append)
    return listing

def ftp_get_file(ftp_session, file_name):
    """
         Get a file from a ftp server and store it locally under the
         same name (relative to the current working directory).

         A failed transfer is reported with a message, not raised; the
         empty or partial local file it leaves behind is deleted.

         @param ftp_session: ftp session object
         @param file_name: name of the file you want to download

         Returned Value:
            - True if the file was downloaded, False otherwise
    """
    try:
        # the with-block closes (and flushes) the local file in every case
        with open(file_name, 'wb') as local_file:
            ftp_session.retrbinary("RETR " + file_name, local_file.write)
    except ftplib.all_errors:
        # ftp, network and file errors only: KeyboardInterrupt is no longer swallowed
        print("Error - Cannot obtain file: "+ file_name)
        # do not leave a truncated file that could be mistaken for a download
        if os.path.exists(file_name):
            os.remove(file_name)
        return False
    return True

def listFilesFTP(ftp_server, VAR):
  """
     List the file names of one variable available on the ftp server,
     in the directory 'climato/ckittel/MARv3.11/Marijn/MAR-ACCESS1.3/'.

     @param ftp_server: name of the ftp server (string)
     @param VAR:        file-name prefix of the variable, e.g. 'RH_' (string)

     Returned Value:
        - List of file names of the form '<VAR>ant....nc'
  """
  # local import: the notebook only imports `search` from re at the top
  from re import escape

  # VAR is taken literally and the dot of the '.nc' extension is escaped
  pattern = rf"{escape(VAR)}ant(.*?)\.nc"
  # the with-block closes the session once the listing has been read
  with open_ftp_session(ftp_server) as ftp_session:
    data = ftp_dir_content(ftp_session, dir_name='climato/ckittel/MARv3.11/Marijn/MAR-ACCESS1.3/')
  ftpFiles = []
  for line in data:
    match = search(pattern, line)
    # lines that contain the prefix but not the full pattern are skipped
    # instead of raising AttributeError on a missing match
    if match:
      ftpFiles.append(match.group(0))
  return ftpFiles

def copyFTP_GC(path, remainingFiles, ftp_server):
  """
     Copy files from the ftp server to the bucket, one at a time,
     going through a temporary local copy in the current working directory.
     Uses the module-level `bucket`.

     @param path:           variable name, sub-folder of the destination in the bucket (string)
     @param remainingFiles: names of the files to copy (list of strings)
     @param ftp_server:     name of the ftp server (string)
  """
  # Copy remaining files from FTP to GC:
  dir_name  = 'climato/ckittel/MARv3.11/Marijn/MAR-ACCESS1.3/'
  destName = f'Chris_data/RawData/MAR-ACCESS1.3/{path}/'
  for file_name in tqdm(remainingFiles):
    # one short session per file, closed as soon as the download is over
    with open_ftp_session(ftp_server) as ftp_session:
      ftp_session.cwd(dir_name)
      ftp_get_file(ftp_session, file_name)
    # new random pause (0 to 3 s) for every file, not one value drawn once
    time.sleep(random.randint(0, 3))
    # a failed download leaves no file or an empty one: never upload that
    if not os.path.isfile(file_name) or os.path.getsize(file_name) == 0:
      print("Skipping upload, download failed: " + file_name)
      if os.path.isfile(file_name):
        os.remove(file_name)
      continue
    # upload to google cloud:
    blob = bucket.blob(destName+file_name)
    blob.upload_from_filename(file_name)
    # the local copy is no longer needed; keep the disk from filling up
    os.remove(file_name)

In [12]:
# Open a session that the next cell reuses
ftp_session = open_ftp_session(ftp_server)
# To list the top directories in the server
ftp_session.retrlines('LIST')

drwxr-xr-x    4 1004     0            4096 Sep 11  2015 AIC2015
drwxr-xr-x   14 1000     1000         4096 Apr 19  2021 climato
drwx------    2 1047     49         131072 Feb 03 12:08 cmi
drwxr-xr-x    2 1000     1000         4096 Mar 15  2011 data
drwxr-xr-x   11 1000     1000         4096 Mar 16 09:31 fettweis
drwx---rwx   10 2012     49           4096 Jul 14  2015 flexipac
drwxr-xr-x    2 2001     100         20480 Mar 01 04:20 irm
drwxr-xr-x    5 2002     100         12288 Oct 19  2016 irm2
drwxr-xr-x   11 1008     49          94208 Mar 31 07:10 katabata
drwx------    2 0        0           16384 Feb 25  2017 lost+found
drwxr-xr-x    2 1050     49         323584 Mar 31 05:47 merygrid
lrwxrwxrwx    1 0        0              14 Feb 23  2017 upload -> climato/upload


'226 Directory send OK.'

In [13]:
# List the data directory with the session opened in the previous cell
# and show the first five lines of the listing
data = ftp_dir_content(ftp_session, dir_name='climato/ckittel/MARv3.11/Marijn/MAR-ACCESS1.3/')
for line in data[:5]:
    print("-", line)

- -rw-r--r--    1 1028     1000     38208563 Mar 17 09:43 CC_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19800101-19801231.nc
- -rw-r--r--    1 1028     1000     38104367 Mar 17 09:43 CC_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19810101-19811231.nc
- -rw-r--r--    1 1028     1000     38104367 Mar 17 09:43 CC_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19820101-19821231.nc
- -rw-r--r--    1 1028     1000     38104367 Mar 17 09:43 CC_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19830101-19831231.nc
- -rw-r--r--    1 1028     1000     38208563 Mar 17 09:43 CC_ant-35km_ACCESS1.3_rcp8.5_r1i1p1_ULg-MAR311_v1_day_19840101-19841231.nc


## RH

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'RH'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

Number of files already on GC: 121
Number of files on ftp: 121
Remaining files to put on GC: 0


0it [00:00, ?it/s]


## CC

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'CC'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## RU

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'RU'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

Number of files already on GC: 121
Number of files on ftp: 121
Remaining files to put on GC: 0


0it [00:00, ?it/s]


## VVP

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'VVP'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

Number of files already on GC: 121
Number of files on ftp: 121
Remaining files to put on GC: 0


0it [00:00, ?it/s]


## UUP: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'UUP'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## LWD:

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'LWD'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

Number of files already on GC: 1
Number of files on ftp: 121
Remaining files to put on GC: 120


100%|██████████| 120/120 [30:03<00:00, 15.03s/it]


## ME:

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'ME'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

Number of files already on GC: 0
Number of files on ftp: 121
Remaining files to put on GC: 121


100%|██████████| 121/121 [30:00<00:00, 14.88s/it]


## QQP:

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'QQP'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## RF: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'RF'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## SF: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'SF'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## SMB: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'SMB'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## SP: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'SP'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## SU: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'SU'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## SWD: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'SWD'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## TT: (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'TT'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

## TTP (ND)

In [None]:
# Variable to copy from the FTP server to Google Cloud (also the bucket sub-folder name)
path = 'TTP'
VAR = path+'_'  # file-name prefix used to pick this variable's files

# Get all files already on GC:
filesGC = listFilesGC(path, VAR)
print(f'Number of files already on GC: {len(filesGC)}')

# Get all filenames in FTP server
ftpFiles = listFilesFTP(ftp_server, VAR)
print(f'Number of files on ftp: {len(ftpFiles)}')

# Files on the FTP server that are not in the bucket yet (set difference, so the order is arbitrary)
remainingFiles = list(set(ftpFiles) - set(filesGC))
print(f'Remaining files to put on GC: {len(remainingFiles)}')

# Copy remaining files from FTP to GC:
copyFTP_GC(path, remainingFiles, ftp_server)

In [None]:
# NOTE(review): bare list, not assigned to anything - presumably a reminder of the
# variables handled so far; confirm or move to a markdown cell
['RH', 'RU', 'VVP', 'UUP', 'LWD', 'ME']