In [108]:
from ftplib import FTP
import traceback
import sys
import os
import boto3

In [94]:
# S3 bucket that is listed and uploaded to by the cells below.
BUCKET_NAME = "files-cnes-datasus"
# Prefix an FTP file name must start with to be collected.
BASE_FILES_NAME = "BASE_DE_DADOS_CNES"
# FTP host and the directory on it that is passed to download_zipfile().
SITE = "ftp.datasus.gov.br"
FTP_FOLDER = "cnes"

In [95]:
def print_error() -> None:
    """Print the traceback of the exception being handled and exit the script.

    Meant to be called from inside an ``except`` block, where
    ``traceback.print_exc()`` has an active exception to report.

    Raises:
        SystemExit: always, with exit status 1 so that callers (shell,
            schedulers) can tell the run failed.
    """
    traceback.print_exc()
    print("Closing...")
    # A bare sys.exit() would report status 0 (success) after an error.
    sys.exit(1)

In [96]:
# GET ZIPFILES NAMES FROM FTP
# Uses the SITE / FTP_FOLDER constants so this cell and download_zipfile()
# always talk to the same server and directory.
with FTP(SITE) as ftp:
    try:
        # LOGIN: ftp.login() logs in and returns the server response string
        if ftp.login().startswith("230"):
            print("Logged in.\n")
        else:
            print("Failed to log in.")
            sys.exit(1)

        # GO TO THE FTP FOLDER: ftp.cwd() changes the working directory and
        # returns the server response string
        if ftp.cwd(FTP_FOLDER).startswith("250"):
            print(f"Directory changed to '{FTP_FOLDER}'.\n")
        else:
            print("Failed to change directory.")
            sys.exit(1)
    except Exception:
        # `except Exception` (not a bare except) lets the SystemExit raised
        # by sys.exit() above propagate instead of being reported as an error.
        print_error()
    try:
        # ftp.nlst() returns the names in the current directory; keep only
        # those that start with the expected base name.
        zipfiles_names_ftp = [
            name for name in ftp.nlst() if name.startswith(BASE_FILES_NAME)
        ]
        print("All zipfiles names collected from ftp server.\n")
    except Exception:
        print_error()

Logged in.

Directory changed to 'cnes'.

All zipfiles names collected from ftp server.



In [99]:
# LIST THE ZIP FILES ALREADY STORED UNDER "zipfiles/" IN THE BUCKET
s3_client = boto3.client("s3")

# Paginate so buckets with more than one page of keys are listed completely,
# and read "Contents" with .get() because S3 omits it when nothing matches.
response = [
    obj
    for page in s3_client.get_paginator("list_objects_v2").paginate(
        Bucket=BUCKET_NAME, Prefix="zipfiles/"
    )
    for obj in page.get("Contents", [])
]
content_zipfiles = [obj["Key"] for obj in response]

# Strip the prefix and skip the "zipfiles/" placeholder key itself, instead of
# blindly dropping the first entry (which loses a real file when the
# placeholder object does not exist).
zipfiles_names_bucket = [
    key[len("zipfiles/"):] for key in content_zipfiles if key != "zipfiles/"
]
if not zipfiles_names_bucket:
    print("No zip files.")

In [100]:
# Display the zip file names found under "zipfiles/" in the bucket.
zipfiles_names_bucket

['BASE_DE_DADOS_CNES_201809.ZIP', 'BASE_DE_DADOS_CNES_201810.ZIP']

In [101]:
def download_zipfile(site: str, ftp_folder: str, zip_files_path: str, zipfiles_names_bucket: list, file: str) -> None:
    """Download one file from an FTP folder unless the bucket already has it.

    Args:
        site: FTP host to connect to.
        ftp_folder: Directory on the FTP server that contains ``file``.
        zip_files_path: Local directory the file is written into.
        zipfiles_names_bucket: Names already stored in the bucket; when
            ``file`` is among them nothing is downloaded.
        file: Name of the file to retrieve.

    Raises:
        SystemExit: if logging in or changing directory fails.
    """
    # Decide before opening a connection: no need to log in for a no-op.
    if file in zipfiles_names_bucket:
        print(f"{file} is already in the bucket, skipping download.")
        return

    # os.path.join works whether or not zip_files_path ends with a separator.
    local_path = os.path.join(zip_files_path, file)

    with FTP(site) as ftp:
        try:
            # LOGIN: ftp.login() returns the server response string
            if ftp.login().startswith("230"):
                print("Logged in.\n")
            else:
                print("Failed to log in.")
                sys.exit(1)

            # GO TO THE FTP FOLDER: ftp.cwd() returns the server response string
            if ftp.cwd(ftp_folder).startswith("250"):
                print(f"Directory changed to '{ftp_folder}'.\n")
            else:
                print("Failed to change directory.")
                sys.exit(1)
        except Exception:
            # Not a bare except: SystemExit from sys.exit() must propagate.
            print_error()

        print(f"Downloading {file}...")
        with open(local_path, "wb") as f:
            try:
                # Streams the file to disk and returns the server response string
                ret_code = ftp.retrbinary(f"RETR {file}", f.write)
            except Exception:
                # Do not leave a truncated archive behind for a later upload.
                f.close()
                os.remove(local_path)
                raise

        if ret_code.startswith("226"):
            print(f"{file} downloaded.")
        else:
            print(f"Error downloading file: {ret_code}")

In [105]:
def upload_zipfile(s3_resource: "boto3.resources.base.ServiceResource", filename: str, bucket: str) -> None:
    """Upload a local file to ``bucket`` under the ``zipfiles/`` prefix.

    Args:
        s3_resource: boto3 S3 resource whose underlying client does the upload.
        filename: Path of the local file to upload.
        bucket: Name of the destination bucket.
    """
    # Use only the file's base name for the object key so a path such as
    # "./X.ZIP" is stored as "zipfiles/X.ZIP" rather than "zipfiles/./X.ZIP".
    s3_resource.meta.client.upload_file(
        Filename=filename,
        Bucket=bucket,
        Key="zipfiles/" + os.path.basename(filename),
    )

In [None]:
# Fetch the first collected FTP file into the current directory;
# download_zipfile() itself checks the name against zipfiles_names_bucket.
download_zipfile(SITE, FTP_FOLDER, "./", zipfiles_names_bucket, zipfiles_names_ftp[0])

In [106]:
# Upload the first collected file, but only if it exists locally:
# download_zipfile() does not fetch files that are already in the bucket.
s3_resource = boto3.resource("s3")
if os.path.exists(zipfiles_names_ftp[0]):
    upload_zipfile(s3_resource, zipfiles_names_ftp[0], BUCKET_NAME)
else:
    print(f"{zipfiles_names_ftp[0]} not found locally, nothing to upload.")

In [109]:
# Remove the local copy; guarded so re-running the cell (or running it after
# a skipped download) does not raise FileNotFoundError.
if os.path.exists(zipfiles_names_ftp[0]):
    os.remove(zipfiles_names_ftp[0])