In [180]:
import ftplib
import re
from datetime import datetime, timezone
from math import inf
from pathlib import Path
from shutil import move
from tempfile import TemporaryDirectory
from zipfile import ZipFile

In [55]:
# FTP server contacted by `_search` (which logs in without credentials).
HOST = 'ftp.nerc-murchison.ac.uk'
# Remote base directory; `search` appends the data rate to it
# (f'{AUX_OBS}/{rate}') to obtain the directory to list.
AUX_OBS = '/geomag/Swarm/AUX_OBS'

In [52]:
def _parse_listing_line(line):
    """Split one directory-listing line into ``(name, type_flag)``.

    The name is taken as everything after the eighth whitespace-separated
    field (permissions, links, owner, group, size, month, day, time/year),
    so it does not depend on how wide the size or owner columns happen to
    be.  Lines with fewer fields fall back to the fixed column offset (56)
    that the listing was originally sliced at.
    """
    fields = line.split(None, 8)
    name = fields[8] if len(fields) == 9 else line[56:]
    # First character of the line: '-' for a regular file, 'd' for a
    # directory, ... (sliced, not indexed, so it cannot raise).
    return name, line[:1]


def _search(path='', passive=True):
    """List a directory on the FTP server ``HOST``.

    Parameters
    ----------
    path : str
        Remote directory to list; the empty string lists the login directory.
    passive : bool
        Passed to ``FTP.set_pasv`` to select passive or active mode.

    Returns
    -------
    list of (str, str)
        One ``(name, type_flag)`` tuple per non-empty listing line, where
        ``type_flag`` is the first character of the line.
    """
    lines = []
    with ftplib.FTP(HOST) as ftp:
        ftp.login()
        ftp.set_pasv(passive)
        ftp.dir(path, lines.append)
    # Blank lines carry no entry; skip them instead of failing on them.
    return [_parse_listing_line(line) for line in lines if line]

In [98]:
def search(rate, begin_date=None, end_date=None, passive=True):
    """Find AUX_OBS product files on the server overlapping a time window.

    Parameters
    ----------
    rate : str
        Data rate; one of ``'hour'``, ``'minute'`` or ``'second'``.  It
        selects the sub-directory of ``AUX_OBS`` that is listed.
    begin_date, end_date : datetime.datetime, optional
        Bounds of the requested window.  ``None`` leaves that side
        unbounded.  Naive datetimes are interpreted as UTC; aware datetimes
        keep the instant they denote.
    passive : bool
        Forwarded to ``_search`` (passive vs. active FTP mode).

    Returns
    -------
    list of str
        Full remote paths of the regular files whose name matches the
        product pattern and whose validity interval intersects the window.

    Raises
    ------
    ValueError
        If ``rate`` is not one of the accepted values.
    """
    # Validate first so a bad rate is reported before anything else happens.
    if rate not in ('hour', 'minute', 'second'):
        raise ValueError(
            f"OBS data rate can be 'hour', 'minute' or 'second', "
            f"not '{rate}'"
        )

    def to_timestamp(moment):
        # Only naive values are tagged as UTC.  Overwriting the tzinfo of an
        # aware value would shift it by its UTC offset.
        if moment.utcoffset() is None:
            moment = moment.replace(tzinfo=timezone.utc)
        return moment.timestamp()

    window_start = -inf if begin_date is None else to_timestamp(begin_date)
    window_stop = inf if end_date is None else to_timestamp(end_date)

    pattern = re.compile(
        r'SW_OPER_AUX_OBS[_MS]2__'
        r'(?P<validity_start>\d{8}T\d{6})_'
        r'(?P<validity_stop>\d{8}T\d{6})_\d{4}\.\w{3}$'
    )
    time_format = '%Y%m%dT%H%M%S'
    directory = f'{AUX_OBS}/{rate}'

    found = []
    for name, type_flag in _search(directory, passive):
        # Keep regular files only ('-' as first character of the listing).
        if type_flag != '-':
            continue
        m = pattern.match(name)
        if not m:
            continue
        # Timestamps embedded in the file name carry no zone; read as UTC.
        validity_start = to_timestamp(
            datetime.strptime(m.group('validity_start'), time_format)
        )
        validity_stop = to_timestamp(
            datetime.strptime(m.group('validity_stop'), time_format)
        )
        # Closed-interval overlap test between validity and window.
        if validity_start <= window_stop and validity_stop >= window_start:
            found.append(f'{directory}/{name}')
    return found

In [102]:
# List the hourly products; no date bounds are given, so the window is open
# on both sides.
search('hour')

['/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19000101T000000_19001231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19010101T000000_19011231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19020101T000000_19021231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19030101T000000_19031231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19040101T000000_19041231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19050101T000000_19051231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19060101T000000_19061231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19070101T000000_19071231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19080101T000000_19081231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19090101T000000_19091231T235959_0122.ZIP',
 '/geomag/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__19100101T000000_19101231T235959_0122.ZIP',

In [208]:
class OBSFile:
    """An AUX_OBS product file described by the fields encoded in its name.

    The file name must match
    ``SW_OPER_AUX_OBS[_MS]2__<start>_<stop>_<4 digits>.<3 chars>`` where
    ``<start>`` and ``<stop>`` are ``YYYYMMDDTHHMMSS`` timestamps.  Only the
    name is inspected on construction; the file itself is not opened.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the file.

    Raises
    ------
    ValueError
        If the file name does not match the expected pattern.
    """

    __PATTERN = re.compile(
        r'SW_OPER_(?P<file_type>AUX_OBS[_MS]2_)_'
        r'(?P<validity_start>\d{8}T\d{6})_'
        r'(?P<validity_stop>\d{8}T\d{6})_\d{4}\.\w{3}$'
    )
    # Layout of the validity timestamps embedded in the file name.
    __TIME_FORMAT = '%Y%m%dT%H%M%S'

    def __init__(self, path):
        self.__path = Path(path)
        match = self.__PATTERN.match(self.__path.name)
        if not match:
            raise ValueError(
                f'file not valid: {self.__path.name}'
            )
        properties = match.groupdict()
        self.__file_type = properties['file_type']
        self.__validity_start = self.__parse_time(properties['validity_start'])
        self.__validity_stop = self.__parse_time(properties['validity_stop'])

    @staticmethod
    def __parse_time(text):
        """Parse a file-name timestamp into a UTC-aware ``datetime``."""
        return datetime.strptime(
            text, OBSFile.__TIME_FORMAT
        ).replace(tzinfo=timezone.utc)

    def __str__(self):
        return str(self.path)

    def __repr__(self):
        return f'<OBSFile({str(self)})>'

    @property
    def path(self):
        """``pathlib.Path`` the object was created from."""
        return self.__path

    @property
    def name(self):
        """Final component of ``path``."""
        return self.path.name

    @property
    def file_type(self):
        """File-type field of the name, e.g. ``'AUX_OBS_2_'``."""
        return self.__file_type

    @property
    def validity_start(self):
        """Start of the validity interval as a UTC-aware ``datetime``."""
        return self.__validity_start

    @property
    def validity_stop(self):
        """End of the validity interval as a UTC-aware ``datetime``."""
        return self.__validity_stop

    def unzip(self, outdir=''):
        """Extract the first member of the archive into ``outdir``.

        Parameters
        ----------
        outdir : str or os.PathLike
            Destination directory; the default is the current directory.

        Returns
        -------
        OBSFile or None
            A new ``OBSFile`` for the extracted member, or ``None`` when
            this file does not have a ``.ZIP`` suffix (case-insensitive).

        Raises
        ------
        ValueError
            If the archive is empty or its first member's name does not
            match the product file-name pattern.
        """
        outdir = Path(outdir)
        if self.path.suffix.upper() != '.ZIP':
            return None
        with ZipFile(self.path) as zf:
            members = zf.namelist()
            # An empty archive is reported like any other invalid file
            # rather than surfacing as an IndexError.
            if not members:
                raise ValueError(
                    f'file not valid: {str(self)}'
                )
            name = members[0]
            if not self.__PATTERN.match(name):
                raise ValueError(
                    f'file not valid: {str(self)}'
                )
            zf.extract(name, outdir)
        return OBSFile(outdir / name)

    def as_pandas(self):
        """Placeholder for loading the file content; not implemented yet.

        Returns
        -------
        None
            No reader exists yet for any file type.

        Raises
        ------
        ValueError
            If the file is still zipped (``.ZIP`` suffix).
        """
        if self.path.suffix.upper() == '.ZIP':
            raise ValueError(
                f"can't read zipped file: {str(self)}"
            )
        # TODO: add a reader for 'AUX_OBS_2_' files and one for the other
        # file types; until then the method falls through and returns None.
        return None


In [209]:
# Local copy of one hourly product, used to exercise OBSFile below.
# NOTE(review): absolute, machine-specific path; this cell only works where
# this exact file exists. Consider deriving it from a configurable data
# directory.
test = '/Users/luca mariani/Workspace/data/Swarm/AUX_OBS/hour/SW_OPER_AUX_OBS_2__20200101T000000_20201231T235959_0122.ZIP'

In [210]:
# Parse the file name; raises ValueError if it does not match the pattern.
o = OBSFile(test)

In [211]:
# Extract the first archive member into the current directory (default
# outdir='') and show the resulting OBSFile.
o.unzip()

<OBSFile(SW_OPER_AUX_OBS_2__20200101T000000_20201231T235959_0122.txt)>