In [4]:
import geopandas
import pandas as pd
import os

In [5]:
# import geopandas got an error, and resolved using opening the jupyter notebook with anaconda navigator instead
# of powershell prompt

In [6]:
# Topic of interest: the FTP subdirectory this notebook works with.
topic_dir = "/annual/kl/historical/"
print(f"Subdirectory on FTP Server: {topic_dir}")

Subdirectory on FTP Server: /annual/kl/historical/


In [7]:
# Root for the local copies of the FTP data, i.e. the local data source / input data.
# (Alternative root: "../data/original/DWD/")
local_ftp_dir = "data/original/DWD/"
# Station info and the time series downloaded from FTP share one topic subdirectory.
local_ftp_station_dir = local_ftp_ts_dir = local_ftp_dir + topic_dir

# Root for generated / derived data, in contrast to the raw copies in local_ftp_dir.
# (Alternative root: "../data/generated/DWD/")
local_generated_dir = "data/generated/DWD/"
# One topic subdirectory holds all derived products:
#   local_station_dir     - derived station data, i.e. the CSV file
#   local_ts_merged_dir   - parallel merged time series (wide data frame, one TS per column)
#   local_ts_appended_dir - serially appended time series (long data frame for the QGIS TimeManager plugin)
local_station_dir = local_ts_merged_dir = local_ts_appended_dir = local_generated_dir + topic_dir

In [8]:
#import os
# Create all working directories. exist_ok=True means an already existing
# directory is not treated as an error, so the cell can be re-run safely.
for _dir in (
    local_ftp_dir,
    local_ftp_station_dir,
    local_ftp_ts_dir,
    local_generated_dir,
    local_station_dir,
    local_ts_merged_dir,
    local_ts_appended_dir,
):
    os.makedirs(_dir, exist_ok=True)

In [9]:
# Sanity check: print the configured directory paths, one per line,
# with a blank line between the FTP-copy group and the generated-data group.
print(local_ftp_dir, local_ftp_station_dir, local_ftp_ts_dir, sep="\n")
print()
print(local_generated_dir, local_station_dir, local_ts_merged_dir, local_ts_appended_dir, sep="\n")

data/original/DWD/
data/original/DWD//annual/kl/historical/
data/original/DWD//annual/kl/historical/

data/generated/DWD/
data/generated/DWD//annual/kl/historical/
data/generated/DWD//annual/kl/historical/
data/generated/DWD//annual/kl/historical/


### FTP Connection

## Connection parameters

In [10]:
# FTP host plus the login credentials (user "anonymous", empty password).
server, user, passwd = "opendata.dwd.de", "anonymous", ""

In [11]:
# Search pattern common to ALL station description file names.
station_desc_pattern = "_Beschreibung_Stationen.txt"

# Node of the FTP directory tree below which all climate data are stored.
ftp_climate_data_dir = "/climate_environment/CDC/observations_germany/climate/"

# Absolute FTP directory holding the data (topic) of concern.
ftp_dir = f"{ftp_climate_data_dir}{topic_dir}"
print(f"Absolute FTP directory path with data of concern: {ftp_dir}")

Absolute FTP directory path with data of concern: /climate_environment/CDC/observations_germany/climate//annual/kl/historical/


### FTP Connect

In [12]:
import ftplib

# Connect to the FTP host and log in; the server's reply string is printed.
ftp = ftplib.FTP(host=server)
res = ftp.login(user, passwd)
print(res)

230 Login successful.


In [13]:
# Change to the current directory "." on the server and keep the reply in `ret`.
ret = ftp.cwd(".")

In [14]:
#ftp.quit()

### FTP Grab File Function

In [15]:
def grabFile(ftpfullname, localfullname):
    """Download one file from the FTP server to a local path.

    Uses the module-level ``ftp`` connection. The data is first written to
    ``localfullname + ".part"`` and only moved to ``localfullname`` once the
    transfer has completed, so a failed download neither leaves a truncated
    file behind nor destroys a previously downloaded copy.

    Parameters
    ----------
    ftpfullname : str
        Full path of the file on the FTP server.
    localfullname : str
        Full path of the local target file.

    Returns
    -------
    None
        FTP permission errors, temporary FTP errors and aborted connections
        are reported with a printed message instead of being raised.
    """
    partfullname = localfullname + ".part"
    try:
        # A dummy action to check the connection and to provoke an exception if necessary.
        ftp.cwd(".")
        # The with-block closes the file even when the transfer raises.
        with open(partfullname, 'wb') as localfile:
            ftp.retrbinary('RETR ' + ftpfullname, localfile.write, 1024)
        # Transfer finished: put the complete file in place.
        os.replace(partfullname, localfullname)

    except ftplib.error_perm:
        print("FTP ERROR. Operation not permitted. File not found?")

    except ftplib.error_temp:
        print("FTP ERROR. Timeout.")

    except ConnectionAbortedError:
        print("FTP ERROR. Connection aborted.")

    finally:
        # After a successful os.replace() the temporary file no longer exists;
        # after any failure the incomplete download is discarded here.
        if os.path.exists(partfullname):
            os.remove(partfullname)

## Generate Pandas Dataframe from FTP Directory Listing

In [16]:
def gen_df_from_ftp_dir_listing(ftp, ftpdir):
    """Build a DataFrame describing the entries of an FTP directory.

    The directory is listed with the FTP ``LIST`` command and every listing
    line is parsed as a Unix-style (``ls -l``) entry, i.e. whitespace-separated
    fields ``perms links owner group size month day time-or-year name``.
    Lines that do not have this shape (for example a ``total N`` header) are
    skipped.

    Parameters
    ----------
    ftp : object
        Connection providing ``retrlines(cmd, callback)``, e.g. ``ftplib.FTP``.
    ftpdir : str
        Directory on the server to list.

    Returns
    -------
    pandas.DataFrame or None
        One row per entry with the columns ``station_id``, ``name``, ``ext``,
        ``size`` and ``type`` (first character of the permission field).
        ``station_id`` is the integer value of the third "_"-separated token
        of ``.zip`` file names and -1 for every other entry.
        ``None`` is returned, after printing a message, when the listing
        fails or is empty.
    """
    lines = []
    try:
        ftp.retrlines("LIST " + ftpdir, lines.append)
    except ftplib.all_errors:
        # Only FTP / network problems are handled here; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        print("Error: ftp.retrlines() failed. ftp timeout? Reconnect!")
        return None

    if len(lines) == 0:
        print("Error: ftp dir is empty")
        return None

    flist = []
    for line in lines:
        # Split into at most 9 fields so that a file name containing blanks
        # stays intact in the last field. This does not depend on fixed
        # column positions, which shift with owner/group/size widths.
        fields = line.split(None, 8)
        if len(fields) < 9 or not fields[4].isdigit():
            continue  # not a regular entry line

        ftype = fields[0][0:1]
        fsize = int(fields[4])
        fname = fields[8]
        fext = os.path.splitext(fname)[-1]

        station_id = -1
        if fext == ".zip":
            try:
                station_id = int(fname.split("_")[2])
            except (IndexError, ValueError):
                # Zip file that does not follow the expected naming scheme.
                station_id = -1

        flist.append([station_id, fname, fext, fsize, ftype])

    df_ftpdir = pd.DataFrame(flist, columns=["station_id", "name", "ext", "size", "type"])
    return df_ftpdir

In [17]:
# Build the listing DataFrame for the FTP directory `ftp_dir` using the open connection `ftp`.
df_ftpdir = gen_df_from_ftp_dir_listing(ftp, ftp_dir)

In [18]:
# Show the first ten rows of the directory listing.
df_ftpdir.head(10)

Unnamed: 0,station_id,name,ext,size,type
0,-1,KL_Jahreswerte_Beschreibung_Stationen.txt,.txt,240187,-
1,1,jahreswerte_KL_00001_19310101_19860630_hist.zip,.zip,12989,-
2,3,jahreswerte_KL_00003_18510101_20110331_hist.zip,.zip,20125,-
3,44,jahreswerte_KL_00044_19710301_20211231_hist.zip,.zip,16176,-
4,52,jahreswerte_KL_00052_19730101_20011231_hist.zip,.zip,13759,-
5,61,jahreswerte_KL_00061_19750701_19780831_hist.zip,.zip,9130,-
6,70,jahreswerte_KL_00070_19730601_19860930_hist.zip,.zip,9680,-
7,71,jahreswerte_KL_00071_19861101_20191231_hist.zip,.zip,14916,-
8,72,jahreswerte_KL_00072_19781001_19950531_hist.zip,.zip,12892,-
9,73,jahreswerte_KL_00073_19530101_20211231_hist.zip,.zip,17379,-


Dataframe with TS Zip Files

In [19]:
# Keep only the rows whose extension is ".zip" and index them by station id.
df_zips = df_ftpdir.loc[df_ftpdir["ext"] == ".zip"].set_index("station_id")
df_zips.head(10)

Unnamed: 0_level_0,name,ext,size,type
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,jahreswerte_KL_00001_19310101_19860630_hist.zip,.zip,12989,-
3,jahreswerte_KL_00003_18510101_20110331_hist.zip,.zip,20125,-
44,jahreswerte_KL_00044_19710301_20211231_hist.zip,.zip,16176,-
52,jahreswerte_KL_00052_19730101_20011231_hist.zip,.zip,13759,-
61,jahreswerte_KL_00061_19750701_19780831_hist.zip,.zip,9130,-
70,jahreswerte_KL_00070_19730601_19860930_hist.zip,.zip,9680,-
71,jahreswerte_KL_00071_19861101_20191231_hist.zip,.zip,14916,-
72,jahreswerte_KL_00072_19781001_19950531_hist.zip,.zip,12892,-
73,jahreswerte_KL_00073_19530101_20211231_hist.zip,.zip,17379,-
78,jahreswerte_KL_00078_19610101_20211231_hist.zip,.zip,13962,-


Download the Station Description File

In [20]:
# Pick the name of the station description file from the directory listing.
# station_desc_pattern is a literal file-name fragment, so it is matched as
# plain text (regex=False); as a regular expression its "." would match any
# character.
station_fname = df_ftpdir.loc[
    df_ftpdir["name"].str.contains(station_desc_pattern, regex=False), "name"
].values[0]
print(station_fname)

# Alternative: regular-expression match on the whole file name
#station_fname2 = df_ftpdir[df_ftpdir["name"].str.match("^.*Beschreibung_Stationen.*txt$")]["name"].values[0]
#print(station_fname2)

KL_Jahreswerte_Beschreibung_Stationen.txt


In [21]:
# Download the station description file: build source and target paths once,
# report them, then fetch the file.
ftp_station_path = ftp_dir + station_fname
local_station_path = local_ftp_station_dir + station_fname
print("grabFile: ")
print("From: " + ftp_station_path)
print("To:   " + local_station_path)
grabFile(ftp_station_path, local_station_path)

grabFile: 
From: /climate_environment/CDC/observations_germany/climate//annual/kl/historical/KL_Jahreswerte_Beschreibung_Stationen.txt
To:   data/original/DWD//annual/kl/historical/KL_Jahreswerte_Beschreibung_Stationen.txt
FTP ERROR. Timeout.
