# RES Timeseries Data Extraction


This Jupyter Notebook downloads and cleans up solar and wind timeseries from the German TSOs TransnetBW and Tennet.

## Loading some python libraries needed later

In [1]:
import urllib
import os
import pandas as pd
import numpy as np
import io

## Configuring URLs

In [43]:
conf = {
    'transnetbw': {
        'pv':  "https://www.transnetbw.de/de/kennzahlen/erneuerbare-energien/fotovoltaik?app=solar&activeTab=csv&selectMonatDownload={month}&view=1&download=true",
        'wind':  'https://www.transnetbw.de/de/kennzahlen/erneuerbare-energien/windenergie?app=wind&activeTab=csv&selectMonatDownload={month}&view=1&download=true',
    }
    ,'tennet': {
        'pv':  "http://www.tennettso.de/site/de/phpbridge?commandpath=Tatsaechliche_und_prognostizierte_Solarenergieeinspeisung%2FmonthDataSheetCsv.php&sub=total&querystring=monat%3D{year}-{month:02d}&contenttype=text%2Fx-csv",
        'wind':  "http://www.tennettso.de/site/de/phpbridge?commandpath=Tatsaechliche_und_prognostizierte_Windenergieeinspeisung%2FmonthDataSheetCsv.php&querystring=monat%3D{year}-{month:02d}&contenttype=text%2Fx-csv",
    },
}

## Downloading the data

Here we loop through the configuration defined above by TSO (transnetbw, tennet) and Technology (pv, wind).

In [44]:
def urls(years, months, url_templates):
    for year in years:
        for month in months:
            for url in url_templates:
                yield url.format(**{'year': year, 'month': month})
                                        
def contents(urls):
    for url in urls:
        yield io.BytesIO(urllib.request.urlopen(url).read())

## Define individual read functions

The TSOs have different columns and date formats they use in their CSV files. Here we define individual functions for the two TSOs to read-in a single monthly file in their specific format.

In [45]:
TENNET_ID = 'tennet'
TRANSNET_ID = 'transnet'

def tennet_data_sets(contents):
    for i, content in enumerate(contents):
        yield pd.read_csv(
            content,
            sep=";",
            skiprows=4,
            names=['datum', 'position', 'forecast_{}_{:d}'.format(TENNET_ID, i), 'actual_{}_{}'.format(TRANSNET_ID, i)],
            usecols=[0, 1, 2, 3]
        )
        
def transnet_data_sets(contents):
    for i, content in enumerate(contents):
        forecast_col = 'forecast_{}_{}'.format(TRANSNET_ID, i)
        actual_col = 'actual_{}_{}'.format(TRANSNET_ID, i)
        data = pd.read_csv(
            content,
            decimal=',',
            sep=";",
            parse_dates={'Timestamp' : ['Datum von', 'Uhrzeit von']},
            index_col="Timestamp",
            dayfirst=True,
            keep_date_col=False,
            usecols=[0, 1, 4, 5]
        )
        data.rename(
            columns={'Prognose (MW)': forecast_col, 'Ist-Wert (MW)': actual_col},
            inplace=True
        )
        yield data
        
def forwards_fill_na(data_sets):
    for data_set in data_sets:
        data_set.fillna(method='ffill', inplace=True)
        yield data_set

def fill_na_with_zero(data_sets):
    for data_set in data_sets:
        data_set.fillna(0, inplace=True)
        
def extract_time(data_sets):
    for data_set in data_sets:
        data_set['hour'] = (np.trunc((data_set['position']-1) /4)).astype(int).astype(str)
        data_set['minute'] = (((data_set['position']-1) % 4)*15).astype(int).astype(str)
        data_set['time'] = data_set['datum']+' '+data_set['hour']+':'+data_set['minute']
        data_set['Timestamp'] = pd.to_datetime(data_set['time'])
        data_set.set_index('Timestamp',inplace=True)
        del data_set['position']
        del data_set['hour'] 
        del data_set['minute'] 
        del data_set['datum'] 
        del data_set['time'] 
        yield data_set


## Testing the read functions

If you want to test the individual read functions just for checking if everything works, you can uncomment the line below.

In [None]:
# transnetTestDataWind = readData_transnet("csv/transnetbw/wind/7.csv", 'transnetbw', 'wind')

# And now output the TestData:
# transnetTestDataWind

## Loading the data into memory and cleaning it up



In [46]:
tennet_urls = urls([2015], [1, 2], conf['tennet'].values())
transnetbw_urls = urls([2015], range(1, 2), conf['transnetbw'].values())

In [47]:
tennet_sets = extract_time(forwards_fill_na(tennet_data_sets(contents(urls([2014], [1, 2], conf['tennet'].values())))))
transnetbw_sets = fill_na_with_zero(transnet_data_sets(contents(urls([2015], [1, 2], conf['transnetbw'].values))))

In [48]:
all_data_sets = pd.DataFrame()
for data_set in tennet_sets:
    all_data_sets = all_data_sets.combine_first(data_set)                  

## Validation and Plausibility Checks

Ideally you would do plausibility checks on your data here. (Check for missing data, check that output is never negative, etc.)

In [None]:
# Here you would do plausibility checks...
if any

## Save to one big CSV file

In [None]:
all_data_sets.to_csv("allData.csv")

## Display the data

Something seems to be broken with the Tennet wind data... Other than that, the time spans of the different TSO datasets read-in above are different.

In [49]:
all_data_sets

Unnamed: 0_level_0,actual_transnet_0,actual_transnet_1,actual_transnet_2,actual_transnet_3,forecast_tennet_0,forecast_tennet_1,forecast_tennet_2,forecast_tennet_3
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-01 00:00:00,4621,0,,,4091,0,,
2015-01-01 00:15:00,4549,0,,,4182,0,,
2015-01-01 00:30:00,4487,0,,,4273,0,,
2015-01-01 00:45:00,4457,0,,,4365,0,,
2015-01-01 01:00:00,4403,0,,,4427,0,,
2015-01-01 01:15:00,4357,0,,,4456,0,,
2015-01-01 01:30:00,4295,0,,,4484,0,,
2015-01-01 01:45:00,4352,0,,,4513,0,,
2015-01-01 02:00:00,4384,0,,,4534,0,,
2015-01-01 02:15:00,4413,0,,,4545,0,,


## Example: Resample the timeseries data to 60minute

Here we use Pandas' powerful timeseries functionality to resample the data to 60minute intervals 

In [None]:
resultDataSet.resample('60Min',how='sum')