In [8]:
# from scraping.proxygen import get_link
import json
import os
import requests
from os.path import exists, join
from cytoolz import itertoolz as tit
from operator import itemgetter
from statistics import mean, median, stdev
from concurrent.futures import ThreadPoolExecutor, wait
import pandas as pd
from altair import Chart

In [2]:
def save_data(url, savedir):
    if not exists(savedir):
        os.makedirs(savedir, exist_ok=True)
    try:
        headers = {
            'accept': 'application/json,*/*',
            'Referer': 'https://www.energy-charts.de/',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
        }
        fn = url.rsplit('/', 1)[1]
        # get_link(url, header=headers)
        r = requests.get(url, headers=headers)
        try:
            data = r.json()
            with open(join(savedir, fn), 'w') as f:
                json.dump(data, f)
        except:
            pass
    except:
        pass

In [3]:
def generate_b1_urls():
    # init for gen_urls
    urls = [
        'https://www.energy-charts.de/power/week_k_YEAR_WEEK.json',
        'https://www.energy-charts.de/power/week_YEAR_WEEK.json',
        'https://www.energy-charts.de/power/week_exp_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_nuclear_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power/week_sw_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_run_of_river_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_lignite_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_lignite_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_hard_coal_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_oil_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_gas_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_garbage_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_pumped_storage_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_wind_offshore_unit_YEAR_WEEK.json',
        'https://www.energy-charts.de/power_unit/week_wind_onshore_unit_YEAR_WEEK.json'
    ]
    y_urls = [
        'https://www.energy-charts.de/power_inst/dp_inst_year_YEAR.json',
        'https://www.energy-charts.de/power_inst/dp_inst_month_YEAR.json',
        'https://www.energy-charts.de/power_inst/p_inst_month_YEAR.json',
        'https://www.energy-charts.de/power_inst/p_inst_year_YEAR.json'
    ]
    weeks = []
    for x in range(1,52):
        t = str(x)
        if len(t) == 1:
            t = '0' + t
        weeks.append(t)
    years = list(range(2011,2018))
    years = [str(x) for x in years]
    
    #  generate first batch of urls
    gen_urls = []
    for y in years:
        for x in y_urls:
            gen_urls.append(x.replace('YEAR', y))
        for w in weeks:
            for z in urls:
                gen_urls.append(z.replace('YEAR', y).replace('WEEK', w))
    return gen_urls

            
def generate_b2_urls():
    years = ['2015', '2016', '2017']
    months = list(range(1,13))
    for i, x in enumerate(months):
        x = str(x)
        if len(x) == 1:
            x = '0' + x
        months[i] = x
        
    # generate pie_urls
    pie_urls = []
    for x in years:
        for y in months:
            if x == '2017' and int(y) > 2:
                continue
            pie_urls.append('https://www.energy-charts.de/energy_pie/month_pie_{}_{}.json'.format(x, y))
    return pie_urls

In [4]:
def download_batch_one():
    gen_urls = generate_b1_urls()
    tp = ThreadPoolExecutor(max_workers=8)
    futures = []
    for x in gen_urls:
        futures.append(tp.submit(save_data, x, 'datadump'))
    wait(futures)
    tp.shutdown()
    

def download_batch_two():
    pie_urls = generate_b2_urls()
    tp = ThreadPoolExecutor(max_workers=8)
    futures = []
    for x in pie_urls[0:16]:
        futures.append(tp.submit(save_data, x, 'pie/month'))
    wait(futures)
    tp.shutdown()
    
    # get monthly installed capacity data
    cap_url = 'https://www.energy-charts.de/power_inst/p_inst_month.json'
    save_data(cap_url, 'datadump')

In [5]:
def process_data():
    with open('datadump/p_inst_month.json', 'r') as f:
        month_capacity = json.load(f)

    wind_capacity_raw = month_capacity[1]
    solar_capacity_raw = month_capacity[2]

    solarcap = []
    for x in solar_capacity_raw['values']:
        month, year = x[0].split('.')
        yearmonth = int(str(year) + str(month))
        solarcap.append({
            'gw': x[1],
            'yearmonth': yearmonth
        })
    windcap = []
    for x in wind_capacity_raw['values']:
        if x[1]:
            month, year = x[0].split('.')
            yearmonth = int(str(year) + str(month))
            windcap.append({
                'gw': x[1],
                'yearmonth': yearmonth
            })
    windcap = [x for x in windcap if x['gw']]
    solarcap = [x for x in solarcap if x['gw']]

    files = ['pie/month/' + x for x in os.listdir('pie/month/')]
    monthgen = []
    for x in files:
        with open(x, 'r') as f:
            monthgen.append(json.load(f))

    windgen = []
    solargen = []
    for mg in monthgen:
        windgen.append(mg[2])
        solargen.append(mg[3])

    files = [x.replace('pie/month/', '') for x in files]
    for i, sw in enumerate(zip(solargen, windgen)):
        s = sw[0]
        w = sw[1]
        s['tech'] = 'solar'
        s['gen_twh_month'] = s['value']
        s['gen_gwh_month'] = s['value'] * 1000
        w['tech'] = 'wind'
        w['gen_twh_month'] = w['value']
        w['gen_gwh_month'] = w['value'] * 1000
        year, month = files[i].replace('.json', '').replace('month_pie_', '').split('_')
        yearmonth = int(str(year) + str(month))
        w['yearmonth'] = yearmonth
        s['yearmonth'] = yearmonth
        del w['key']
        del s['key']
        del w['color']
        del s['color']

    solargen = sorted(solargen, key=itemgetter('yearmonth'))
    windgen = sorted(windgen, key=itemgetter('yearmonth'))

    month_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    month_hours = []
    for x in month_days:
        month_hours.append(x * 24)

    for i, x in enumerate(solarcap):
        n = i
        while n > 11:
            n = n - 12
        gwh = x['gw'] * month_hours[n]
        solarcap[i].update({
            'gwh': gwh,
            'twh': gwh / 1000
        })

    for i, x in enumerate(windcap):
        n = i
        while n > 11:
            n = n - 12
        gwh = x['gw'] * month_hours[n]
        windcap[i].update({
            'gwh': gwh,
            'twh': gwh / 1000
        })

    solar_comp = []
    for i, x in enumerate(solargen):
        for n, y in enumerate(solarcap):
            if x['yearmonth'] == y['yearmonth']:
                solargen[i].update({
                    'gwh_capacity': y['gwh'],
                    'twh_capacity': y['twh']
                })
                percent = x['gen_gwh_month'] / y['gwh']
                solar_comp.append(solargen[i])
                solar_comp[-1].update({
                    'cap_util': percent
                })

    wind_comp = []
    for i, x in enumerate(windgen):
        for n, y in enumerate(windcap):
            if x['yearmonth'] == y['yearmonth']:
                windgen[i].update({
                    'gwh_capacity': y['gwh'],
                    'twh_capacity': y['twh']
                })
                percent = x['gen_gwh_month'] / y['gwh']
                wind_comp.append(windgen[i])
                wind_comp[-1].update({
                    'cap_util': percent
                })

    all_solar_util = []
    all_solar_cap = []
    all_solargen = []
    for x in solar_comp:
        all_solar_util.append(x['cap_util'])
        all_solar_cap.append(x['twh_capacity'])
        all_solargen.append(x['gen_twh_month'])

    all_wind_util = []
    all_wind_cap = []
    all_wind_gen = []
    for x in wind_comp:
        all_wind_util.append(x['cap_util'])
        all_wind_cap.append(x['twh_capacity'])
        all_wind_gen.append(x['gen_twh_month'])

    print('Capacity Factor of German Solar (Monthly):')
    print()

    solar_plate_max = float("{0:.2f}".format(max([x['twh'] for x in solarcap])))
    solar_plate_median = float("{0:.2f}".format(median([x['twh'] for x in solarcap])))
    solar_plate_mean = float("{0:.2f}".format(mean([x['twh'] for x in solarcap])))
    print('    Nameplate Generation Max (calculated tw/h per month):\t' + str(solar_plate_max))
    print('    Nameplate Generation Median (calculated tw/h per month):\t' + str(solar_plate_median))
    print('    Nameplate Generation Mean (calculated tw/h per month):\t' + str(solar_plate_mean))
    print()

    # source: https://www.energy-charts.de/energy_pie.htm
    # maxgen in July 2015
    solar_prod_max = float("{0:.2f}".format(max(all_solargen)))
    solar_prod_median = float("{0:.2f}".format(median(all_solargen)))
    solar_prod_mean = float("{0:.2f}".format(mean(all_solargen)))
    print('    Actual Production Max (tw/h per month):\t\t\t' + str(solar_prod_max))
    print('    Actual Production Median (tw/h per month):\t\t\t' + str(solar_prod_median))
    print('    Actual Production Mean (tw/h per month):\t\t\t' + str(solar_prod_mean))
    print()

    solar_util_mean = float("{0:.2f}".format(mean(all_solar_util)*100))
    solar_util_median = float("{0:.2f}".format(median(all_solar_util)*100))
    solar_util_stdev = float("{0:.2f}".format(stdev(all_solar_util)*100))
    print('    Median Capacity Factor (%):\t\t\t\t\t' + str(float("{0:.2f}".format(median(all_solar_util)*100))) + '%')
    print('    Mean Capacity Factor (%):\t\t\t\t\t' + str(solar_util_mean) + '%')
    print('    1 Std. Dev. Cap. Factor (percentage points):\t\t' + str(solar_util_stdev))
    print()

    solar_util_max = float("{0:.2f}".format(max(all_solar_util)))
    solar_util_min = float("{0:.2f}".format(min(all_solar_util)))
    print('    Highest Single Month Cap. Factor (%):\t\t\t' + str(solar_util_max * 100) + '%')
    print('    Lowest Single Month Cap. Factor (%):\t\t\t' + str(solar_util_min * 100) + '%')
    print()
    print('\t\t------------------------------------')
    print()

    print('Capacity Factor of German Wind (Monthly):')
    print()

    wind_plate_max = float("{0:.2f}".format(max([x['twh'] for x in windcap])))
    wind_plate_median = float("{0:.2f}".format(median([x['twh'] for x in windcap])))
    wind_plate_mean = float("{0:.2f}".format(mean([x['twh'] for x in windcap])))
    print('    Nameplate Generation Max (calculated tw/h per month):\t' + str(wind_plate_max))
    print('    Nameplate Generation Median (calculated tw/h per month):\t' + str(wind_plate_median))
    print('    Nameplate Generation Mean (calculated tw/h per month):\t' + str(wind_plate_mean))
    print()

    # source: https://www.energy-charts.de/energy_pie.htm
    # maxgen in July 2015
    wind_prod_max = float("{0:.2f}".format(max(all_wind_gen)))
    wind_prod_median = float("{0:.2f}".format(median(all_wind_gen)))
    wind_prod_mean = float("{0:.2f}".format(mean(all_wind_gen)))
    print('    Actual Production Max (tw/h per month):\t\t\t' + str(wind_prod_max))
    print('    Actual Production Median (tw/h per month):\t\t\t' + str(wind_prod_median))
    print('    Actual Production Mean (tw/h per month):\t\t\t' + str(wind_prod_mean))
    print()

    wind_util_mean = float("{0:.2f}".format(mean(all_wind_util)*100))
    wind_util_median = float("{0:.2f}".format(median(all_wind_util)*100))
    wind_util_stdev = float("{0:.2f}".format(stdev(all_wind_util)*100))
    print('    Median Capacity Factor (%):\t\t\t\t\t' + str(float("{0:.2f}".format(median(all_wind_util)*100))) + '%')
    print('    Mean Capacity Factor (%):\t\t\t\t\t' + str(wind_util_mean) + '%')
    print('    1 Std. Dev. Cap. Factor (percentage points):\t\t' + str(wind_util_stdev))
    print()

    wind_util_max = float("{0:.2f}".format(max(all_wind_util)))
    wind_util_min = float("{0:.2f}".format(min(all_wind_util)))
    print('    Highest Single Month Cap. Factor (%):\t\t\t' + str(wind_util_max * 100) + '%')
    print('    Lowest Single Month Cap. Factor (%):\t\t\t' + str(wind_util_min * 100) + '%')

In [6]:
if __name__ == '__main__':
    # note: batch_one is about 2100 files...
    # download_batch_one()
    
#     download_batch_two()
    process_data()

Capacity Factor of German Solar (Monthly):

    Nameplate Generation Max (calculated tw/h per month):	30.48
    Nameplate Generation Median (calculated tw/h per month):	24.4
    Nameplate Generation Mean (calculated tw/h per month):	20.42

    Actual Production Max (tw/h per month):			5.46
    Actual Production Median (tw/h per month):			3.19
    Actual Production Mean (tw/h per month):			3.09

    Median Capacity Factor (%):					11.23%
    Mean Capacity Factor (%):					10.74%
    1 Std. Dev. Cap. Factor (percentage points):		6.41

    Highest Single Month Cap. Factor (%):			19.0%
    Lowest Single Month Cap. Factor (%):			2.0%

		------------------------------------

Capacity Factor of German Wind (Monthly):

    Nameplate Generation Max (calculated tw/h per month):	33.86
    Nameplate Generation Median (calculated tw/h per month):	29.5
    Nameplate Generation Mean (calculated tw/h per month):	29.61

    Actual Production Max (tw/h per month):			11.22
    Actual Production Median (tw