In [1]:
# Download the latest heating degree day data from the DWD FTP server
import os
import requests
from bs4 import BeautifulSoup

# Directory listing of the DWD open-data server that holds the recent
# heating-degree-day CSV files.
url = 'https://opendata.dwd.de/climate_environment/CDC/derived_germany/techn/daily/heating_degreedays/hdd_3807/recent/'

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 30

response = requests.get(url, timeout=request_timeout)

# Check if the request was successful
if response.status_code == 200:
    # Use BeautifulSoup to parse the HTML directory listing
    soup = BeautifulSoup(response.content, 'html.parser')

    # Set the output directory; exist_ok avoids the check-then-create race
    output_dir = '../data/hdd'
    os.makedirs(output_dir, exist_ok=True)

    # href=True skips anchors that have no href attribute; for those,
    # link.get('href') would return None and .endswith() would raise.
    for link in soup.find_all('a', href=True):
        file_name = link['href']
        if not file_name.endswith('.csv'):
            continue

        print(f'Downloading: {file_name}')

        # Download the CSV file; do not save it if the server reports an
        # error, otherwise an error page would be stored under a .csv name.
        file_response = requests.get(url + file_name, timeout=request_timeout)
        if file_response.status_code != 200:
            print(f'Error: {file_response.status_code}')
            continue

        # The href comes from remote content: keep only its final path
        # component so the file can never be written outside output_dir.
        local_path = os.path.join(output_dir, os.path.basename(file_name))
        with open(local_path, 'wb') as output_file:
            output_file.write(file_response.content)
else:
    print(f'Error: {response.status_code}')


PermissionError: [Errno 13] Permission denied: '/Users/binbai'

In [5]:
# Process the downloaded CSV files to calculate the average HDD for each day
import os
import pandas as pd
from datetime import datetime

# Specify the folder containing the CSV files
folder_path = "../data/hdd"

# List all files in the folder
files = os.listdir(folder_path)

# Keep only the CSV files. os.listdir() returns entries in arbitrary order,
# so sort them to make the processing order deterministic.
csv_files = sorted(name for name in files if name.endswith('.csv'))

# Fail early with an actionable message: pd.concat() on an empty list would
# otherwise raise an opaque "No objects to concatenate" ValueError.
if not csv_files:
    raise ValueError(f'No CSV files found in {folder_path!r}; run the download cell first.')

# Collect one DataFrame (Date, Average_HDD) per input file
results = []

for file in csv_files:
    file_path = os.path.join(folder_path, file)

    # Read the CSV into a DataFrame and skip the first three rows (header)
    df = pd.read_csv(file_path, sep=';', skiprows=3)

    # Extract the year and month from the file name, which is expected to
    # end in "YYYYMM.csv" (the slices count back from the end of the name)
    year = int(file[-10:-6])
    month = int(file[-6:-4])

    # Column-wise mean over all rows, from the fifth column onwards.
    # NOTE(review): assumes every column from index 4 on holds one day of the
    # month, in calendar order, with no trailing extra column - confirm
    # against the DWD file layout.
    daily_average = df.iloc[:, 4:].mean()

    # One date per averaged column, counting up from the first of the month
    days_in_month = len(daily_average)
    dates = pd.date_range(datetime(year, month, 1), periods=days_in_month)

    # Combine the dates and daily averages into a new DataFrame
    df_result = pd.DataFrame({'Date': dates, 'Average_HDD': daily_average})
    results.append(df_result)

# Concatenate the per-file results, order them chronologically and replace
# the per-file column-name index with a clean 0..n-1 index
results_df = pd.concat(results)
results_df = results_df.sort_values('Date').reset_index(drop=True)

# Save the results to a new CSV file (written to the current working directory)
results_df.to_csv('average_hdd_data.csv', index=False)


In [2]:
# Flag each date in the given CSV file as a weekend or German public holiday (1) or a regular working day (0).
import pandas as pd
import holidays

# Holiday calendars already built, keyed by year. The function is applied once
# per row, so building a fresh calendar on every call is repeated work.
_DE_HOLIDAYS_BY_YEAR = {}


def is_weekend_or_holiday(date_str):
    """Return 1 if the date is a weekend day or a German holiday, else 0.

    Parameters
    ----------
    date_str : str or datetime-like
        Anything ``pd.to_datetime`` can parse into a single timestamp.

    Returns
    -------
    int
        1 for Saturday, Sunday, or a date contained in ``holidays.Germany``
        for that year; 0 otherwise.
    """
    # Convert the input to a pandas Timestamp
    date_obj = pd.to_datetime(date_str)

    # weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday
    if date_obj.weekday() >= 5:
        return 1

    # Build the calendar for this year only once and reuse it afterwards.
    # NOTE(review): no state/subdivision is passed, so presumably only
    # nationwide holidays are covered - confirm this is intended.
    year = date_obj.year
    de_holidays = _DE_HOLIDAYS_BY_YEAR.get(year)
    if de_holidays is None:
        de_holidays = holidays.Germany(years=year)
        _DE_HOLIDAYS_BY_YEAR[year] = de_holidays

    return 1 if date_obj in de_holidays else 0

# The same file is both the input and the output: it is rewritten in place
# with the extra flag column.
ngd_csv_path = '../data/daily_NGD.csv'

# Load the data and derive the 0/1 flag from each value in the "Date" column
df = pd.read_csv(ngd_csv_path).assign(
    Is_Weekend_or_Holiday=lambda frame: frame['Date'].apply(is_weekend_or_holiday)
)

# Write the augmented table back without the index column
df.to_csv(ngd_csv_path, index=False)
