In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# --- Load data ---------------------------------------------------------------
# Historical PV power records, read from a CSV file in the working directory.
data = pd.read_csv('Past_PV_Power_data_50kW.csv')

# The 'time' column is stored as text in YYYYMMDD:HHMM form; parse it into datetimes.
data['time'] = pd.to_datetime(data['time'], format='%Y%m%d:%H%M')

# --- Features and target -----------------------------------------------------
# 'H_sun' is deliberately left out: the met.ie API does not provide it, and it is
# not an important feature anyway.
feature_columns = ['G(i)', 'T2m', 'WS10m']
X = data[feature_columns]
y = data['P']

# --- Train / test split ------------------------------------------------------
# 20 % of the rows are held out for testing. A fixed random_state gives the same
# split on every run; without it the split (and therefore the reported model
# performance) could vary from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model -------------------------------------------------------------------
# n_estimators is the number of regression trees in the ensemble.
gbm_params = dict(n_estimators=500, learning_rate=0.1, max_depth=3, random_state=42)
gbm_basic = GradientBoostingRegressor(**gbm_params)
gbm_basic.fit(X_train, y_train)  # the model learns from the training data only

# --- Evaluation --------------------------------------------------------------
# Predict on both partitions so the two errors can be compared: a training RMSE
# far below the testing RMSE would suggest that the model is overfitting.
y_train_pred = gbm_basic.predict(X_train)
y_pred_basic = gbm_basic.predict(X_test)

rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
rmse_basic = np.sqrt(mean_squared_error(y_test, y_pred_basic))  # y_test holds the actual values

print(f'Training RMSE: {rmse_train} Watts')
print(f'Testing RMSE: {rmse_basic} Watts')


Training RMSE: 131.71596229261036 Watts
Testing RMSE: 139.1060468819065 Watts


# Only Block A of the API forecast is used (entries whose 'from' and 'to' times are the same)

In [2]:
import requests
import xml.etree.ElementTree as ET

# Fetch the forecast from the API and parse the XML response
api_url = "https://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat=52.519;long=-8.712;from=2024-03-01T00:00;to=2024-03-02T00:00"

# A timeout stops the cell from hanging on an unresponsive server, and
# raise_for_status() reports an HTTP error here instead of letting an error page
# reach the XML parser below.
response = requests.get(api_url, timeout=30)
response.raise_for_status()
xml_data = ET.fromstring(response.content)


def _read_float_attribute(parent, tag, attribute, default):
    """Return ``attribute`` of the first ``tag`` descendant of ``parent`` as a float.

    ``default`` is returned unchanged when either the element or the attribute
    is absent, so an incomplete element does not raise a TypeError.
    """
    element = parent.find(f".//{tag}")
    if element is None:
        return default
    raw_value = element.get(attribute)
    if raw_value is None:
        return default
    return float(raw_value)


# Prepare lists to hold the extracted forecast data (one entry per location element)
global_radiation = []
wind_speed = []
air_temperature = []
times = []

# Iterate over each time element in the forecast data
for time_element in xml_data.findall(".//time"):
    from_time = time_element.get('from')
    to_time = time_element.get('to')

    # Only process Block A where 'from' and 'to' times are the same
    if from_time != to_time:
        continue

    for location_element in time_element.findall(".//location"):
        # Missing temperature and wind speed are recorded as NaN
        air_temperature.append(_read_float_attribute(location_element, "temperature", "value", np.nan))
        wind_speed.append(_read_float_attribute(location_element, "windSpeed", "mps", np.nan))
        # Missing radiation is recorded as 0, assuming night time or no data available
        global_radiation.append(_read_float_attribute(location_element, "globalRadiation", "value", 0))
        times.append(from_time)

# Prepare the forecast DataFrame
forecast_df = pd.DataFrame({
    'time': times,
    'G(i)': global_radiation,
    'WS10m': wind_speed,
    'T2m': air_temperature
})


# The API assumes a horizontal measurement of global radiation, whereas the PVGIS
# data is at an angle, so the API values are scaled to match the PVGIS data.
# The factor was found by trial and error so that the average global radiation for
# the month matches the PVGIS data. This correction factor is still to be confirmed.
correction_factor = 1.3

# Scale the forecast global radiation values by the correction factor
forecast_df['G(i)'] = forecast_df['G(i)'].mul(correction_factor)



# Forecast
# Select the feature columns in the same order the model was trained on
X_forecast = forecast_df[['G(i)', 'T2m', 'WS10m']]

# Missing temperature / wind speed values are stored as NaN, which
# GradientBoostingRegressor.predict rejects. Predict only for rows where every
# feature is present and leave NaN for the others, so one gap (or an empty
# forecast) does not abort the whole cell and every timestamp is still exported.
complete_rows = X_forecast.notna().all(axis=1).to_numpy()
predicted_pv_output = np.full(len(X_forecast), np.nan)
if complete_rows.any():
    predicted_pv_output[complete_rows] = gbm_basic.predict(X_forecast[complete_rows])

# Set any PV power output below 50 watts to 0. NaN predictions compare as False
# against the threshold, so they pass through unchanged.
min_power_threshold_w = 50
predicted_pv_output_adjusted = np.where(
    predicted_pv_output < min_power_threshold_w, 0, predicted_pv_output
)

# Create a DataFrame for the predicted output with adjusted values
output_df = pd.DataFrame({
    'time': forecast_df['time'],
    'predicted_PV_output': predicted_pv_output_adjusted
})

# Export predicted values to a CSV file
output_csv_path = 'future_PV_power_predictions.csv'
output_df.to_csv(output_csv_path, index=False)

print(f'Predicted PV power output for future dates has been saved to {output_csv_path}')


Predicted PV power output for future dates has been saved to future_PV_power_predictions.csv
