http://reports.ieso.ca/docrefs/helpfile/GenOutputCapability_h4.pdf
20 MW and greater only, if telemetry error, power will be N/A, outage power will be 0

http://reports.ieso.ca/public/GenOutputCapability/PUB_GenOutputCapability.xml

In [19]:
from xml.etree import ElementTree as ET
import pandas as pd

# Load and parse the XML file
file_path = 'PUB_GenOutputCapability.xml'
tree = ET.parse(file_path)
root = tree.getroot()

# Define the namespace to access the elements correctly
ns = {'imo': 'http://www.theIMO.com/schema'}

site = "ABKENORA"

# Initialize a list to store the data
data = []

# Find the generator element for the specified site
for generator in root.findall('.//imo:Generator', ns):
    if generator.find('.//imo:GeneratorName', ns).text == site:
        # Extract fuel type once, assuming it's the same for all entries
        fuel_type = generator.find('.//imo:FuelType', ns).text

        # Combine the logic for Outputs and Capabilities into a single loop
        for output in generator.findall('.//imo:Outputs/imo:Output', ns):
            hour = output.find('.//imo:Hour', ns).text
            energy_mw = output.find('.//imo:EnergyMW', ns).text
            
            # Initialize capability data to None to handle cases where it might be missing
            capability_mw = None
            capability = generator.find(f'.//imo:Capabilities/imo:Capability[imo:Hour="{hour}"]', ns)
            if capability is not None:
                capability_mw = capability.find('.//imo:EnergyMW', ns).text

            data.append({
                'Generator Name': site,
                'Fuel Type': fuel_type,
                'Hour': hour,
                'EnergyMW': energy_mw,
                'CapabilityMW': capability_mw
            })

# Convert the list to a DataFrame
df = pd.DataFrame(data)

In [18]:
import requests
import os

# URL of the file to download
url = 'http://reports.ieso.ca/public/GenOutputCapability/PUB_GenOutputCapability.xml'

# Path where the file will be saved
file_path = 'PUB_GenOutputCapability.xml'

# Check if the file already exists
if os.path.exists(file_path):
    # Delete the existing file
    os.remove(file_path)

# Send a GET request to the URL
response = requests.get(url)

# Ensure the request was successful (HTTP status code 200)
if response.status_code == 200:
    # Write the content of the response to a new file
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print("File downloaded and saved successfully.")
else:
    print(f"Failed to download the file. HTTP status code: {response.status_code}")


File downloaded and saved successfully.


In [20]:
df

Unnamed: 0,Generator Name,Fuel Type,Hour,EnergyMW,CapabilityMW
0,ABKENORA,HYDRO,1,14,11
1,ABKENORA,HYDRO,2,14,11
2,ABKENORA,HYDRO,3,14,11
3,ABKENORA,HYDRO,4,14,11
4,ABKENORA,HYDRO,5,14,11
5,ABKENORA,HYDRO,6,14,11
6,ABKENORA,HYDRO,7,14,11
7,ABKENORA,HYDRO,8,13,11
8,ABKENORA,HYDRO,9,13,11
9,ABKENORA,HYDRO,10,14,11


In [21]:
from xml.etree import ElementTree as ET
import pandas as pd

# Load and parse the XML file
file_path = 'PUB_GenOutputCapability.xml'
tree = ET.parse(file_path)
root = tree.getroot()

# Define the namespace to access the elements correctly
ns = {'imo': 'http://www.theIMO.com/schema'}

# Define a list of sites
sites = ["ABKENORA", "STEWARTVLE", "PRINCEFARM"]  # Add your site names here

# Initialize a list to store the data
data = []

# Iterate over each site in the list
for site in sites:
    # Find the generator element for the specified site
    for generator in root.findall('.//imo:Generator', ns):
        generator_name = generator.find('.//imo:GeneratorName', ns).text
        if generator_name in sites:
            # Extract fuel type once, assuming it's the same for all entries
            fuel_type = generator.find('.//imo:FuelType', ns).text

            # Combine the logic for Outputs and Capabilities into a single loop
            for output in generator.findall('.//imo:Outputs/imo:Output', ns):
                hour = output.find('.//imo:Hour', ns).text
                energy_mw = output.find('.//imo:EnergyMW', ns).text
                
                # Initialize capability data to None to handle cases where it might be missing
                capability_mw = None
                capability = generator.find(f'.//imo:Capabilities/imo:Capability[imo:Hour="{hour}"]', ns)
                if capability is not None:
                    capability_mw = capability.find('.//imo:EnergyMW', ns).text

                data.append({
                    'Generator Name': generator_name,
                    'Fuel Type': fuel_type,
                    'Hour': hour,
                    'EnergyMW': energy_mw,
                    'CapabilityMW': capability_mw
                })

# Convert the list to a DataFrame
df = pd.DataFrame(data)

# Display or return the DataFrame
print(df)

    Generator Name Fuel Type Hour EnergyMW CapabilityMW
0         ABKENORA     HYDRO    1       14           11
1         ABKENORA     HYDRO    2       14           11
2         ABKENORA     HYDRO    3       14           11
3         ABKENORA     HYDRO    4       14           11
4         ABKENORA     HYDRO    5       14           11
..             ...       ...  ...      ...          ...
193     PRINCEFARM      WIND   18       61           55
194     PRINCEFARM      WIND   19       79           73
195     PRINCEFARM      WIND   20       91           88
196     PRINCEFARM      WIND   21       92           92
197     PRINCEFARM      WIND   22       67           69

[198 rows x 5 columns]


In [1]:
from xml.etree import ElementTree as ET
import pandas as pd

def parse_xml_for_sites(sites : list) -> pd.DataFrame:
    # Load and parse the XML file
    file_path = 'PUB_GenOutputCapability.xml'
    tree = ET.parse(file_path)
    root = tree.getroot()

    # Define the namespace to access the elements correctly
    ns = {'imo': 'http://www.theIMO.com/schema'}

    # Initialize a list to store the data
    data = []

    # Iterate over each site in the list
    for site in sites:
        # Find the generator element for the specified site
        for generator in root.findall('.//imo:Generator', ns):
            generator_name = generator.find('.//imo:GeneratorName', ns).text
            if generator_name == site:
                # Extract fuel type once, assuming it's the same for all entries
                fuel_type = generator.find('.//imo:FuelType', ns).text

                # Loop to collect data
                for output in generator.findall('.//imo:Outputs/imo:Output', ns):
                    hour = output.find('.//imo:Hour', ns).text
                    energy_mw = output.find('.//imo:EnergyMW', ns).text
                    capability_mw = None
                    capability = generator.find(f'.//imo:Capabilities/imo:Capability[imo:Hour="{hour}"]', ns)
                    if capability is not None:
                        capability_mw = capability.find('.//imo:EnergyMW', ns).text

                    data.append({
                        'Generator Name': generator_name,
                        'Fuel Type': fuel_type,
                        'Hour': int(hour),  # Cast hour to int for comparison
                        'EnergyMW': energy_mw,
                        'CapabilityMW': capability_mw
                    })

    # Convert the list to a DataFrame
    df = pd.DataFrame(data)

    # Filter for the highest available hour for each site
    df_filtered = df.loc[df.groupby('Generator Name')['Hour'].idxmax()]

    return df_filtered

In [2]:
# Define a list of sites
sites =  ["ABKENORA", "STEWARTVLE", "PRINCEFARM"] # Example sites

# Example file path, replace 'PUB_GenOutputCapability.xml' with your actual path


# Call the function and print the result
df_result = parse_xml_for_sites(sites)
print(df_result)

   Generator Name Fuel Type  Hour EnergyMW CapabilityMW
21       ABKENORA     HYDRO    22       13           11
65     PRINCEFARM      WIND    22       67           69
43     STEWARTVLE     HYDRO    22       67          182
