In [51]:
import pandas as pd
import os
import re
import plotly.graph_objects as go
import plotly.express as px

## Quellen
- [Link zum Download der Remit Dateien](https://bmrs.elexon.co.uk/remit?timeRangeType=Custom&timeRangeLength=Custom&assetID=T_HOWAO-1&endpoint=published&revisionHistory=include-history&from=2024-01-01T07%3A30%3A00.000Z&to=2025-01-01T07%3A30%3A00.000Z&messageType=Unavailabilities+of+Electricity+Facilities%2COther+Market+Information&unavailabilityType=Planned%2CUnplanned&eventType=Production+Unavailability%2CTransmission+Unavailability%2CConsumption+Unavailability%2COther+Unavailability)

- [Dokumenation BMRS API and Data Push User Guide](https://assets.elexon.co.uk/wp-content/uploads/2018/09/28160411/BMRS-API-Data-Push-User-Guide.pdf)

In [52]:
#  # Liest alle JSON-Dateien aus einem angegebenen Ordner ein und wandelt sie in CSV-Dateien um
#  # Ordnerpfad zu den JSON-Dateien
# json_folder = 'data/Json_Dateien/'

# # Alle JSON-Dateien im Ordner einlesen und in CSV umwandeln
# for filename in os.listdir(json_folder):
#     if filename.endswith('.json'):
#         json_file = os.path.join(json_folder, filename)  # Vollständiger Pfad zur JSON-Datei
#         data = pd.read_json(json_file)

#         # CSV-Dateiname erstellen
#         csv_file = os.path.join(json_folder, filename.replace('.json', '.csv'))  # Pfad zur gewünschten CSV-Datei
#         data.to_csv(csv_file, index=False)

#         print(f'Die JSON-Datei {filename} wurde erfolgreich in {csv_file} umgewandelt.')

In [53]:
# Ordnerpfad zu den CSV-Dateien
csv_folder = 'data/Json_Dateien'

# Leere Liste zum Speichern der DataFrames
dataframes = []

# Alle CSV-Dateien im Ordner einlesen
for filename in os.listdir(csv_folder):
    if filename.endswith('.csv'):
        csv_file = os.path.join(csv_folder, filename)  # Vollständiger Pfad zur CSV-Datei
        df = pd.read_csv(csv_file)  # Einlesen der CSV-Datei
        dataframes.append(df)  # Hinzufügen des DataFrames zur Liste

# Alle DataFrames zu einem einzigen DataFrame zusammenführen
df = pd.concat(dataframes, ignore_index=True)

In [54]:
df.columns

Index(['id', 'dataset', 'mrid', 'revisionNumber', 'publishTime', 'createdTime',
       'messageType', 'messageHeading', 'eventType', 'unavailabilityType',
       'participantId', 'registrationCode', 'assetId', 'assetType',
       'affectedUnit', 'affectedUnitEIC', 'affectedArea', 'biddingZone',
       'fuelType', 'normalCapacity', 'availableCapacity',
       'unavailableCapacity', 'eventStatus', 'eventStartTime', 'eventEndTime',
       'cause', 'relatedInformation', 'outageProfile'],
      dtype='object')

In [55]:
df.head()

Unnamed: 0,id,dataset,mrid,revisionNumber,publishTime,createdTime,messageType,messageHeading,eventType,unavailabilityType,...,fuelType,normalCapacity,availableCapacity,unavailableCapacity,eventStatus,eventStartTime,eventEndTime,cause,relatedInformation,outageProfile
0,60924,REMIT,11XDONG-PT-----2-NGET-RMT-00001020,2,2020-11-26T13:54:00Z,2020-11-26T13:54:00Z,UnavailabilitiesOfElectricityFacilities,REMIT Information,Production unavailability,Planned,...,Wind Offshore,400,0,400,Active,2020-11-26T09:00:00Z,2020-11-26T18:00:00Z,Planned Outage,HOW01 Z12 Dry run interlink test,"[{'startTime': '2020-11-26T09:00:00Z', 'endTim..."
1,185054,REMIT,11XDONG-PT-----2-NGET-RMT-00001024,2,2020-12-10T16:12:00Z,2020-12-10T16:12:00Z,UnavailabilitiesOfElectricityFacilities,REMIT Information,Production unavailability,Planned,...,Wind Offshore,400,0,400,Active,2020-12-11T06:00:00Z,2020-12-11T16:00:00Z,Planned Outage,HOW01 BMU 2 will be undergoing frequency respo...,"[{'startTime': '2020-12-11T06:00:00Z', 'endTim..."
2,188013,REMIT,11XDONG-PT-----2-NGET-RMT-00001026,1,2020-12-13T19:13:00Z,2020-12-13T19:13:00Z,UnavailabilitiesOfElectricityFacilities,REMIT Information,Production unavailability,Planned,...,Wind Offshore,400,0,400,Active,2020-12-14T07:30:00Z,2020-12-14T09:30:00Z,Planned Outage,HOW01 BMU 2 will be undergoing frequency respo...,"[{'startTime': '2020-12-14T07:30:00Z', 'endTim..."
3,189243,REMIT,11XDONG-PT-----2-NGET-RMT-00001024,1,2020-12-07T21:19:00Z,2020-12-07T21:19:00Z,UnavailabilitiesOfElectricityFacilities,REMIT Information,Production unavailability,Planned,...,Wind Offshore,400,0,400,Active,2020-12-11T08:00:00Z,2020-12-11T18:00:00Z,Planned Outage,HOW01 BMU 2 will be undergoing frequency respo...,"[{'startTime': '2020-12-11T08:00:00Z', 'endTim..."
4,189474,REMIT,11XDONG-PT-----2-NGET-RMT-00001026,2,2020-12-14T09:29:00Z,2020-12-14T09:29:00Z,UnavailabilitiesOfElectricityFacilities,REMIT Information,Production unavailability,Planned,...,Wind Offshore,400,0,400,Active,2020-12-14T07:30:00Z,2020-12-14T10:30:00Z,Planned Outage,HOW01 BMU 2 will be undergoing frequency respo...,"[{'startTime': '2020-12-14T07:30:00Z', 'endTim..."


In [56]:
# Konvertiere die Spalten 'publishTime' und 'createdTime' in Datetime-Objekte
df['publishTime'] = pd.to_datetime(df['publishTime'])
df['createdTime'] = pd.to_datetime(df['createdTime'])

# Versuche, 'eventStatus' und 'eventStartTime' in Datetime-Objekte zu konvertieren; ungültige Werte werden in NaT umgewandelt
df['eventStatus'] = pd.to_datetime(df['eventStatus'], errors='coerce')
df['eventStartTime'] = pd.to_datetime(df['eventStartTime'], errors='coerce')
df['eventEndTime'] = pd.to_datetime(df['eventEndTime'], errors='coerce')


df['eventTime'] = df['eventEndTime'] - df['eventStartTime']


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [57]:
df.dtypes

id                                   int64
dataset                             object
mrid                                object
revisionNumber                       int64
publishTime            datetime64[ns, UTC]
createdTime            datetime64[ns, UTC]
messageType                         object
messageHeading                      object
eventType                           object
unavailabilityType                  object
participantId                       object
registrationCode                    object
assetId                             object
assetType                           object
affectedUnit                        object
affectedUnitEIC                     object
affectedArea                        object
biddingZone                         object
fuelType                            object
normalCapacity                       int64
availableCapacity                    int64
unavailableCapacity                  int64
eventStatus                 datetime64[ns]
eventStartT

In [58]:
df.isnull().sum()

id                       0
dataset                  0
mrid                     0
revisionNumber           0
publishTime              0
createdTime              0
messageType              0
messageHeading           0
eventType                0
unavailabilityType       0
participantId            0
registrationCode         0
assetId                  0
assetType                0
affectedUnit             0
affectedUnitEIC          0
affectedArea             0
biddingZone              0
fuelType                 0
normalCapacity           0
availableCapacity        0
unavailableCapacity      0
eventStatus            436
eventStartTime           0
eventEndTime             0
cause                    0
relatedInformation     316
outageProfile           50
eventTime                0
dtype: int64

In [59]:
df.dtypes

id                                   int64
dataset                             object
mrid                                object
revisionNumber                       int64
publishTime            datetime64[ns, UTC]
createdTime            datetime64[ns, UTC]
messageType                         object
messageHeading                      object
eventType                           object
unavailabilityType                  object
participantId                       object
registrationCode                    object
assetId                             object
assetType                           object
affectedUnit                        object
affectedUnitEIC                     object
affectedArea                        object
biddingZone                         object
fuelType                            object
normalCapacity                       int64
availableCapacity                    int64
unavailableCapacity                  int64
eventStatus                 datetime64[ns]
eventStartT

In [60]:
start_date = '2020-09-20'
end_date = '2024-05-24'

df['publishTime'] = pd.to_datetime(df['publishTime'])

df = df[(df['publishTime'] >= start_date) & (df['publishTime'] <= end_date)]

In [61]:
df.shape

(269, 29)

In [62]:
# Funktion zum Extrahieren der Werte aus dem Dictionary
def extract_values(outage_profile):
    # Überprüfen, ob der Wert in der Spalte gültig ist
    if isinstance(outage_profile, str):
        # Suchen nach allen startTime, endTime und capacity Werten
        start_times = re.findall(r"'startTime': '(.*?)'", outage_profile)
        end_times = re.findall(r"'endTime': '(.*?)'", outage_profile)
        capacities = re.findall(r"'capacity': (\d+)", outage_profile)
        
        # Erstellen eines Dictionaries mit den Werten in nebeneinander liegenden Spalten
        extracted_data = {}
        for i in range(len(start_times)):
            extracted_data[f'startTime_{i+1}'] = start_times[i]
            extracted_data[f'endTime_{i+1}'] = end_times[i]
            extracted_data[f'capacity_{i+1}'] = capacities[i]

        return extracted_data
    else:
        return {}

# Extrahieren der Werte aus der 'outageProfile'-Spalte
extracted_values = df['outageProfile'].apply(extract_values)

# Erstellen eines neuen DataFrames aus den extrahierten Werten
extracted_df = pd.DataFrame(extracted_values.tolist())

# Umwandlung der Spalten in den entsprechenden Datentyp
for col in extracted_df.columns:
    if 'startTime' in col or 'endTime' in col:
        extracted_df[col] = pd.to_datetime(extracted_df[col], errors='coerce')  # Fehlerhafte Einträge werden zu NaT
    elif 'capacity' in col:
        extracted_df[col] = extracted_df[col].astype(float)  # Oder int, je nach Bedarf

display(extracted_df.head(2))


Unnamed: 0,startTime_1,endTime_1,capacity_1,startTime_2,endTime_2,capacity_2,startTime_3,endTime_3,capacity_3,startTime_4,...,capacity_9,startTime_10,endTime_10,capacity_10,startTime_11,endTime_11,capacity_11,startTime_12,endTime_12,capacity_12
0,2020-11-26 09:00:00+00:00,2020-11-26 18:00:00+00:00,0.0,NaT,NaT,,NaT,NaT,,NaT,...,,NaT,NaT,,NaT,NaT,,NaT,NaT,
1,2020-12-11 06:00:00+00:00,2020-12-11 16:00:00+00:00,0.0,NaT,NaT,,NaT,NaT,,NaT,...,,NaT,NaT,,NaT,NaT,,NaT,NaT,


In [63]:
# Gruppiere den DataFrame 'df' nach 'publishTime' und zähle die Anzahl der 'id'-Einträge in jeder Gruppe
df_publishTime = df.groupby(['publishTime'])['id'].count()

# Gebe die Liste der 'publishTime'-Indizes aus
print(df_publishTime.index.tolist())

# Sortiere die Ergebnisse in absteigender Reihenfolge
df_publishTime.sort_values(ascending=False)


[Timestamp('2020-09-21 11:42:00+0000', tz='UTC'), Timestamp('2020-09-23 07:56:00+0000', tz='UTC'), Timestamp('2020-09-23 12:12:00+0000', tz='UTC'), Timestamp('2020-09-23 13:33:00+0000', tz='UTC'), Timestamp('2020-09-24 07:35:00+0000', tz='UTC'), Timestamp('2020-09-24 08:03:00+0000', tz='UTC'), Timestamp('2020-09-28 09:13:00+0000', tz='UTC'), Timestamp('2020-09-28 10:31:00+0000', tz='UTC'), Timestamp('2020-09-29 13:02:00+0000', tz='UTC'), Timestamp('2020-09-29 16:39:00+0000', tz='UTC'), Timestamp('2020-09-29 17:34:00+0000', tz='UTC'), Timestamp('2020-09-30 09:57:00+0000', tz='UTC'), Timestamp('2020-10-19 11:31:00+0000', tz='UTC'), Timestamp('2020-10-20 09:00:00+0000', tz='UTC'), Timestamp('2020-10-22 16:04:00+0000', tz='UTC'), Timestamp('2020-10-27 15:50:00+0000', tz='UTC'), Timestamp('2020-10-30 11:25:00+0000', tz='UTC'), Timestamp('2020-10-30 11:27:00+0000', tz='UTC'), Timestamp('2020-10-30 11:28:00+0000', tz='UTC'), Timestamp('2020-10-30 11:29:00+0000', tz='UTC'), Timestamp('2020-10-

publishTime
2020-10-30 13:41:00+00:00    3
2023-07-17 10:42:04+00:00    3
2024-01-30 09:22:01+00:00    3
2020-10-30 17:03:00+00:00    2
2021-03-20 12:29:00+00:00    2
                            ..
2024-05-03 07:06:33+00:00    1
2024-05-04 16:00:36+00:00    1
2024-05-20 15:01:07+00:00    1
2024-05-21 08:43:01+00:00    1
2024-05-23 09:13:08+00:00    1
Name: id, Length: 248, dtype: int64

In [64]:
# Jede ID kommt nur einmal im Datensatz vor

# Gruppiere den DataFrame 'df' nach 'id' und zähle die Anzahl der 'dataset'-Einträge in jeder Gruppe
df_id = df.groupby(['id'])['dataset'].count()

# Sortiere die Ergebnisse in aufsteigender Reihenfolge
df_id.sort_values()

id
741704    1
741692    1
736095    1
736094    1
734706    1
         ..
742073    1
750547    1
750598    1
750780    1
17114     1
Name: dataset, Length: 269, dtype: int64

In [65]:
# In dieser Spalte ist nur  REMIT

# Gruppiere nach 'dataset' und zähle 'id'
df_dataset = df.groupby(['dataset'])['id'].count()
df_dataset

dataset
REMIT    269
Name: id, dtype: int64

In [66]:
# Gruppiere nach 'mrid' und zähle 'id'
df_mrid = df.groupby(['mrid'])['id'].count()
df_mrid

mrid
11XDONG-PT-----2-NGET-RMT-00000972    3
11XDONG-PT-----2-NGET-RMT-00000978    2
11XDONG-PT-----2-NGET-RMT-00000980    7
11XDONG-PT-----2-NGET-RMT-00000984    2
11XDONG-PT-----2-NGET-RMT-00000986    1
                                     ..
11XDONG-PT-----2-NGET-RMT-00001806    9
11XDONG-PT-----2-NGET-RMT-00001818    3
11XDONG-PT-----2-NGET-RMT-00001820    3
11XDONG-PT-----2-NGET-RMT-00001870    6
11XDONG-PT-----2-NGET-RMT-00001879    1
Name: id, Length: 90, dtype: int64

In [67]:
# Gruppiere nach 'revisionNumber' und zähle 'id'
df_revisionNumber = df.groupby(['revisionNumber'])['id'].count()

# Liste der eindeutigen 'revisionNumber'-Werte ausgibt
print(df_revisionNumber.index.tolist())
df_revisionNumber

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]


revisionNumber
1     90
2     65
3     32
4     22
5     13
6      9
7      7
8      5
9      5
10     3
11     3
12     3
13     3
14     2
15     2
16     2
17     2
18     1
Name: id, dtype: int64

In [68]:
# Gruppiere nach 'publishTime' und zähle 'id'
df_publishTime = df.groupby(['publishTime'])['id'].count()

# Konvertiere den Index von 'df_publishTime' in eine Liste und gib sie aus
print(df_publishTime.index.tolist())

# Sortiere die gezählten Werte absteigend nach der Anzahl der 'id'
df_publishTime.sort_values(ascending=False)


[Timestamp('2020-09-21 11:42:00+0000', tz='UTC'), Timestamp('2020-09-23 07:56:00+0000', tz='UTC'), Timestamp('2020-09-23 12:12:00+0000', tz='UTC'), Timestamp('2020-09-23 13:33:00+0000', tz='UTC'), Timestamp('2020-09-24 07:35:00+0000', tz='UTC'), Timestamp('2020-09-24 08:03:00+0000', tz='UTC'), Timestamp('2020-09-28 09:13:00+0000', tz='UTC'), Timestamp('2020-09-28 10:31:00+0000', tz='UTC'), Timestamp('2020-09-29 13:02:00+0000', tz='UTC'), Timestamp('2020-09-29 16:39:00+0000', tz='UTC'), Timestamp('2020-09-29 17:34:00+0000', tz='UTC'), Timestamp('2020-09-30 09:57:00+0000', tz='UTC'), Timestamp('2020-10-19 11:31:00+0000', tz='UTC'), Timestamp('2020-10-20 09:00:00+0000', tz='UTC'), Timestamp('2020-10-22 16:04:00+0000', tz='UTC'), Timestamp('2020-10-27 15:50:00+0000', tz='UTC'), Timestamp('2020-10-30 11:25:00+0000', tz='UTC'), Timestamp('2020-10-30 11:27:00+0000', tz='UTC'), Timestamp('2020-10-30 11:28:00+0000', tz='UTC'), Timestamp('2020-10-30 11:29:00+0000', tz='UTC'), Timestamp('2020-10-

publishTime
2020-10-30 13:41:00+00:00    3
2023-07-17 10:42:04+00:00    3
2024-01-30 09:22:01+00:00    3
2020-10-30 17:03:00+00:00    2
2021-03-20 12:29:00+00:00    2
                            ..
2024-05-03 07:06:33+00:00    1
2024-05-04 16:00:36+00:00    1
2024-05-20 15:01:07+00:00    1
2024-05-21 08:43:01+00:00    1
2024-05-23 09:13:08+00:00    1
Name: id, Length: 248, dtype: int64

In [69]:
# Gruppiere nach 'createdTime' und zähle 'id' pro Gruppe
df_createdTime = df.groupby(['createdTime'])['id'].count()

# Konvertiere den Index von 'createdTime' in eine Liste und gib sie aus
print(df_createdTime.index.tolist())

# Sortiere die Gruppen basierend auf der Anzahl der 'id' absteigend
df_createdTime.sort_values(ascending=False)


[Timestamp('2020-09-21 11:42:00+0000', tz='UTC'), Timestamp('2020-09-23 07:56:00+0000', tz='UTC'), Timestamp('2020-09-23 12:12:00+0000', tz='UTC'), Timestamp('2020-09-23 13:33:00+0000', tz='UTC'), Timestamp('2020-09-24 07:35:00+0000', tz='UTC'), Timestamp('2020-09-24 08:03:00+0000', tz='UTC'), Timestamp('2020-09-28 09:13:00+0000', tz='UTC'), Timestamp('2020-09-28 10:31:00+0000', tz='UTC'), Timestamp('2020-09-29 13:02:00+0000', tz='UTC'), Timestamp('2020-09-29 16:39:00+0000', tz='UTC'), Timestamp('2020-09-29 17:34:00+0000', tz='UTC'), Timestamp('2020-09-30 09:57:00+0000', tz='UTC'), Timestamp('2020-10-19 11:31:00+0000', tz='UTC'), Timestamp('2020-10-20 09:00:00+0000', tz='UTC'), Timestamp('2020-10-22 16:04:00+0000', tz='UTC'), Timestamp('2020-10-27 15:50:00+0000', tz='UTC'), Timestamp('2020-10-30 11:25:00+0000', tz='UTC'), Timestamp('2020-10-30 11:27:00+0000', tz='UTC'), Timestamp('2020-10-30 11:28:00+0000', tz='UTC'), Timestamp('2020-10-30 11:29:00+0000', tz='UTC'), Timestamp('2020-10-

createdTime
2020-10-30 13:41:00+00:00    3
2020-10-30 23:41:00+00:00    2
2020-10-30 17:03:00+00:00    2
2020-10-30 23:50:00+00:00    2
2021-03-20 12:29:00+00:00    2
                            ..
2020-10-30 11:25:00+00:00    1
2020-10-30 11:27:00+00:00    1
2020-10-30 11:28:00+00:00    1
2020-10-30 11:29:00+00:00    1
2024-02-23 19:29:00+00:00    1
Name: id, Length: 261, dtype: int64

In [70]:
df['publishTime'] = pd.to_datetime(df['publishTime'])
df['createdTime'] = pd.to_datetime(df['createdTime'])


häufigkeit_publishTime = df['publishTime'].value_counts().sort_index()
häufigkeit_createdTime = df['createdTime'].value_counts().sort_index()

fig = go.Figure()


traces = [
    go.Scatter(
        x=häufigkeit_publishTime.index,
        y=häufigkeit_publishTime.values,
        mode='markers',
        name='publishTime',
        marker=dict(color='blue')  
    ),
    go.Scatter(
        x=häufigkeit_createdTime.index,
        y=häufigkeit_createdTime.values,
        mode='markers',
        name='createdTime',
        marker=dict(color='red')  
    )
]

traces.sort(key=lambda trace: trace.name)

for trace in traces:
    fig.add_trace(trace)

fig.update_layout(
    title='CreatedTime vs publishTime',
    xaxis_title='Datum',
    yaxis_title='Häufigkeit',
    xaxis=dict(
        tickformat='%Y-%m-%d %H:%M:%S',
        tickangle=45,
        rangeslider=dict(visible=True)
    ),
    hovermode='x unified'
)

fig.show()

In [71]:
# Gruppiere nach 'publishTime' und zähle 'id' pro Gruppe
df_publishTime = df.groupby(['publishTime'])['id'].count()

# Konvertiere den Index von 'publishTime' in eine Liste und gib sie aus
print(df_publishTime.index.tolist())

# Ergebnis nach der Anzahl der 'publishTime' absteigend sortieren
df_publishTime.sort_values(ascending=False)


[Timestamp('2020-09-21 11:42:00+0000', tz='UTC'), Timestamp('2020-09-23 07:56:00+0000', tz='UTC'), Timestamp('2020-09-23 12:12:00+0000', tz='UTC'), Timestamp('2020-09-23 13:33:00+0000', tz='UTC'), Timestamp('2020-09-24 07:35:00+0000', tz='UTC'), Timestamp('2020-09-24 08:03:00+0000', tz='UTC'), Timestamp('2020-09-28 09:13:00+0000', tz='UTC'), Timestamp('2020-09-28 10:31:00+0000', tz='UTC'), Timestamp('2020-09-29 13:02:00+0000', tz='UTC'), Timestamp('2020-09-29 16:39:00+0000', tz='UTC'), Timestamp('2020-09-29 17:34:00+0000', tz='UTC'), Timestamp('2020-09-30 09:57:00+0000', tz='UTC'), Timestamp('2020-10-19 11:31:00+0000', tz='UTC'), Timestamp('2020-10-20 09:00:00+0000', tz='UTC'), Timestamp('2020-10-22 16:04:00+0000', tz='UTC'), Timestamp('2020-10-27 15:50:00+0000', tz='UTC'), Timestamp('2020-10-30 11:25:00+0000', tz='UTC'), Timestamp('2020-10-30 11:27:00+0000', tz='UTC'), Timestamp('2020-10-30 11:28:00+0000', tz='UTC'), Timestamp('2020-10-30 11:29:00+0000', tz='UTC'), Timestamp('2020-10-

publishTime
2020-10-30 13:41:00+00:00    3
2023-07-17 10:42:04+00:00    3
2024-01-30 09:22:01+00:00    3
2020-10-30 17:03:00+00:00    2
2021-03-20 12:29:00+00:00    2
                            ..
2024-05-03 07:06:33+00:00    1
2024-05-04 16:00:36+00:00    1
2024-05-20 15:01:07+00:00    1
2024-05-21 08:43:01+00:00    1
2024-05-23 09:13:08+00:00    1
Name: id, Length: 248, dtype: int64

In [72]:
# In dieser Spalte ist nur UnavailabilitiesOfElectricityFacilities

# Gruppiere nach 'messageType' und zähle 'id' pro Gruppe
df_messageType = df.groupby(['messageType'])['id'].count()

# Konvertiere den Index von 'messageType' in eine Liste und gib sie aus
print(df_messageType.index.tolist())

df_messageType

['UnavailabilitiesOfElectricityFacilities']


messageType
UnavailabilitiesOfElectricityFacilities    269
Name: id, dtype: int64

In [73]:
# In dieser Spalte ist nur  messageHeading

# Gruppiere nach 'messageHeading' und zähle 'id' pro Gruppe
df_messageHeading = df.groupby(['messageHeading'])['id'].count()

# Konvertiere den Index von 'messageHeading' in eine Liste und gib sie aus
print(df_messageHeading.index.tolist())

df_messageHeading

['REMIT Information']


messageHeading
REMIT Information    269
Name: id, dtype: int64

In [74]:
# In dieser Spalte ist nur Production unavailability

# Gruppiere nach 'eventType' und zähle 'id' pro Gruppe
df_eventType = df.groupby(['eventType'])['id'].count()

# Konvertiere den Index von 'eventType' in eine Liste und gib sie aus
print(df_eventType.index.tolist())

df_eventType

['Production unavailability']


eventType
Production unavailability    269
Name: id, dtype: int64

In [75]:
# Gruppiere nach 'unavailabilityType' und zähle 'id' pro Gruppe
df_unavailabilityType = df.groupby(['unavailabilityType'])['id'].count()

# Konvertiere den Index von 'unavailabilityType' in eine Liste und gib sie aus
print(df_unavailabilityType.index.tolist())

df_unavailabilityType

['Planned', 'Unplanned']


unavailabilityType
Planned      219
Unplanned     50
Name: id, dtype: int64

In [76]:
df['publishTime'] = pd.to_datetime(df['publishTime']).dt.date

df['counter_1'] = 1


df_aggregated = (
    df.groupby(['publishTime', 'unavailabilityType'], as_index=False)
    .agg({'counter_1': 'sum'})
    .sort_values('unavailabilityType')
)

color_discrete_sequence = ['blue', 'red']
color_discrete_map = {
    'Planned': 'blue',
    'Unplanned': 'red'
}

fig = px.bar(
    df_aggregated,
    x='publishTime',
    y='counter_1',
    color='unavailabilityType',
    title='Verteilung zwischen Planned vs Unplanned Type (Tagesbasis)',
    color_discrete_sequence=color_discrete_sequence,
    color_discrete_map=color_discrete_map
)

fig.update_traces(
    marker_line_width=0,
    selector=dict(type="bar")
)

fig.update_layout(
    xaxis=dict(
        title='Datum',
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis_title='Anzahl',
    title_x=0.5, 
    bargap=0,
    bargroupgap=0  
)

fig.show()

In [77]:
# In dieser Spalte ist nur DONG013

# Gruppiere nach 'participantId' und zähle 'id' pro Gruppe
df_participantId = df.groupby(['participantId'])['id'].count()

# Konvertiere den Index von 'participantId' in eine Liste und gib sie aus
print(df_participantId.index.tolist())

df_participantId

['DONG013']


participantId
DONG013    269
Name: id, dtype: int64

In [78]:
# In dieser Spalte ist nur 11XDONG-PT-----2

# Gruppiere nach 'registrationCode' und zähle 'id' pro Gruppe
df_registrationCode = df.groupby(['registrationCode'])['id'].count()

# Konvertiere den Index von 'registrationCode' in eine Liste und gib sie aus
print(df_registrationCode.index.tolist())

df_registrationCode

['11XDONG-PT-----2']


registrationCode
11XDONG-PT-----2    269
Name: id, dtype: int64

In [79]:
# Gruppiere nach 'assetId' und zähle 'id' pro Gruppe
df_assetId = df.groupby(['assetId'])['id'].count()

# Konvertiere den Index von 'assetId' in eine Liste und gib sie aus
print(df_assetId.index.tolist())
df_assetId


['T_HOWAO-1', 'T_HOWAO-2', 'T_HOWAO-3']


assetId
T_HOWAO-1    109
T_HOWAO-2     84
T_HOWAO-3     76
Name: id, dtype: int64

In [80]:
df['publishTime'] = pd.to_datetime(df['publishTime']).dt.date

df_aggregated = (
    df.groupby(['publishTime', 'assetId'], as_index=False)
    .agg({'counter_1': 'sum'})
    .sort_values('assetId')
)


color_discrete_sequence = ['blue', 'orange','green'] 

color_discrete_map = {
    'T_HOWAO-1': 'blue',
    'T_HOWAO-2': 'orange',
    'T_HOWAO-3': 'green', 
}

fig = px.bar(
    df_aggregated,
    x='publishTime',
    y='counter_1',
    color='assetId',
    color_discrete_sequence=color_discrete_sequence,  
    color_discrete_map=color_discrete_map,  
    title='Ausfälle der verschiedenen Windanlagen (Tagesbasis)'
)

fig.update_traces(
    marker_line_width=0,
    selector=dict(type="bar")
)

fig.update_layout(
    xaxis=dict(
        title='Datum',
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis_title='Anzahl',
    title_x=0.5,  
    bargap=0,  
    bargroupgap=0  
)

fig.show()


In [81]:
# Gruppiere nach 'publishTime' und zähle 'id' pro Gruppe
df_publishTime = df.groupby(['publishTime'])['id'].count()

# Konvertiere den Index von 'publishTime' in eine Liste und gib sie aus
print(df_publishTime.index.tolist())

# Ergebnis nach der Anzahl der 'publishTime' absteigend sortieren
df_publishTime.sort_values(ascending=False)


[datetime.date(2020, 9, 21), datetime.date(2020, 9, 23), datetime.date(2020, 9, 24), datetime.date(2020, 9, 28), datetime.date(2020, 9, 29), datetime.date(2020, 9, 30), datetime.date(2020, 10, 19), datetime.date(2020, 10, 20), datetime.date(2020, 10, 22), datetime.date(2020, 10, 27), datetime.date(2020, 10, 30), datetime.date(2020, 10, 31), datetime.date(2020, 11, 16), datetime.date(2020, 11, 17), datetime.date(2020, 11, 19), datetime.date(2020, 11, 20), datetime.date(2020, 11, 24), datetime.date(2020, 11, 26), datetime.date(2020, 12, 7), datetime.date(2020, 12, 10), datetime.date(2020, 12, 13), datetime.date(2020, 12, 14), datetime.date(2020, 12, 22), datetime.date(2021, 1, 1), datetime.date(2021, 1, 2), datetime.date(2021, 1, 5), datetime.date(2021, 1, 6), datetime.date(2021, 1, 8), datetime.date(2021, 1, 9), datetime.date(2021, 1, 10), datetime.date(2021, 1, 13), datetime.date(2021, 1, 18), datetime.date(2021, 1, 19), datetime.date(2021, 1, 20), datetime.date(2021, 1, 21), datetime.

publishTime
2020-10-30    17
2021-03-20    11
2023-04-24     9
2021-01-10     7
2024-01-23     7
              ..
2024-01-22     1
2024-02-21     1
2024-05-20     1
2024-05-21     1
2024-05-23     1
Name: id, Length: 112, dtype: int64

In [82]:
# In dieser Spalte ist nur Production

# Gruppiere nach 'assetType' und zähle 'id' pro Gruppe
df_assetType = df.groupby(['assetType'])['id'].count()

# Konvertiere den Index von 'assetType' in eine Liste und gib sie aus
print(df_assetType.index.tolist())

df_assetType

['Production']


assetType
Production    269
Name: id, dtype: int64

In [83]:
# Gruppiere nach 'affectedUnit' und zähle 'id' pro Gruppe
df_affectedUnit = df.groupby(['affectedUnit'])['id'].count()

# Konvertiere den Index von 'affectedUnit' in eine Liste und gib sie aus
print(df_affectedUnit.index.tolist())

df_affectedUnit

['HOWAO-1', 'HOWAO-2', 'HOWAO-3']


affectedUnit
HOWAO-1    109
HOWAO-2     84
HOWAO-3     76
Name: id, dtype: int64

In [84]:
# Gruppiere nach 'affectedUnitEIC' und zähle 'id' pro Gruppe
df_affectedUnitEIC = df.groupby(['affectedUnitEIC'])['id'].count()

# Konvertiere den Index von 'affectedUnitEIC' in eine Liste und gib sie aus
print(df_affectedUnitEIC.index.tolist())

df_affectedUnitEIC

['48W00000HOWAO-1M', '48W00000HOWAO-2K', '48W00000HOWAO-3I']


affectedUnitEIC
48W00000HOWAO-1M    109
48W00000HOWAO-2K     84
48W00000HOWAO-3I     76
Name: id, dtype: int64

In [85]:
# Ist dieser Spalte ist nur B7

# Gruppiere nach 'affectedArea' und zähle 'id' pro Gruppe
df_affectedArea = df.groupby(['affectedArea'])['id'].count()

# Konvertiere den Index von 'affectedArea' in eine Liste und gib sie aus
print(df_affectedArea.index.tolist())

df_affectedArea

['B7']


affectedArea
B7    269
Name: id, dtype: int64

In [86]:
# Ist dieser Spalte ist nur 10YGB----------A

# Gruppiere nach 'biddingZone' und zähle 'id' pro Gruppe
df_biddingZone = df.groupby(['biddingZone'])['id'].count()

# Liste der eindeutigen 'biddingZone'-Werte ausgeben
print(df_biddingZone.index.tolist())

df_biddingZone

['10YGB----------A']


biddingZone
10YGB----------A    269
Name: id, dtype: int64

In [87]:
# Ist dieser Spalte ist nur Wind Offshore

# Gruppiere nach 'fuelType' und zähle 'id' pro Gruppe
df_fuelType = df.groupby(['fuelType'])['id'].count()

# Liste der eindeutigen 'fuelType'-Werte ausgeben
print(df_fuelType.index.tolist())

df_fuelType

['Wind Offshore']


fuelType
Wind Offshore    269
Name: id, dtype: int64

In [88]:
# Gruppiere nach 'normalCapacity' und zähle 'id' pro Gruppe
df_normalCapacity = df.groupby(['normalCapacity'])['id'].count()

# Liste der eindeutigen 'normalCapacity'-Werte ausgeben
print(df_normalCapacity.index.tolist())

df_normalCapacity

[400]


normalCapacity
400    269
Name: id, dtype: int64

In [89]:
# Gruppiere nach 'availableCapacity' und zähle 'id' pro Gruppe
df_availableCapacity = df.groupby(['availableCapacity','assetId'])[['id']].count().reset_index()

# Liste der eindeutigen 'availableCapacity'-Werte ausgeben
print(df_availableCapacity.index.tolist())

df_availableCapacity

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]


Unnamed: 0,availableCapacity,assetId,id
0,0,T_HOWAO-1,37
1,0,T_HOWAO-2,43
2,0,T_HOWAO-3,49
3,75,T_HOWAO-3,11
4,100,T_HOWAO-2,3
5,140,T_HOWAO-1,3
6,150,T_HOWAO-1,11
7,150,T_HOWAO-3,3
8,173,T_HOWAO-1,1
9,173,T_HOWAO-3,1


In [90]:
# Gruppiere nach 'availableCapacity' und zähle 'id' pro Gruppe
df_availableCapacity = df.groupby(['unavailableCapacity','assetId'])[['id']].count().reset_index()

# Liste der eindeutigen 'availableCapacity'-Werte ausgeben
print(df_availableCapacity.index.tolist())

df_availableCapacity

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]


Unnamed: 0,unavailableCapacity,assetId,id
0,100,T_HOWAO-1,1
1,106,T_HOWAO-1,9
2,106,T_HOWAO-2,5
3,106,T_HOWAO-3,1
4,113,T_HOWAO-1,7
5,120,T_HOWAO-1,5
6,120,T_HOWAO-2,2
7,127,T_HOWAO-1,9
8,127,T_HOWAO-3,2
9,134,T_HOWAO-1,13


In [91]:
# Gruppiere nach 'availableCapacity' und zähle 'id' pro Gruppe
df_availableCapacity = df.groupby(['normalCapacity'])[['id']].count().reset_index()

# Liste der eindeutigen 'availableCapacity'-Werte ausgeben
print(df_availableCapacity.index.tolist())

df_availableCapacity

[0]


Unnamed: 0,normalCapacity,id
0,400,269


In [92]:
df_e = pd.read_csv(r'C:\Users\Michael Jäckle\Desktop\DoPro2_akt\DoPro2\reasearch\Energy_Data_20200920_20231027.csv')
df_e.head()

Unnamed: 0,dtm,MIP,Solar_MW,Solar_capacity_mwp,Solar_installedcapacity_mwp,Wind_MW,SS_Price,boa_MWh,DA_Price
0,2020-09-20T00:00:00Z,20.06,0.0,2108.489754,2206.064655,996.284,2.5,0.0,32.17
1,2020-09-20T00:30:00Z,19.77,0.0,2108.489754,2206.064655,957.576,15.0,0.0,32.17
2,2020-09-20T01:00:00Z,28.68,0.0,2108.489754,2206.064655,941.044,47.95,0.0,32.0
3,2020-09-20T01:30:00Z,28.97,0.0,2108.489754,2206.064655,964.366,29.13,0.0,32.0
4,2020-09-20T02:00:00Z,28.19,0.0,2108.489754,2206.064655,918.432,28.95,0.0,31.99


In [93]:
df.groupby(['availableCapacity','assetId']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,id,dataset,mrid,revisionNumber,publishTime,createdTime,messageType,messageHeading,eventType,unavailabilityType,...,normalCapacity,unavailableCapacity,eventStatus,eventStartTime,eventEndTime,cause,relatedInformation,outageProfile,eventTime,counter_1
availableCapacity,assetId,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,T_HOWAO-1,37,37,37,37,37,37,37,37,37,37,...,37,37,0,37,37,37,14,37,37,37
0,T_HOWAO-2,43,43,43,43,43,43,43,43,43,43,...,43,43,0,43,43,43,28,43,43,43
0,T_HOWAO-3,49,49,49,49,49,49,49,49,49,49,...,49,49,0,49,49,49,15,49,49,49
75,T_HOWAO-3,11,11,11,11,11,11,11,11,11,11,...,11,11,0,11,11,11,0,11,11,11
100,T_HOWAO-2,3,3,3,3,3,3,3,3,3,3,...,3,3,0,3,3,3,0,3,3,3
140,T_HOWAO-1,3,3,3,3,3,3,3,3,3,3,...,3,3,0,3,3,3,0,3,3,3
150,T_HOWAO-1,11,11,11,11,11,11,11,11,11,11,...,11,11,0,11,11,11,6,11,11,11
150,T_HOWAO-3,3,3,3,3,3,3,3,3,3,3,...,3,3,0,3,3,3,3,3,3,3
173,T_HOWAO-1,1,1,1,1,1,1,1,1,1,1,...,1,1,0,1,1,1,0,1,1,1
173,T_HOWAO-3,1,1,1,1,1,1,1,1,1,1,...,1,1,0,1,1,1,0,1,1,1


In [94]:
df['createdTime'] = pd.to_datetime(df['createdTime'])

color_discrete_map = {
    'T_HOWAO-1': 'blue',
    'T_HOWAO-2': 'orange',
    'T_HOWAO-3': 'green',
}

traces = []
for asset_id, group in df.groupby('assetId'):
    traces.append(
        go.Scatter(
            x=group['createdTime'],
            y=group['availableCapacity'],
            mode='markers',
            name=asset_id,
            marker=dict(color=color_discrete_map[asset_id])  # Farben zuweisen
        )
    )

traces.sort(key=lambda trace: trace.name)

fig = go.Figure()

for trace in traces:
    fig.add_trace(trace)

fig.update_layout(
    title='Die verfügbare Kapazität der einzelnen Hornsea-Anlagen',
    xaxis_title='Datum',
    yaxis_title='Verfügbare Kapazität',
    title_x=0.5  
)

fig.show()

In [95]:
df['publishTime'] = pd.to_datetime(df['publishTime']).dt.date

df_filter = df.filter(['publishTime', 'availableCapacity','counter_1'])

df_aggregated = df_filter.groupby(['publishTime', 'availableCapacity'], as_index=False).agg({'counter_1': 'count'})

available_capacities_sorted = sorted(df_aggregated['availableCapacity'].unique())

fig = go.Figure()

dropdown_buttons = []

for i, available_capacity in enumerate(available_capacities_sorted):
    filtered_df = df_aggregated[df_aggregated['availableCapacity'] == available_capacity]
    
    fig.add_trace(go.Bar(
        x=filtered_df['publishTime'],
        y=filtered_df['counter_1'],
        name=f'availableCapacity: {available_capacity}',
        visible=False 
    ))
    
    dropdown_buttons.append({
        'label': f'availableCapacity: {available_capacity}',
        'method': 'update',
        'args': [{'visible': [i == j for j in range(len(available_capacities_sorted))]}]  
    })

dropdown_buttons.insert(0, {
    'label': 'Alle Kapazitäten',
    'method': 'update',
    'args': [{'visible': [True] * len(available_capacities_sorted)}]
})

fig.data[0].visible = True

fig.update_layout(
    title='availableCapacity',
    xaxis=dict(
        title='Datum',
        rangeslider=dict(visible=True),
        type='date',
        rangeselector=dict(
            buttons=list([
                dict(count=7, label='Woche', step='day', stepmode='backward'),
                dict(count=1, label='Monat', step='month', stepmode='backward'),
                dict(count=3, label='Quartal', step='month', stepmode='backward'),  
                dict(count=1, label='Jahr', step='year', stepmode='backward'),
                dict(step='all')  
            ])
        )
    ),
    yaxis_title='Anzahl',
    yaxis=dict(
        title='Anzahl',
        autorange=False,  
        range=[0, df_aggregated['counter_1'].max() * 1.1]  
    ),
    title_x=0.5, 
    updatemenus=[{
        'buttons': dropdown_buttons,
        'direction': 'down',
        'showactive': True,
        'x': 1.15,  
        'xanchor': 'left',
        'y': 1.15,
        'yanchor': 'top'
    }]
)

fig.show()



In [96]:
# Gruppiere nach 'unavailableCapacity' und zähle 'id' pro Gruppe
df_unavailableCapacity = df.groupby(['unavailableCapacity'])['id'].count()

# Liste der eindeutigen 'unavailableCapacity'-Werte ausgeben
# print(df_unavailableCapacity.index.tolist())

df_unavailableCapacity

unavailableCapacity
100      1
106     15
113      7
120      7
127     11
134     13
140      5
148      1
150     12
160      4
200     31
227      2
250     14
260      3
300      3
325     11
400    129
Name: id, dtype: int64

In [97]:
# Gruppiere nach 'eventStartTime' und zähle 'id' pro Gruppe
df_eventStartTime = df.groupby(['eventStartTime'])['id'].count()

# Liste der eindeutigen 'eventStartTime'-Werte ausgeben
print(df_eventStartTime.index.tolist())

# Ergebnis nach der Anzahl der 'eventStartTime' absteigend sortieren
df_eventStartTime.sort_values(ascending=False)


[Timestamp('2020-09-23 09:00:00+0000', tz='UTC'), Timestamp('2020-09-24 09:00:00+0000', tz='UTC'), Timestamp('2020-09-28 09:00:00+0000', tz='UTC'), Timestamp('2020-09-28 10:00:00+0000', tz='UTC'), Timestamp('2020-09-29 09:00:00+0000', tz='UTC'), Timestamp('2020-10-22 07:00:00+0000', tz='UTC'), Timestamp('2020-10-27 09:00:00+0000', tz='UTC'), Timestamp('2020-10-28 11:30:00+0000', tz='UTC'), Timestamp('2020-10-30 11:00:00+0000', tz='UTC'), Timestamp('2020-10-30 23:00:00+0000', tz='UTC'), Timestamp('2020-10-31 00:00:00+0000', tz='UTC'), Timestamp('2020-11-18 07:00:00+0000', tz='UTC'), Timestamp('2020-11-20 07:00:00+0000', tz='UTC'), Timestamp('2020-11-24 08:00:00+0000', tz='UTC'), Timestamp('2020-11-26 09:00:00+0000', tz='UTC'), Timestamp('2020-12-11 06:00:00+0000', tz='UTC'), Timestamp('2020-12-11 08:00:00+0000', tz='UTC'), Timestamp('2020-12-14 07:30:00+0000', tz='UTC'), Timestamp('2020-12-22 02:00:00+0000', tz='UTC'), Timestamp('2021-01-02 07:00:00+0000', tz='UTC'), Timestamp('2021-01-

eventStartTime
2024-01-23 11:20:00+00:00    17
2024-01-19 16:55:00+00:00    16
2020-10-30 11:00:00+00:00    15
2021-03-19 23:00:00+00:00    14
2021-01-24 07:00:00+00:00    13
                             ..
2023-07-28 06:00:00+00:00     1
2023-07-28 07:00:00+00:00     1
2024-05-03 08:00:00+00:00     1
2024-05-04 08:00:00+00:00     1
2024-05-09 08:00:00+00:00     1
Name: id, Length: 75, dtype: int64

In [98]:
# Gruppiere nach 'eventEndTime' und zähle 'id' pro Gruppe
df_eventEndTime = df.groupby(['eventEndTime'])['id'].count()

# Liste der eindeutigen 'eventEndTime'-Werte ausgeben
print(df_eventEndTime.index.tolist())

# Ergebnis nach der Anzahl der 'eventEndTime' absteigend sortieren
df_eventEndTime.sort_values(ascending=False)


[Timestamp('2020-09-23 12:00:00+0000', tz='UTC'), Timestamp('2020-09-23 12:30:00+0000', tz='UTC'), Timestamp('2020-09-23 14:00:00+0000', tz='UTC'), Timestamp('2020-09-24 14:00:00+0000', tz='UTC'), Timestamp('2020-09-28 14:00:00+0000', tz='UTC'), Timestamp('2020-09-29 14:00:00+0000', tz='UTC'), Timestamp('2020-09-29 17:00:00+0000', tz='UTC'), Timestamp('2020-09-29 18:00:00+0000', tz='UTC'), Timestamp('2020-09-30 11:00:00+0000', tz='UTC'), Timestamp('2020-09-30 17:00:00+0000', tz='UTC'), Timestamp('2020-10-22 17:00:00+0000', tz='UTC'), Timestamp('2020-10-27 18:00:00+0000', tz='UTC'), Timestamp('2020-10-28 14:30:00+0000', tz='UTC'), Timestamp('2020-10-30 15:00:00+0000', tz='UTC'), Timestamp('2020-10-30 18:00:00+0000', tz='UTC'), Timestamp('2020-10-30 22:00:00+0000', tz='UTC'), Timestamp('2020-10-30 23:00:00+0000', tz='UTC'), Timestamp('2020-10-31 00:00:00+0000', tz='UTC'), Timestamp('2020-10-31 01:00:00+0000', tz='UTC'), Timestamp('2020-10-31 02:00:00+0000', tz='UTC'), Timestamp('2020-11-

eventEndTime
2024-01-31 16:00:00+00:00    12
2021-01-15 16:00:00+00:00     6
2024-01-27 16:00:00+00:00     5
2024-02-25 12:00:00+00:00     5
2020-10-31 00:00:00+00:00     5
                             ..
2024-05-23 15:00:00+00:00     1
2024-05-24 15:00:00+00:00     1
2024-05-25 15:00:00+00:00     1
2024-05-26 17:00:00+00:00     1
2024-05-26 19:00:00+00:00     1
Name: id, Length: 157, dtype: int64

In [99]:
# Gruppiere nach 'cause' und zähle 'id' pro Gruppe
df_cause= df.groupby(['cause'])['id'].count()

# Liste der eindeutigen 'cause'-Werte ausgeben
print(df_cause.index.tolist())

# Ergebnis nach der Anzahl der 'cause' absteigend sortieren
df_cause.sort_values(ascending=False)

['Forced Outage', 'Planned Outage', 'Turbine', 'Under Investigation']


cause
Planned Outage         216
Under Investigation     49
Turbine                  3
Forced Outage            1
Name: id, dtype: int64

In [100]:
df.groupby(['cause']).agg({ 'eventTime': ['min','median', 'mean', 'max'],})


Unnamed: 0_level_0,eventTime,eventTime,eventTime,eventTime
Unnamed: 0_level_1,min,median,mean,max
cause,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Forced Outage,0 days 01:24:00,0 days 01:24:00,0 days 01:24:00,0 days 01:24:00
Planned Outage,0 days 01:00:00,0 days 20:00:00,11 days 09:48:43.888888888,124 days 05:40:00
Turbine,0 days 04:00:00,0 days 04:00:00,0 days 04:00:00,0 days 04:00:00
Under Investigation,0 days 01:00:00,0 days 11:00:00,5 days 07:44:34.285714285,37 days 06:31:00


In [101]:
df.dtypes

id                                   int64
dataset                             object
mrid                                object
revisionNumber                       int64
publishTime                         object
createdTime            datetime64[ns, UTC]
messageType                         object
messageHeading                      object
eventType                           object
unavailabilityType                  object
participantId                       object
registrationCode                    object
assetId                             object
assetType                           object
affectedUnit                        object
affectedUnitEIC                     object
affectedArea                        object
biddingZone                         object
fuelType                            object
normalCapacity                       int64
availableCapacity                    int64
unavailableCapacity                  int64
eventStatus                 datetime64[ns]
eventStartT

In [102]:
df.groupby(['eventTime','assetId',])[['id']].count().tail(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,id
eventTime,assetId,Unnamed: 2_level_1
5 days 10:00:00,T_HOWAO-2,1
5 days 11:00:00,T_HOWAO-2,1
6 days 11:00:00,T_HOWAO-1,1
7 days 11:00:00,T_HOWAO-1,3
7 days 23:05:00,T_HOWAO-1,1
7 days 23:05:00,T_HOWAO-3,2
8 days 04:40:00,T_HOWAO-1,1
8 days 09:00:00,T_HOWAO-1,2
8 days 10:31:00,T_HOWAO-2,2
8 days 23:15:00,T_HOWAO-3,1


In [103]:
df.groupby(['eventTime','assetId','cause'])[['id']].count().tail(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id
eventTime,assetId,cause,Unnamed: 3_level_1
5 days 10:00:00,T_HOWAO-2,Planned Outage,1
5 days 11:00:00,T_HOWAO-2,Planned Outage,1
6 days 11:00:00,T_HOWAO-1,Planned Outage,1
7 days 11:00:00,T_HOWAO-1,Planned Outage,3
7 days 23:05:00,T_HOWAO-1,Planned Outage,1
7 days 23:05:00,T_HOWAO-3,Planned Outage,2
8 days 04:40:00,T_HOWAO-1,Planned Outage,1
8 days 09:00:00,T_HOWAO-1,Planned Outage,2
8 days 10:31:00,T_HOWAO-2,Under Investigation,2
8 days 23:15:00,T_HOWAO-3,Planned Outage,1


In [104]:
color_map = {
    'Planned Outage': 'green',
    'NonOperator Alert': 'yellow',
    'Under Investigation': 'orange',
    'Forced Outage': 'red',
    'Turbine': 'blue'
}

startpunkte = df['eventStartTime']
endpunkte = df['eventEndTime']
status = df['cause']  

fig = go.Figure()
gezeigte_legenden = {}  
traces = [] 

desired_order = ['Forced Outage', 'NonOperator Alert', 'Planned Outage', 'Turbine', 'Under Investigation']

for stat in desired_order: 
    for i, (start, end, stat) in enumerate(zip(startpunkte, endpunkte, status), start=1):
        color = color_map.get(stat, 'gray') 
        show_legend = stat not in gezeigte_legenden  
        gezeigte_legenden[stat] = True 

        trace = go.Scatter(
            x=[start, end], 
            y=[i, i], 
            mode='lines+markers',
            name=stat if show_legend else '',  
            line=dict(color=color),
            legendgroup=stat,  
            showlegend=show_legend  
        )
        traces.append(trace)  

traces.sort(key=lambda trace: trace.name)

for trace in traces:
    fig.add_trace(trace)

fig.update_layout(
    title='Art der Verursachung und die benötigte Zeit',
    xaxis=dict(
        title='Datum',
        rangeslider=dict(visible=True),  
        type='date'
    ),
    yaxis_title='Anzahl der Meldung',
    title_x=0.5  
)

fig.show()

In [105]:
df.groupby(['assetId']).agg({ 'eventTime': ['min','median', 'mean', 'max'],})

Unnamed: 0_level_0,eventTime,eventTime,eventTime,eventTime
Unnamed: 0_level_1,min,median,mean,max
assetId,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
T_HOWAO-1,0 days 01:00:00,1 days 12:00:00,14 days 07:46:50.091743119,124 days 05:40:00
T_HOWAO-2,0 days 01:00:00,0 days 11:09:30,4 days 09:00:43.571428571,37 days 06:31:00
T_HOWAO-3,0 days 01:00:00,0 days 09:45:00,10 days 11:37:12.631578947,118 days 16:50:00


In [106]:
df_plot_event_time = df

color_map = {
    'T_HOWAO-1': 'green',
    'T_HOWAO-2': 'orange',
    'T_HOWAO-3': 'blue'
}

startpunkte = df_plot_event_time['eventStartTime']
endpunkte = df_plot_event_time['eventEndTime']
status = df_plot_event_time['assetId']
namen = df_plot_event_time['assetId']  

fig = go.Figure()
gezeigte_legenden = {} 

desired_order = ['T_HOWAO-1', 'T_HOWAO-2', 'T_HOWAO-3']

for stat in desired_order:  
    filtered_data = df_plot_event_time[df_plot_event_time['assetId'] == stat]
    
    for i, (start, end) in enumerate(zip(filtered_data['eventStartTime'], filtered_data['eventEndTime']), start=1):
        color = color_map.get(stat, 'gray')  
        show_legend = color not in gezeigte_legenden  
        gezeigte_legenden[color] = True  

        fig.add_trace(go.Scatter(
            x=[start, end],  
            y=[i, i], 
            mode='lines+markers',
            name=stat if show_legend else '',  
            line=dict(color=color),
            legendgroup=stat,  
            showlegend=show_legend  
        ))

fig.update_layout(
    title='Art der Anlage und die benötigte Zeit',
    xaxis=dict(
        title='Datum',
        rangeslider=dict(visible=True),  
        type='date'
    ),
    yaxis_title='Anzahl der Meldung',
    title_x=0.5, 
)

fig.show()


In [None]:
df_t = df
df_t['eventStartTime'] = pd.to_datetime(df_t['eventStartTime'])
df_t['eventEndTime'] = pd.to_datetime(df_t['eventEndTime'])
df_t['zeitspanne'] = df_t['eventEndTime'] - df_t['eventStartTime']
df_t = df_t.sort_values('zeitspanne')
df_t['Count'] = range(1, len(df_t) + 1)


df_t['Count'] = range(1, len(df_t) + 1)
# Umwandlung der 'zeitspanne' in Timedelta
df_t['zeitspanne'] = pd.to_timedelta(df_t['zeitspanne'])

# Umwandlung in Gesamtstunden für den Plot
df_t['stunden'] = df_t['zeitspanne'].dt.total_seconds() / 3600  # Gesamtstunden

# Erstelle das Diagramm
fig = go.Figure(
    data=[go.Bar(y=df_t['stunden'], x=df_t['Count'], text=df_t['Count'], textposition='auto')],
    layout_title_text="Werte über Zeitspanne"
)

# Layout anpassen
fig.update_layout(
    yaxis_title='Zeit in Stunden',
    xaxis_title='Count',
    xaxis=dict(range=[0, max(df_t['Count']) + 1]),  # Anpassen des Y-Achsenbereichs
    yaxis=dict(tickvals=df_t['stunden'], ticktext=[f"{int(hour)}h" for hour in df_t['stunden']], type='log'),
    height=700 
)

# Zeige das Diagramm an
fig.show()


In [None]:

df_t['Count'] = range(1, len(df_t) + 1)

# Umwandlung der 'zeitspanne' in Timedelta
df_t['zeitspanne'] = pd.to_timedelta(df_t['zeitspanne'])

# Umwandlung in Gesamttage für den Plot
df_t['tage'] = df_t['zeitspanne'].dt.total_seconds() / (3600 * 24)  # Gesamtstunden in Tage umrechnen

# Erstelle das Diagramm
fig = go.Figure(
    data=[go.Bar(y=df_t['tage'], x=df_t['Count'], text=df_t['Count'], textposition='auto')],
    layout_title_text="Werte über Zeitspanne"
)

# Layout anpassen
fig.update_layout(
    yaxis_title='Zeit in Tagen',
    xaxis_title='Count',
    xaxis=dict(range=[0, max(df_t['Count']) + 1]),  # Anpassen des Y-Achsenbereichs
    yaxis=dict(tickvals=df_t['tage'], ticktext=[f"{round(day, 2)}d" for day in df_t['tage']],type = 'log'),
    height=800 
)

# Zeige das Diagramm an
fig.show()
