In [4]:
import pandas as pd
import numpy as np

In [5]:
df_forecast = pd.read_parquet("/home2/s5549329/windAI_rug/WindAi/given_datasets/met_forecast.parquet")
df_nowcasting = pd.read_parquet("/home2/s5549329/windAI_rug/WindAi/given_datasets/met_nowcast.parquet")
df_power = pd.read_parquet("/home2/s5549329/windAI_rug/WindAi/given_datasets/wind_power_per_bidzone.parquet")
df_metadata = pd.read_csv("/home2/s5549329/windAI_rug/WindAi/given_datasets/windparks_bidzone.csv")

In [7]:
df_power_long = df_power.reset_index().melt(
    id_vars="index", 
    var_name="bidding_area", 
    value_name="power_MW"
)
df_power_long = df_power_long.rename(columns={"index" : "time"})
df_power_long["bidding_area"] = df_power_long["bidding_area"].str.replace("ELSPOT ", "")

df_power_long.head()

Unnamed: 0,time,bidding_area,power_MW
0,2020-01-01 00:00:00,NO1,149.285262
1,2020-01-01 01:00:00,NO1,152.634024
2,2020-01-01 02:00:00,NO1,151.163256
3,2020-01-01 03:00:00,NO1,150.223341
4,2020-01-01 04:00:00,NO1,157.415142


In [8]:
start = max(
    df_forecast['time_ref'].min(),
    df_nowcasting.index.min(),
    df_power_long['time'].min()
)

end = min(
    df_forecast['time_ref'].max(),
    df_nowcasting.index.max(),
    df_power_long['time'].max()
)

In [9]:
df_forecast = df_forecast[(df_forecast['time_ref'] >= start) & (df_forecast['time_ref'] <= end)]
df_nowcasting = df_nowcasting[(df_nowcasting.index >= start) & (df_nowcasting.index <= end)]
df_power_long = df_power_long[(df_power_long['time'] >= start) & (df_power_long['time'] <= end)]

In [11]:
meta_set = set(df_metadata["substation_name"])
nowcast_set = set(df_nowcasting["windpark"])
forecast_set = set(df_forecast["sid"])

common = meta_set & nowcast_set & forecast_set
print(f"Common windparks in all datasets: {len(common)}")
print(sorted(common))

to_drop_from_meta = meta_set - common
to_drop_from_nowcast = nowcast_set - common
to_drop_from_forecast = forecast_set - common

print("\n To drop from Meta_data:")
print(sorted(to_drop_from_meta))

print("\n To drop from MET_nowcast:")
print(sorted(to_drop_from_nowcast))

print("\n To drop from MET_forecast:")
print(sorted(to_drop_from_forecast))

Common windparks in all datasets: 59
['Bessakerfjellet', 'Bjerk_VK Vindpark', 'Buheii Vindpark', 'Dønnesfjord Vind', 'Egersund Vindkrv', 'Einarsdalen', 'Engerfjellet', 'Fakken', 'Frøya Vindpark', 'Geitfjellet', 'Gismarvik Vindpark', 'Guleslettene Vindpark', 'Hamnefjell', 'Haraheia', 'Haram Kraft', 'Harbaksfjellet', 'Havøygavlen', 'Hennøy', 'Hitra', 'Hundhammerfjelle', 'Hån Vindpark', 'Høg Jæren', 'Kjølberget', 'Kjøllefjord vindpark', 'Kvenndalsfjellet', 'Kvitfjell vindpark', 'Lista VK', 'Lutelandet', 'Marker Vindpark', 'Mehuken', 'Midtfjellet', 'Måkaknuten', 'Nygårdsfjellet', 'Okla Vindkraftverk', 'Raggovidda', 'Raskiftet', 'Raudfjell Vindpark', 'Sandøy Vindkraft', 'Skomakerfjellet', 'Skudeneshavn', 'Smøla', 'Songkjølen', 'Stokkeland', 'Stokkfjellet', 'Storheia', 'Storøy vindpark', 'Svåheia', 'Sørmarkfjellet', 'Tellenes', 'Tindafjellet', 'Tysvær Vindpark', 'Valsn_Vimle', 'Valsneset', 'Vardafjell', 'Ytre Vikna', 'Ånstadblåheia', 'Øie', 'Øyfjell1', 'Øyfjell2']

 To drop from Meta_data:
[

In [12]:
df_metadata = df_metadata[df_metadata["substation_name"].isin(common)]
df_nowcasting = df_nowcasting[df_nowcasting["windpark"].isin(common)]
df_forecast = df_forecast[df_forecast["sid"].isin(common)]

In [20]:
df_forecast.head(10)

Unnamed: 0,sid,time_ref,time,lt,ws10m_00,ws10m_01,ws10m_02,ws10m_03,ws10m_04,ws10m_05,...,mslp_mean,mslp_std,mslp_min,mslp_max,mslp_median,g10m_mean,g10m_std,g10m_min,g10m_max,g10m_median
0,Engerfjellet,2020-02-15 12:00:00,2020-02-15 12:00:00,0,2.096957,2.110652,2.304629,1.942044,1.447011,2.288181,...,100502.5326,38.8795,100421.97,100556.3,100504.93,6.050247,1.032772,4.830834,8.110546,5.778911
1,Engerfjellet,2020-02-15 12:00:00,2020-02-15 13:00:00,1,3.449402,3.436193,3.205872,3.450461,3.143307,4.278822,...,100555.149267,21.675226,100520.78,100584.84,100549.89,8.121981,0.954943,6.240438,9.914339,8.091939
2,Engerfjellet,2020-02-15 12:00:00,2020-02-15 14:00:00,2,3.348926,2.903557,2.779971,3.733802,3.148597,2.912592,...,100519.4782,44.229292,100451.266,100590.41,100507.06,8.276285,0.832413,7.15308,10.031385,8.2643
3,Engerfjellet,2020-02-15 12:00:00,2020-02-15 15:00:00,3,3.095971,2.560161,2.448126,2.267286,3.050008,3.310141,...,100468.608667,49.885909,100383.75,100544.17,100466.37,7.532237,0.792371,6.25536,8.821701,7.6002
4,Engerfjellet,2020-02-15 12:00:00,2020-02-15 16:00:00,4,2.821545,2.965743,3.235484,3.21273,3.064931,2.42039,...,100395.238133,66.165161,100277.03,100478.2,100410.734,7.929002,1.163573,6.151643,10.11644,7.432707
5,Engerfjellet,2020-02-15 12:00:00,2020-02-15 17:00:00,5,4.114142,4.149489,4.104244,4.065797,3.809343,4.873203,...,100289.338733,73.893959,100156.32,100396.57,100298.71,10.285796,0.801055,8.964025,11.855978,9.993366
6,Engerfjellet,2020-02-15 12:00:00,2020-02-15 18:00:00,6,3.79005,4.21273,3.893336,3.688228,3.41074,4.357417,...,100149.909667,84.097432,99984.59,100267.44,100146.05,10.75146,1.063748,8.63798,12.40716,10.93178
7,Engerfjellet,2020-02-15 12:00:00,2020-02-15 19:00:00,7,4.555474,4.861906,4.477373,4.838147,4.630666,5.141986,...,99981.2138,85.766356,99811.836,100146.41,99993.54,11.008561,1.170151,8.603417,13.478138,10.690941
8,Engerfjellet,2020-02-15 12:00:00,2020-02-15 20:00:00,8,5.318298,5.352931,5.288188,5.70717,5.265142,4.461141,...,99780.5706,103.07579,99587.47,99974.6,99794.484,12.23378,1.194848,9.949709,14.675722,12.271126
9,Engerfjellet,2020-02-15 12:00:00,2020-02-15 21:00:00,9,6.120933,4.955779,6.08468,5.510438,6.465667,5.140792,...,99524.449333,114.105489,99333.28,99741.18,99517.34,13.409425,1.393946,9.488266,15.050922,13.812285


In [10]:
print(df_forecast.columns[:20])
print(df_forecast.columns[20:40])
print(df_forecast.columns[40:60])
print(df_forecast.columns[60:80])
print(df_forecast.columns[80:])

Index(['sid', 'time_ref', 'time', 'lt', 'ws10m_00', 'ws10m_01', 'ws10m_02',
       'ws10m_03', 'ws10m_04', 'ws10m_05', 'ws10m_06', 'ws10m_07', 'ws10m_08',
       'ws10m_09', 'ws10m_10', 'ws10m_11', 'ws10m_12', 'ws10m_13', 'ws10m_14',
       'wd10m_00'],
      dtype='object')
Index(['wd10m_01', 'wd10m_02', 'wd10m_03', 'wd10m_04', 'wd10m_05', 'wd10m_06',
       'wd10m_07', 'wd10m_08', 'wd10m_09', 'wd10m_10', 'wd10m_11', 'wd10m_12',
       'wd10m_13', 'wd10m_14', 't2m_00', 't2m_01', 't2m_02', 't2m_03',
       't2m_04', 't2m_05'],
      dtype='object')
Index(['t2m_06', 't2m_07', 't2m_08', 't2m_09', 't2m_10', 't2m_11', 't2m_12',
       't2m_13', 't2m_14', 'rh2m_00', 'rh2m_01', 'rh2m_02', 'rh2m_03',
       'rh2m_04', 'rh2m_05', 'rh2m_06', 'rh2m_07', 'rh2m_08', 'rh2m_09',
       'rh2m_10'],
      dtype='object')
Index(['rh2m_11', 'rh2m_12', 'rh2m_13', 'rh2m_14', 'mslp_00', 'mslp_01',
       'mslp_02', 'mslp_03', 'mslp_04', 'mslp_05', 'mslp_06', 'mslp_07',
       'mslp_08', 'mslp_09', 'mslp_10

In [17]:
df_forecast.groupby(["sid", "time_ref"]).size().value_counts()

62    74989
65    30267
56      118
49      118
12       59
19       59
27       59
7        59
23       59
5        59
59       59
37       59
25       59
1        59
Name: count, dtype: int64

In [15]:
df_forecast.shape

(6648828, 94)

In [35]:
forecast_cols_w_speed = [col for col in df_forecast.columns if "ws10m_" in col]
forecast_cols_w_direction = [col for col in df_forecast.columns if "wd10m_" in col]
forecast_cols_t = [col for col in df_forecast.columns if "t2m_" in col]
forecast_cols_rh = [col for col in df_forecast.columns if "rh2m_" in col]
forecast_cols_mslp = [col for col in df_forecast.columns if "mslp_" in col]
forecast_cols_g = [col for col in df_forecast.columns if "g10m_" in col]

df_forecast["ws10m_mean"]   = df_forecast[forecast_cols_w_speed].mean(axis=1)
df_forecast["ws10m_std"]    = df_forecast[forecast_cols_w_speed].std(axis=1)

angles_rad = np.radians(df_forecast[forecast_cols_w_direction])

mean_angle_rad = np.arctan2(
    np.mean(np.sin(angles_rad), axis=1),
    np.mean(np.cos(angles_rad), axis=1)
)
df_forecast["wd10m_mean"] = (np.degrees(mean_angle_rad) + 360) % 360

df_forecast["wd10m_std"] = df_forecast[forecast_cols_w_direction].std(axis=1)


df_forecast["t2m_mean"]    = df_forecast[forecast_cols_t].mean(axis=1)
df_forecast["t2m_std"]     = df_forecast[forecast_cols_t].std(axis=1)


df_forecast["rh2m_mean"]   = df_forecast[forecast_cols_rh].mean(axis=1)
df_forecast["rh2m_std"]    = df_forecast[forecast_cols_rh].std(axis=1)

df_forecast["mslp_mean"]   = df_forecast[forecast_cols_mslp].mean(axis=1)
df_forecast["mslp_std"]    = df_forecast[forecast_cols_mslp].std(axis=1)

df_forecast["g10m_mean"]   = df_forecast[forecast_cols_g].mean(axis=1)
df_forecast["g10m_std"]    = df_forecast[forecast_cols_g].std(axis=1)

summary_cols = [
    "sid",	"time_ref",	"time",	"lt",
    "ws10m_mean", "ws10m_std", 
    "wd10m_mean", "wd10m_std",
    "t2m_mean", "t2m_std", 
    "rh2m_mean", "rh2m_std",
    "mslp_mean", "mslp_std", 
    "g10m_mean", "g10m_std",
]

# df_forecast_avg = df_forecast.groupby(["sid", "time", "time_ref"])[summary_cols].mean().reset_index()
# df_forecast_avg["time"] = pd.to_datetime(df_forecast_avg["time"])
df_forecast_new = df_forecast[summary_cols]



In [100]:
df_forecast_new[40:80]


Unnamed: 0,sid,time_ref,time,lt,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,mslp_mean,mslp_std,g10m_mean,g10m_std
40,Engerfjellet,2020-02-15 12:00:00,2020-02-17 04:00:00,40,5.577845,1.621073,202.725326,39.921877,251.406278,64.689279,0.860523,0.225679,89943.515328,23139.655631,13.112347,3.773433
41,Engerfjellet,2020-02-15 12:00:00,2020-02-17 05:00:00,41,5.689439,1.651892,202.268048,39.83337,251.581722,64.739665,0.858045,0.224327,89963.43087,23146.487632,13.29907,3.867043
42,Engerfjellet,2020-02-15 12:00:00,2020-02-17 06:00:00,42,5.878479,1.683037,201.805097,39.691918,251.667127,64.765998,0.859224,0.224052,89983.827591,23153.26847,13.800872,3.927934
43,Engerfjellet,2020-02-15 12:00:00,2020-02-17 07:00:00,43,6.104576,1.716436,200.919825,39.593112,251.79115,64.800619,0.853749,0.222153,89997.086245,23157.828566,14.262961,4.010886
44,Engerfjellet,2020-02-15 12:00:00,2020-02-17 08:00:00,44,6.338974,1.711021,200.719141,39.617384,252.157427,64.890011,0.842989,0.219298,90005.870984,23161.202001,14.867508,4.015089
45,Engerfjellet,2020-02-15 12:00:00,2020-02-17 09:00:00,45,6.609488,1.779394,202.308495,39.881631,252.586794,64.994227,0.835418,0.217656,90016.605158,23165.33018,15.476441,4.163867
46,Engerfjellet,2020-02-15 12:00:00,2020-02-17 10:00:00,46,6.823486,1.845768,204.077905,40.210512,252.971209,65.08889,0.831645,0.218305,90016.800563,23166.516044,16.000679,4.324424
47,Engerfjellet,2020-02-15 12:00:00,2020-02-17 11:00:00,47,6.854095,1.891254,206.938315,40.715917,253.40112,65.203527,0.815623,0.216427,90022.029686,23168.267555,16.258437,4.460161
48,Engerfjellet,2020-02-15 12:00:00,2020-02-17 12:00:00,48,6.976447,1.888503,209.276372,41.475052,254.093312,65.392538,0.774008,0.209849,90029.22177,23169.02197,16.644307,4.617059
49,Engerfjellet,2020-02-15 12:00:00,2020-02-17 13:00:00,49,7.042866,1.869944,211.791704,42.355239,254.591007,65.523715,0.73312,0.201357,90045.952708,23171.743775,16.964853,4.517865


In [97]:
df_forecast_new["time_ref"] = pd.to_datetime(df_forecast_new["time_ref"], errors="coerce")

# Global range
start = df_forecast_new["time_ref"].min()
end   = df_forecast_new["time_ref"].max()
print("Global time_ref range:", start, "→", end, "| span:", end - start)
fc_rows_total   = len(df_forecast_new["time_ref"].unique())
print(fc_rows_total)

# Global range
start = df_nowcasting.index.min()
end   = df_nowcasting.index.max()
nc_rows_total   = len(df_nowcasting.index.unique())
print("Global time range:", start, "→", end, "| span:", end - start)
print(nc_rows_total)

Global time_ref range: 2020-02-15 12:00:00 → 2025-03-24 09:00:00 | span: 1863 days 21:00:00
1798
Global time range: 2020-02-15 12:00:00 → 2025-03-24 09:00:00 | span: 1863 days 21:00:00
44733


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_forecast_new["time_ref"] = pd.to_datetime(df_forecast_new["time_ref"], errors="coerce")


In [104]:
forecast_61 = pd.read_parquet("/home2/s5549329/windAI_rug/WindAi/deep_learning/created_datasets/met_forecast_with_leads_H61.parquet")
forecast_61.head(20)

Unnamed: 0,sid,time_ref,ws10m_mean_0,ws10m_mean_1,ws10m_mean_2,ws10m_mean_3,ws10m_mean_4,ws10m_mean_5,ws10m_mean_6,ws10m_mean_7,...,g10m_std_51,g10m_std_52,g10m_std_53,g10m_std_54,g10m_std_55,g10m_std_56,g10m_std_57,g10m_std_58,g10m_std_59,g10m_std_60
0,Bessakerfjellet,2020-02-15 12:00:00,7.294972,6.639898,5.024071,3.518165,4.532025,5.560911,6.848884,9.472562,...,6.094175,6.831852,7.117222,6.123054,5.48007,6.022764,5.669324,4.861687,4.030315,4.148357
1,Bessakerfjellet,2020-02-16 12:00:00,4.953945,5.710693,5.919473,5.874915,5.695876,5.731377,7.197757,8.983572,...,4.603957,4.668656,5.38443,4.961266,4.84409,4.109278,3.07721,2.950645,3.102819,3.683643
2,Bessakerfjellet,2020-02-17 12:00:00,6.44675,6.037915,5.891667,6.126029,5.68579,4.208697,4.758615,7.542913,...,1.260243,1.334078,1.766119,2.06967,2.183306,2.165596,1.966034,2.05518,2.251314,2.298304
3,Bessakerfjellet,2020-02-18 12:00:00,7.14739,7.180537,7.08053,6.619458,5.932155,5.452873,4.673819,4.491698,...,2.411113,2.418086,2.790038,3.265137,3.830096,3.970761,3.77516,4.091057,3.852074,4.270032
4,Bessakerfjellet,2020-02-19 12:00:00,1.270761,1.323647,1.11147,0.972133,2.059055,2.815914,3.492006,4.263374,...,2.941091,2.896981,2.34655,3.116223,3.24311,2.688352,2.838423,2.380705,2.776905,2.942892
5,Bessakerfjellet,2020-02-20 12:00:00,7.684442,7.880573,8.023482,7.244879,6.909192,6.941263,6.49433,5.724519,...,4.124628,3.582701,4.005215,4.487528,4.770156,4.352493,4.18762,4.931399,4.256845,5.742807
6,Bessakerfjellet,2020-02-21 12:00:00,7.923422,8.976622,10.052862,10.733857,12.26336,12.483198,12.443856,13.1854,...,4.177667,3.671205,3.203532,3.599895,5.267495,3.913475,4.285461,4.154516,4.951018,4.106176
7,Bessakerfjellet,2020-02-22 12:00:00,8.466386,9.087441,9.084126,8.925029,8.129742,8.26217,7.609765,8.704932,...,4.883778,5.18684,5.05419,4.632434,4.395582,2.513876,3.244794,2.428022,2.151371,2.168583
8,Bessakerfjellet,2020-02-23 12:00:00,6.418722,6.167782,8.261014,8.495838,8.076169,7.593651,7.163591,6.844626,...,3.249451,3.382872,3.116343,3.065032,3.073093,3.151372,3.154881,3.141805,3.136744,3.142357
9,Bessakerfjellet,2020-02-24 12:00:00,2.277085,2.773014,2.535106,2.843889,2.400955,2.754668,2.766204,3.067266,...,2.185335,2.187525,2.125165,1.933145,1.867292,1.841713,1.80006,1.825846,1.769692,1.73952


In [78]:
lt_max_series = df_forecast_new.groupby(["sid","time_ref"])["lt"].max()

min_val = lt_max_series.min()

mins = lt_max_series[lt_max_series.eq(min_val)]
print(mins)



sid                    time_ref           
Bessakerfjellet        2022-12-26 12:00:00    4
Bjerk_VK Vindpark      2022-12-26 12:00:00    4
Buheii Vindpark        2022-12-26 12:00:00    4
Dønnesfjord Vind       2022-12-26 12:00:00    4
Egersund Vindkrv       2022-12-26 12:00:00    4
Einarsdalen            2022-12-26 12:00:00    4
Engerfjellet           2022-12-26 12:00:00    4
Fakken                 2022-12-26 12:00:00    4
Frøya Vindpark         2022-12-26 12:00:00    4
Geitfjellet            2022-12-26 12:00:00    4
Gismarvik Vindpark     2022-12-26 12:00:00    4
Guleslettene Vindpark  2022-12-26 12:00:00    4
Hamnefjell             2022-12-26 12:00:00    4
Haraheia               2022-12-26 12:00:00    4
Haram Kraft            2022-12-26 12:00:00    4
Harbaksfjellet         2022-12-26 12:00:00    4
Havøygavlen            2022-12-26 12:00:00    4
Hennøy                 2022-12-26 12:00:00    4
Hitra                  2022-12-26 12:00:00    4
Hundhammerfjelle       2022-12-26 12:00:00   

In [84]:
lt_max_series

H = 10

bad_idx = lt_max_series[lt_max_series < H].index
df_keep = (
    df_forecast_new
      .set_index(["sid", "time_ref"])
      .drop(index=bad_idx)          # drop whole issuances
      .reset_index()
)
df_keep.groupby(["sid", "time_ref"])["lt"].max().mean()

61.73942093541203

In [105]:
df_nowcasting = df_nowcasting.rename(columns={"windpark": "sid"})
df_nowcasting = df_nowcasting.reset_index()
df_nowcasting["time"] = pd.to_datetime(df_nowcasting["time"])
df_nowcasting.head(20)

Unnamed: 0,time,air_temperature_2m,air_pressure_at_sea_level,relative_humidity_2m,precipitation_amount,wind_speed_10m,wind_direction_10m,sid
0,2020-02-15 12:00:00,276.335876,99795.664062,0.821845,0.0002238679,8.104369,246.912506,Bessakerfjellet
1,2020-02-15 13:00:00,276.076935,99832.359375,0.8917,0.0,6.657547,233.088379,Bessakerfjellet
2,2020-02-15 14:00:00,275.843689,99830.40625,0.881385,9.209055e-05,5.876413,229.860733,Bessakerfjellet
3,2020-02-15 15:00:00,274.957886,99853.375,0.837362,0.0,3.329724,203.135254,Bessakerfjellet
4,2020-02-15 16:00:00,273.522888,99767.03125,0.812765,8.627482e-15,3.800424,142.939713,Bessakerfjellet
5,2020-02-15 17:00:00,272.60141,99576.359375,0.809347,0.0,5.041376,112.551399,Bessakerfjellet
6,2020-02-15 18:00:00,272.448273,99489.25,0.76856,0.0,6.641575,120.135864,Bessakerfjellet
7,2020-02-15 19:00:00,272.523743,99326.703125,0.750021,0.0,8.362316,124.843414,Bessakerfjellet
8,2020-02-15 20:00:00,272.793854,99098.59375,0.66445,0.0,10.146371,128.849121,Bessakerfjellet
9,2020-02-15 21:00:00,273.264313,98712.90625,0.615699,0.0,12.543638,130.298584,Bessakerfjellet


In [106]:
df_nowcasting = df_nowcasting.rename(columns={
    "air_temperature_2m": "t2m_now",
    "air_pressure_at_sea_level": "mslp_now",
    "relative_humidity_2m": "rh2m_now",
    "wind_speed_10m": "ws10m_now",
    "wind_direction_10m": "wd10m_now",
    "precipitation_amount": "precip_now",
})

In [None]:
def hourly_to_daily_anchored_at_noon_no_tempcols(df_now, strict_24=True):
    """
    Collapse hourly nowcasting to one row per (sid, time_ref @ 12:00).
    - Uses simple means for scalars
    - Uses circular mean for wd10m_now (done inline; no temp columns)
    - If strict_24=True, keep only days with >=24 hourly samples
    """
    df = df_now.copy()

    # ensure 'time' column exists
    if 'time' not in df.columns and getattr(df.index, "name", None) == 'time':
        df = df.reset_index()

    df['time'] = pd.to_datetime(df['time'], errors='coerce')

    # anchor each hour to the day whose reference is 12:00
    anchor = pd.Timedelta(hours=12)
    df['time_ref'] = (df['time'] - anchor).dt.floor('D') + anchor

    agg = (
        df.groupby(['sid', 'time_ref'])
          .agg(
              t2m_now    = ('t2m_now',    'mean'),
              mslp_now   = ('mslp_now',   'mean'),
              rh2m_now   = ('rh2m_now',   'mean'),
              precip_now = ('precip_now', 'mean'),  # keep as mean (unchanged)
              ws10m_now  = ('ws10m_now',  'mean'),
              wd10m_now  = ('wd10m_now',  "mean"),
              n_hours    = ('time',       'size'),
          )
          .reset_index()
    )

    if strict_24:
        agg = agg[agg['n_hours'] >= 24].drop(columns='n_hours')
    else:
        agg = agg.drop(columns='n_hours')

    return agg.sort_values(['sid', 'time_ref']).reset_index(drop=True)

In [107]:
df_nowcasting.head()

Unnamed: 0,time,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now,sid
0,2020-02-15 12:00:00,276.335876,99795.664062,0.821845,0.0002238679,8.104369,246.912506,Bessakerfjellet
1,2020-02-15 13:00:00,276.076935,99832.359375,0.8917,0.0,6.657547,233.088379,Bessakerfjellet
2,2020-02-15 14:00:00,275.843689,99830.40625,0.881385,9.209055e-05,5.876413,229.860733,Bessakerfjellet
3,2020-02-15 15:00:00,274.957886,99853.375,0.837362,0.0,3.329724,203.135254,Bessakerfjellet
4,2020-02-15 16:00:00,273.522888,99767.03125,0.812765,8.627482e-15,3.800424,142.939713,Bessakerfjellet


In [18]:
df_merged_weather = pd.merge(df_forecast_avg, df_nowcasting, on=["sid", "time"], how="inner")
df_merged_weather.head()

Unnamed: 0,sid,time,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,mslp_mean,mslp_std,g10m_mean,g10m_std,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now
0,Bessakerfjellet,2020-02-15 12:00:00,7.294972,1.315213,235.690861,6.591162,277.660277,0.497662,0.860875,0.055132,99775.850667,44.163305,14.636166,1.523008,276.335876,99795.664062,0.821845,0.0002238679,8.104369,246.912506
1,Bessakerfjellet,2020-02-15 13:00:00,6.639898,0.825916,227.075603,6.880623,277.61752,0.535993,0.876358,0.041548,99816.510933,41.808119,13.252052,1.81868,276.076935,99832.359375,0.8917,0.0,6.657547,233.088379
2,Bessakerfjellet,2020-02-15 14:00:00,5.024071,0.778648,216.288069,16.480253,277.159103,0.600454,0.876495,0.033818,99825.021933,48.331877,11.903856,1.58932,275.843689,99830.40625,0.881385,9.209055e-05,5.876413,229.860733
3,Bessakerfjellet,2020-02-15 15:00:00,3.518165,0.369989,174.336054,16.726134,276.441382,0.502003,0.871451,0.037151,99804.551333,45.440402,8.558784,1.124682,274.957886,99853.375,0.837362,0.0,3.329724,203.135254
4,Bessakerfjellet,2020-02-15 16:00:00,4.532025,0.414117,140.536212,6.787815,275.231637,0.345054,0.870454,0.027691,99732.513333,53.689166,7.365164,0.674284,273.522888,99767.03125,0.812765,8.627482e-15,3.800424,142.939713


In [19]:
df_merged_weather.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ws10m_mean,2628686.0,5.042511,3.080494,0.2163493,2.711206,4.314413,6.701969,27.741219
ws10m_std,2628686.0,1.028123,0.569205,0.02620698,0.62733,0.902428,1.28774,8.003559
wd10m_mean,2628686.0,187.460442,78.022339,2.893837,128.76944,184.671647,248.556912,357.572138
wd10m_std,2628686.0,31.785038,35.899724,0.4788954,7.993889,16.101267,41.576451,184.855365
t2m_mean,2628686.0,278.475022,7.032232,249.5424,273.688618,278.244952,283.657917,304.872186
t2m_std,2628686.0,0.738235,0.422825,0.06846008,0.451539,0.63722,0.905126,5.786043
rh2m_mean,2628686.0,0.813164,0.132957,0.1624328,0.732471,0.835163,0.918755,1.000007
rh2m_std,2628686.0,0.049903,0.027279,1.55972e-07,0.030287,0.045695,0.065304,0.264658
mslp_mean,2628686.0,101025.466478,1263.913994,94522.04,100229.624692,101115.302739,101894.767117,105103.114833
mslp_std,2628686.0,103.82103,53.100382,14.18981,66.757817,90.73286,126.81677,834.418348


In [20]:
df_metadata = df_metadata.rename(columns={"substation_name": "sid"})
df_metadata["bidding_area"] = df_metadata["bidding_area"].str.replace("ELSPOT ", "")
df_metadata.head()

Unnamed: 0,bidding_area,sid,operating_power_max,prod_start_new,eic_code
0,NO1,Engerfjellet,52.8,2022-09-19 13:00:00,50WP00000002158R
1,NO1,Hån Vindpark,21.0,2022-10-26 22:00:00,50WP000000022251
2,NO1,Kjølberget,55.9,2020-07-07 09:00:00,50WP00000002085S
3,NO1,Marker Vindpark,54.0,2020-01-01 00:00:00,50WP00000002048Y
4,NO1,Raskiftet,111.6,2020-01-01 00:00:00,50WP00000001718K


In [21]:
df_power_long.head()

Unnamed: 0,time,bidding_area,power_MW
1056,2020-02-15 12:00:00,NO1,72.052233
1057,2020-02-15 13:00:00,NO1,44.666173
1058,2020-02-15 14:00:00,NO1,65.059451
1059,2020-02-15 15:00:00,NO1,86.562093
1060,2020-02-15 16:00:00,NO1,77.71218


In [22]:
df_power_long_sid = pd.merge(
    df_power_long,
    df_metadata[["bidding_area", "sid"]],
    on="bidding_area",
    how="left"
)
df_power_long_sid.head()

Unnamed: 0,time,bidding_area,power_MW,sid
0,2020-02-15 12:00:00,NO1,72.052233,Engerfjellet
1,2020-02-15 12:00:00,NO1,72.052233,Hån Vindpark
2,2020-02-15 12:00:00,NO1,72.052233,Kjølberget
3,2020-02-15 12:00:00,NO1,72.052233,Marker Vindpark
4,2020-02-15 12:00:00,NO1,72.052233,Raskiftet


In [24]:
df_final_sid = pd.merge(
    df_merged_weather,
    df_power_long_sid[["time", "sid", "power_MW", "bidding_area"]],
    on=["time", "sid"],
    how="inner"
)

df_final_sid.head()

Unnamed: 0,sid,time,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,...,g10m_mean,g10m_std,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now,power_MW,bidding_area
0,Bessakerfjellet,2020-02-15 12:00:00,7.294972,1.315213,235.690861,6.591162,277.660277,0.497662,0.860875,0.055132,...,14.636166,1.523008,276.335876,99795.664062,0.821845,0.0002238679,8.104369,246.912506,588.636703,NO3
1,Bessakerfjellet,2020-02-15 13:00:00,6.639898,0.825916,227.075603,6.880623,277.61752,0.535993,0.876358,0.041548,...,13.252052,1.81868,276.076935,99832.359375,0.8917,0.0,6.657547,233.088379,460.42402,NO3
2,Bessakerfjellet,2020-02-15 14:00:00,5.024071,0.778648,216.288069,16.480253,277.159103,0.600454,0.876495,0.033818,...,11.903856,1.58932,275.843689,99830.40625,0.881385,9.209055e-05,5.876413,229.860733,297.167215,NO3
3,Bessakerfjellet,2020-02-15 15:00:00,3.518165,0.369989,174.336054,16.726134,276.441382,0.502003,0.871451,0.037151,...,8.558784,1.124682,274.957886,99853.375,0.837362,0.0,3.329724,203.135254,314.183239,NO3
4,Bessakerfjellet,2020-02-15 16:00:00,4.532025,0.414117,140.536212,6.787815,275.231637,0.345054,0.870454,0.027691,...,7.365164,0.674284,273.522888,99767.03125,0.812765,8.627482e-15,3.800424,142.939713,488.289472,NO3


In [32]:
regions = ['NO1', 'NO2', 'NO3', 'NO4']

dfs_by_region = {
    region: df_final_sid[df_final_sid['bidding_area'] == region].copy()
    for region in regions
}

# Optionally assign each to a variable
df_NO1 = dfs_by_region['NO1']
df_NO2 = dfs_by_region['NO2']
df_NO3 = dfs_by_region['NO3']
df_NO4 = dfs_by_region['NO4']

In [33]:
df_NO1.head()

Unnamed: 0,sid,time,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,...,g10m_mean,g10m_std,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now,power_MW,bidding_area
311752,Engerfjellet,2020-02-15 12:00:00,2.139608,0.448171,197.466405,11.766175,276.379494,0.395486,0.823404,0.051589,...,6.050247,1.032772,275.69696,100518.625,0.844108,0.0,2.051558,193.094101,72.052233,NO1
311753,Engerfjellet,2020-02-15 13:00:00,3.384011,0.476801,207.005511,6.122282,276.422947,0.375587,0.825629,0.047831,...,8.121981,0.954943,275.59491,100569.164062,0.846076,1.011751e-12,4.104134,213.308594,44.666173,NO1
311754,Engerfjellet,2020-02-15 14:00:00,3.116065,0.38097,197.767234,7.315636,276.095935,0.374416,0.84762,0.059136,...,8.276285,0.832413,275.363647,100541.710938,0.870398,0.001087279,4.097149,206.835739,65.059451,NO1
311755,Engerfjellet,2020-02-15 15:00:00,2.856627,0.388535,186.196795,7.583082,275.603674,0.339881,0.886108,0.062451,...,7.532237,0.792371,275.035309,100471.039062,0.908804,0.0,2.759731,192.723495,86.562093,NO1
311756,Engerfjellet,2020-02-15 16:00:00,3.303811,0.59979,185.264509,4.739498,274.942599,0.471217,0.942954,0.04761,...,7.929002,1.163573,274.119812,100361.8125,0.968054,0.0,2.770049,185.444046,77.71218,NO1


In [34]:
df_final_sid_sorted = df_NO1.sort_values("time")

df_final_sid_sorted = df_final_sid_sorted.reset_index(drop=True)

df_final_sid_sorted.head()

Unnamed: 0,sid,time,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,...,g10m_mean,g10m_std,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now,power_MW,bidding_area
0,Engerfjellet,2020-02-15 12:00:00,2.139608,0.448171,197.466405,11.766175,276.379494,0.395486,0.823404,0.051589,...,6.050247,1.032772,275.69696,100518.625,0.844108,0.0,2.051558,193.094101,72.052233,NO1
1,Marker Vindpark,2020-02-15 12:00:00,3.623393,0.597573,221.412773,5.837665,278.827723,0.40863,0.861682,0.062364,...,9.070228,1.157618,278.657776,100649.882812,0.858711,0.0004665708,3.400307,214.018539,72.052233,NO1
2,Kjølberget,2020-02-15 12:00:00,2.844588,0.641755,241.523584,8.134762,274.222099,0.391442,0.832552,0.037155,...,8.612595,0.790866,274.429047,100517.117188,0.852363,5.648359e-05,2.650362,241.739349,72.052233,NO1
3,Raskiftet,2020-02-15 12:00:00,3.048176,0.668024,223.848515,6.317409,273.155895,0.517255,0.855875,0.041512,...,8.389608,0.620932,274.314148,100501.5625,0.871207,1.548093e-11,2.429872,225.234009,72.052233,NO1
4,Songkjølen,2020-02-15 12:00:00,2.607449,0.459566,209.322322,6.38272,276.276455,0.481296,0.857556,0.053489,...,6.645694,0.842453,276.17868,100547.359375,0.865516,0.0005105352,2.676929,201.159042,72.052233,NO1


In [36]:
df_aggregated = df_NO1.groupby("time").mean().reset_index()

In [37]:
df_final_sid_sorted = df_aggregated.sort_values("time")

df_final_sid_sorted = df_final_sid_sorted.reset_index(drop=True)

df_final_sid_sorted.head()

Unnamed: 0,time,ws10m_mean,ws10m_std,wd10m_mean,wd10m_std,t2m_mean,t2m_std,rh2m_mean,rh2m_std,mslp_mean,mslp_std,g10m_mean,g10m_std,t2m_now,mslp_now,rh2m_now,precip_now,ws10m_now,wd10m_now,power_MW
0,2020-02-15 12:00:00,2.9706,0.56288,219.089599,7.411888,276.28225,0.43307,0.848453,0.051379,100543.824778,40.157405,7.942286,0.929867,276.305115,100563.515625,0.858436,0.000186,2.752213,214.877258,72.052233
1,2020-02-15 13:00:00,3.358135,0.378822,218.735674,5.881298,276.304744,0.411686,0.842491,0.051295,100579.730611,31.46275,8.381703,0.991343,276.314178,100598.023438,0.852978,0.000617,3.71113,217.242462,44.666173
2,2020-02-15 14:00:00,3.122371,0.452259,206.372581,6.159813,275.975155,0.384912,0.867678,0.05523,100564.250167,44.3714,8.208556,0.789677,275.94693,100593.203125,0.884543,0.000181,3.48318,207.492828,65.059451
3,2020-02-15 15:00:00,2.897094,0.368524,191.98583,6.705309,275.442468,0.356906,0.911628,0.054637,100517.015378,55.889691,7.577428,0.77154,275.529144,100512.46875,0.918824,3.8e-05,2.808272,196.660538,86.562093
4,2020-02-15 16:00:00,3.035758,0.494459,183.04583,6.086777,274.831068,0.454263,0.958786,0.038495,100458.559889,63.529883,7.640261,0.91611,274.937225,100429.398438,0.980287,0.00454,2.647012,183.282181,77.71218
