In [None]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)
print("t",torch.__version__)

torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

cuda
t 2.1.0+cu118


In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import math

# Transformer Model
class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_dim, model_dim, num_heads, num_layers, dropout=0.1):
        super(TimeSeriesTransformer, self).__init__()
        self.linear = nn.Linear(input_dim, model_dim)
        self.transformer = nn.Transformer(d_model=model_dim, nhead=num_heads,
                                          num_encoder_layers=num_layers,
                                          dropout=dropout)
        self.fc = nn.Linear(model_dim, input_dim)

    def forward(self, src, tgt=None):
        src = self.linear(src)
        if tgt is None:
            tgt = src
        else:
            tgt = self.linear(tgt)
        output = self.transformer(src, tgt)
        return self.fc(output)

In [3]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import math

# Load data
data = pd.read_csv('filtered_data.csv')

# Preprocess data
data['tavg'] = pd.to_numeric(data['tavg'], errors='coerce')
data['prcp'] = pd.to_numeric(data['prcp'], errors='coerce')
data.dropna(subset=['tavg', 'prcp'], inplace=True)

# Normalize data
tavg_mean, tavg_std = data['tavg'].mean(), data['tavg'].std()
prcp_mean, prcp_std = data['prcp'].mean(), data['prcp'].std()
data['tavg_norm'] = (data['tavg'] - tavg_mean) / tavg_std
data['prcp_norm'] = (data['prcp'] - prcp_mean) / prcp_std


results_normalised = []
results=[]
stations = data['station'].unique()

for station in stations:
    train_mask = (data['station'] == station) & (data['year'].between(1977, 2019))
    val_mask = (data['station'] == station) & (data['year'] == 2020)
    train_data = data[train_mask][['tavg_norm', 'prcp_norm']].values
    val_data = data[val_mask][['tavg_norm', 'prcp_norm']].values

    train_src = torch.tensor(train_data, dtype=torch.float32).unsqueeze(0).to(device)
    # Decoder target should have the same length as train_src but ending in val_data (2020 data)
    train_tgt = np.vstack([train_data[:-12], val_data])
    train_tgt = torch.tensor(train_tgt, dtype=torch.float32).unsqueeze(0).to(device)

    model = TimeSeriesTransformer(2, 512, 8, 3).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    epochs = 500
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(train_src, train_tgt)  # Encoder uses train_data, Decoder tries to predict val_data
        loss = criterion(output[:, -12:], train_tgt[:, -12:])  # Focus loss on the last 12 months (2020 data)
        loss.backward()
        optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f'Station {station}, Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    model.eval()
    with torch.no_grad():
        # For prediction, provide only the encoder input. Let the decoder generate predictions.
        pred_2021 = model(train_src)
        pred_2021 = pred_2021[:, -12:]

    # Retrieve last 12 months for predictions
    pred_values = pred_2021[0, -12:].tolist()

    actual_tavg_norm_2021 = data[(data['station'] == station) & (data['year'] == 2021)][['tavg_norm']].values.flatten().tolist()
    actual_prcp_norm_2021 = data[(data['station'] == station) & (data['year'] == 2021)][['prcp_norm']].values.flatten().tolist()

    pred_tavg_norm = [x[0] for x in pred_values]
    pred_prcp_norm = [x[1] for x in pred_values]

    station_results = [station] + pred_tavg_norm + pred_prcp_norm + actual_tavg_norm_2021 + actual_prcp_norm_2021
    results_normalised.append(station_results)

    #### Inverse normalization
    pred_tavg = [x[0] * tavg_std + tavg_mean for x in pred_values]
    pred_prcp = [x[1] * prcp_std + prcp_mean for x in pred_values]
    actual_tavg_2021 = data[(data['station'] == station) & (data['year'] == 2021)][['tavg']].values.flatten().tolist()
    actual_prcp_2021 = data[(data['station'] == station) & (data['year'] == 2021)][['prcp']].values.flatten().tolist()

    station_results = [station] + pred_tavg + pred_prcp + actual_tavg_2021 + actual_prcp_2021
    results.append(station_results)



# Create final CSV with origin data
columns = ['station'] + [f'predicted_tavg_{i}' for i in range(1, 13)] + [f'predicted_prcp_{i}' for i in range(1, 13)] + [f'actual_tavg_{i}' for i in range(1, 13)] + [f'actual_prcp_{i}' for i in range(1, 13)]
df_results = pd.DataFrame(results, columns=columns)
df_results.to_csv('predictions_vs_actual.csv', index=False)



Station 02935, Epoch [100/500], Loss: 0.0450
Station 02935, Epoch [200/500], Loss: 0.0230
Station 02935, Epoch [300/500], Loss: 0.0160
Station 02935, Epoch [400/500], Loss: 0.0218
Station 02935, Epoch [500/500], Loss: 0.0123




Station 01025, Epoch [100/500], Loss: 0.0896
Station 01025, Epoch [200/500], Loss: 0.0328
Station 01025, Epoch [300/500], Loss: 0.0341
Station 01025, Epoch [400/500], Loss: 0.0309
Station 01025, Epoch [500/500], Loss: 0.0219




Station D4592, Epoch [100/500], Loss: 0.0588
Station D4592, Epoch [200/500], Loss: 0.0294
Station D4592, Epoch [300/500], Loss: 0.0217
Station D4592, Epoch [400/500], Loss: 0.0213
Station D4592, Epoch [500/500], Loss: 0.0133




Station 11035, Epoch [100/500], Loss: 0.0636
Station 11035, Epoch [200/500], Loss: 0.0414
Station 11035, Epoch [300/500], Loss: 0.0352
Station 11035, Epoch [400/500], Loss: 0.0232
Station 11035, Epoch [500/500], Loss: 0.0333




Station D2444, Epoch [100/500], Loss: 0.0498
Station D2444, Epoch [200/500], Loss: 0.0225
Station D2444, Epoch [300/500], Loss: 0.0254
Station D2444, Epoch [400/500], Loss: 0.0094
Station D2444, Epoch [500/500], Loss: 0.0205




Station D4169, Epoch [100/500], Loss: 0.0461
Station D4169, Epoch [200/500], Loss: 0.0148
Station D4169, Epoch [300/500], Loss: 0.0171
Station D4169, Epoch [400/500], Loss: 0.0288
Station D4169, Epoch [500/500], Loss: 0.0178




Station 10513, Epoch [100/500], Loss: 0.0316
Station 10513, Epoch [200/500], Loss: 0.0312
Station 10513, Epoch [300/500], Loss: 0.0170
Station 10513, Epoch [400/500], Loss: 0.0136
Station 10513, Epoch [500/500], Loss: 0.0198




Station 10261, Epoch [100/500], Loss: 0.0570
Station 10261, Epoch [200/500], Loss: 0.0199
Station 10261, Epoch [300/500], Loss: 0.0297
Station 10261, Epoch [400/500], Loss: 0.0143
Station 10261, Epoch [500/500], Loss: 0.0270




Station 72745, Epoch [100/500], Loss: 0.0746
Station 72745, Epoch [200/500], Loss: 0.0548
Station 72745, Epoch [300/500], Loss: 0.0381
Station 72745, Epoch [400/500], Loss: 0.0315
Station 72745, Epoch [500/500], Loss: 0.0168




Station D5664, Epoch [100/500], Loss: 0.0418
Station D5664, Epoch [200/500], Loss: 0.0442
Station D5664, Epoch [300/500], Loss: 0.0214
Station D5664, Epoch [400/500], Loss: 0.0197
Station D5664, Epoch [500/500], Loss: 0.0222




Station 72546, Epoch [100/500], Loss: 0.0498
Station 72546, Epoch [200/500], Loss: 0.0256
Station 72546, Epoch [300/500], Loss: 0.0606
Station 72546, Epoch [400/500], Loss: 0.0338
Station 72546, Epoch [500/500], Loss: 0.0275




Station 10499, Epoch [100/500], Loss: 0.0674
Station 10499, Epoch [200/500], Loss: 0.0280
Station 10499, Epoch [300/500], Loss: 0.0249
Station 10499, Epoch [400/500], Loss: 0.0377
Station 10499, Epoch [500/500], Loss: 0.0473




Station 10738, Epoch [100/500], Loss: 0.0564
Station 10738, Epoch [200/500], Loss: 0.0352
Station 10738, Epoch [300/500], Loss: 0.0211
Station 10738, Epoch [400/500], Loss: 0.0140
Station 10738, Epoch [500/500], Loss: 0.0152




Station 03026, Epoch [100/500], Loss: 0.0365
Station 03026, Epoch [200/500], Loss: 0.0518
Station 03026, Epoch [300/500], Loss: 0.0232
Station 03026, Epoch [400/500], Loss: 0.0146
Station 03026, Epoch [500/500], Loss: 0.0249




Station D0320, Epoch [100/500], Loss: 0.0242
Station D0320, Epoch [200/500], Loss: 0.0493
Station D0320, Epoch [300/500], Loss: 0.0167
Station D0320, Epoch [400/500], Loss: 0.0274
Station D0320, Epoch [500/500], Loss: 0.0227




Station 10338, Epoch [100/500], Loss: 0.0382
Station 10338, Epoch [200/500], Loss: 0.0303
Station 10338, Epoch [300/500], Loss: 0.0155
Station 10338, Epoch [400/500], Loss: 0.0316
Station 10338, Epoch [500/500], Loss: 0.0142




Station 10113, Epoch [100/500], Loss: 0.0251
Station 10113, Epoch [200/500], Loss: 0.0273
Station 10113, Epoch [300/500], Loss: 0.0283
Station 10113, Epoch [400/500], Loss: 0.0163
Station 10113, Epoch [500/500], Loss: 0.0336




Station 10675, Epoch [100/500], Loss: 0.0344
Station 10675, Epoch [200/500], Loss: 0.0178
Station 10675, Epoch [300/500], Loss: 0.0355
Station 10675, Epoch [400/500], Loss: 0.0111
Station 10675, Epoch [500/500], Loss: 0.0150




Station 10852, Epoch [100/500], Loss: 0.0365
Station 10852, Epoch [200/500], Loss: 0.0230
Station 10852, Epoch [300/500], Loss: 0.0281
Station 10852, Epoch [400/500], Loss: 0.0280
Station 10852, Epoch [500/500], Loss: 0.0126




Station 10488, Epoch [100/500], Loss: 0.0292
Station 10488, Epoch [200/500], Loss: 0.0370
Station 10488, Epoch [300/500], Loss: 0.0145
Station 10488, Epoch [400/500], Loss: 0.0148
Station 10488, Epoch [500/500], Loss: 0.0245




Station 91182, Epoch [100/500], Loss: 0.0526
Station 91182, Epoch [200/500], Loss: 0.0565
Station 91182, Epoch [300/500], Loss: 0.0409
Station 91182, Epoch [400/500], Loss: 0.0173
Station 91182, Epoch [500/500], Loss: 0.0187




Station 72219, Epoch [100/500], Loss: 0.0981
Station 72219, Epoch [200/500], Loss: 0.0236
Station 72219, Epoch [300/500], Loss: 0.0355
Station 72219, Epoch [400/500], Loss: 0.0490
Station 72219, Epoch [500/500], Loss: 0.0413




Station 72386, Epoch [100/500], Loss: 0.0563
Station 72386, Epoch [200/500], Loss: 0.0773
Station 72386, Epoch [300/500], Loss: 0.0283
Station 72386, Epoch [400/500], Loss: 0.0138
Station 72386, Epoch [500/500], Loss: 0.0113




Station 10729, Epoch [100/500], Loss: 0.0405
Station 10729, Epoch [200/500], Loss: 0.0207
Station 10729, Epoch [300/500], Loss: 0.0195
Station 10729, Epoch [400/500], Loss: 0.0188
Station 10729, Epoch [500/500], Loss: 0.0140




Station D3402, Epoch [100/500], Loss: 0.0427
Station D3402, Epoch [200/500], Loss: 0.0231
Station D3402, Epoch [300/500], Loss: 0.0357
Station D3402, Epoch [400/500], Loss: 0.0147
Station D3402, Epoch [500/500], Loss: 0.0182




Station 10688, Epoch [100/500], Loss: 0.0551
Station 10688, Epoch [200/500], Loss: 0.0402
Station 10688, Epoch [300/500], Loss: 0.0176
Station 10688, Epoch [400/500], Loss: 0.0181
Station 10688, Epoch [500/500], Loss: 0.0153




Station 72597, Epoch [100/500], Loss: 0.0285
Station 72597, Epoch [200/500], Loss: 0.0347
Station 72597, Epoch [300/500], Loss: 0.0156
Station 72597, Epoch [400/500], Loss: 0.0194
Station 72597, Epoch [500/500], Loss: 0.0121




Station 72434, Epoch [100/500], Loss: 0.0258
Station 72434, Epoch [200/500], Loss: 0.0863
Station 72434, Epoch [300/500], Loss: 0.0210
Station 72434, Epoch [400/500], Loss: 0.0388
Station 72434, Epoch [500/500], Loss: 0.0196




Station D2750, Epoch [100/500], Loss: 0.0292
Station D2750, Epoch [200/500], Loss: 0.0287
Station D2750, Epoch [300/500], Loss: 0.0236
Station D2750, Epoch [400/500], Loss: 0.0199
Station D2750, Epoch [500/500], Loss: 0.0131




Station D4651, Epoch [100/500], Loss: 0.0293
Station D4651, Epoch [200/500], Loss: 0.0208
Station D4651, Epoch [300/500], Loss: 0.0357
Station D4651, Epoch [400/500], Loss: 0.0152
Station D4651, Epoch [500/500], Loss: 0.0275




Station D5229, Epoch [100/500], Loss: 0.0393
Station D5229, Epoch [200/500], Loss: 0.0262
Station D5229, Epoch [300/500], Loss: 0.0389
Station D5229, Epoch [400/500], Loss: 0.0241
Station D5229, Epoch [500/500], Loss: 0.0223




Station 08314, Epoch [100/500], Loss: 0.0684
Station 08314, Epoch [200/500], Loss: 0.0397
Station 08314, Epoch [300/500], Loss: 0.0141
Station 08314, Epoch [400/500], Loss: 0.0278
Station 08314, Epoch [500/500], Loss: 0.0085




Station 10567, Epoch [100/500], Loss: 0.0462
Station 10567, Epoch [200/500], Loss: 0.0164
Station 10567, Epoch [300/500], Loss: 0.0229
Station 10567, Epoch [400/500], Loss: 0.0277
Station 10567, Epoch [500/500], Loss: 0.0127




Station 06700, Epoch [100/500], Loss: 0.0765
Station 06700, Epoch [200/500], Loss: 0.0256
Station 06700, Epoch [300/500], Loss: 0.0188
Station 06700, Epoch [400/500], Loss: 0.0225
Station 06700, Epoch [500/500], Loss: 0.0147




Station 10162, Epoch [100/500], Loss: 0.0360
Station 10162, Epoch [200/500], Loss: 0.0251
Station 10162, Epoch [300/500], Loss: 0.0333
Station 10162, Epoch [400/500], Loss: 0.0181
Station 10162, Epoch [500/500], Loss: 0.0146




Station 11240, Epoch [100/500], Loss: 0.0478
Station 11240, Epoch [200/500], Loss: 0.0389
Station 11240, Epoch [300/500], Loss: 0.0267
Station 11240, Epoch [400/500], Loss: 0.0341
Station 11240, Epoch [500/500], Loss: 0.0213




Station 10348, Epoch [100/500], Loss: 0.0437
Station 10348, Epoch [200/500], Loss: 0.0299
Station 10348, Epoch [300/500], Loss: 0.0110
Station 10348, Epoch [400/500], Loss: 0.0097
Station 10348, Epoch [500/500], Loss: 0.0119




Station D3739, Epoch [100/500], Loss: 0.1543
Station D3739, Epoch [200/500], Loss: 0.0208
Station D3739, Epoch [300/500], Loss: 0.0232
Station D3739, Epoch [400/500], Loss: 0.0355
Station D3739, Epoch [500/500], Loss: 0.0141




Station 06660, Epoch [100/500], Loss: 0.0611
Station 06660, Epoch [200/500], Loss: 0.0276
Station 06660, Epoch [300/500], Loss: 0.0348
Station 06660, Epoch [400/500], Loss: 0.0236
Station 06660, Epoch [500/500], Loss: 0.0351




Station D1266, Epoch [100/500], Loss: 0.0425
Station D1266, Epoch [200/500], Loss: 0.0250
Station D1266, Epoch [300/500], Loss: 0.0183
Station D1266, Epoch [400/500], Loss: 0.0117
Station D1266, Epoch [500/500], Loss: 0.0188




Station 10028, Epoch [100/500], Loss: 0.0327
Station 10028, Epoch [200/500], Loss: 0.0267
Station 10028, Epoch [300/500], Loss: 0.0225
Station 10028, Epoch [400/500], Loss: 0.0233
Station 10028, Epoch [500/500], Loss: 0.0086




Station 72450, Epoch [100/500], Loss: 0.0538
Station 72450, Epoch [200/500], Loss: 0.0307
Station 72450, Epoch [300/500], Loss: 0.0293
Station 72450, Epoch [400/500], Loss: 0.0277
Station 72450, Epoch [500/500], Loss: 0.0337




Station 11120, Epoch [100/500], Loss: 0.0828
Station 11120, Epoch [200/500], Loss: 0.0481
Station 11120, Epoch [300/500], Loss: 0.0273
Station 11120, Epoch [400/500], Loss: 0.0173
Station 11120, Epoch [500/500], Loss: 0.0230




Station 10410, Epoch [100/500], Loss: 0.0389
Station 10410, Epoch [200/500], Loss: 0.0463
Station 10410, Epoch [300/500], Loss: 0.0337
Station 10410, Epoch [400/500], Loss: 0.0222
Station 10410, Epoch [500/500], Loss: 0.0134




Station 72445, Epoch [100/500], Loss: 0.0466
Station 72445, Epoch [200/500], Loss: 0.0461
Station 72445, Epoch [300/500], Loss: 0.0547
Station 72445, Epoch [400/500], Loss: 0.0379
Station 72445, Epoch [500/500], Loss: 0.0159




Station 10015, Epoch [100/500], Loss: 0.0801
Station 10015, Epoch [200/500], Loss: 0.0313
Station 10015, Epoch [300/500], Loss: 0.0203
Station 10015, Epoch [400/500], Loss: 0.0212
Station 10015, Epoch [500/500], Loss: 0.0152




Station EDXU0, Epoch [100/500], Loss: 0.0694
Station EDXU0, Epoch [200/500], Loss: 0.0393
Station EDXU0, Epoch [300/500], Loss: 0.0227
Station EDXU0, Epoch [400/500], Loss: 0.0197
Station EDXU0, Epoch [500/500], Loss: 0.0192




Station D4323, Epoch [100/500], Loss: 0.0163
Station D4323, Epoch [200/500], Loss: 0.0097
Station D4323, Epoch [300/500], Loss: 0.0161
Station D4323, Epoch [400/500], Loss: 0.0161
Station D4323, Epoch [500/500], Loss: 0.0125




Station D0151, Epoch [100/500], Loss: 0.0281
Station D0151, Epoch [200/500], Loss: 0.0260
Station D0151, Epoch [300/500], Loss: 0.0331
Station D0151, Epoch [400/500], Loss: 0.0254
Station D0151, Epoch [500/500], Loss: 0.0143




Station 10763, Epoch [100/500], Loss: 0.0394
Station 10763, Epoch [200/500], Loss: 0.0184
Station 10763, Epoch [300/500], Loss: 0.0191
Station 10763, Epoch [400/500], Loss: 0.0164
Station 10763, Epoch [500/500], Loss: 0.0130




Station 72327, Epoch [100/500], Loss: 0.0454
Station 72327, Epoch [200/500], Loss: 0.0637
Station 72327, Epoch [300/500], Loss: 0.0189
Station 72327, Epoch [400/500], Loss: 0.0110
Station 72327, Epoch [500/500], Loss: 0.0197




Station 08306, Epoch [100/500], Loss: 0.0339
Station 08306, Epoch [200/500], Loss: 0.0554
Station 08306, Epoch [300/500], Loss: 0.0346
Station 08306, Epoch [400/500], Loss: 0.0248
Station 08306, Epoch [500/500], Loss: 0.0186




Station 10170, Epoch [100/500], Loss: 0.0407
Station 10170, Epoch [200/500], Loss: 0.0357
Station 10170, Epoch [300/500], Loss: 0.0205
Station 10170, Epoch [400/500], Loss: 0.0118
Station 10170, Epoch [500/500], Loss: 0.0195




Station 10400, Epoch [100/500], Loss: 0.0293
Station 10400, Epoch [200/500], Loss: 0.0464
Station 10400, Epoch [300/500], Loss: 0.0266
Station 10400, Epoch [400/500], Loss: 0.0166
Station 10400, Epoch [500/500], Loss: 0.0165




Station 72681, Epoch [100/500], Loss: 0.0744
Station 72681, Epoch [200/500], Loss: 0.0403
Station 72681, Epoch [300/500], Loss: 0.0285
Station 72681, Epoch [400/500], Loss: 0.0202
Station 72681, Epoch [500/500], Loss: 0.0158




Station EDMW0, Epoch [100/500], Loss: 0.0180
Station EDMW0, Epoch [200/500], Loss: 0.0322
Station EDMW0, Epoch [300/500], Loss: 0.0230
Station EDMW0, Epoch [400/500], Loss: 0.0182
Station EDMW0, Epoch [500/500], Loss: 0.0150




Station 72250, Epoch [100/500], Loss: 0.0840
Station 72250, Epoch [200/500], Loss: 0.0558
Station 72250, Epoch [300/500], Loss: 0.0609
Station 72250, Epoch [400/500], Loss: 0.0334
Station 72250, Epoch [500/500], Loss: 0.0244




Station 72278, Epoch [100/500], Loss: 0.0557
Station 72278, Epoch [200/500], Loss: 0.0248
Station 72278, Epoch [300/500], Loss: 0.0311
Station 72278, Epoch [400/500], Loss: 0.0240
Station 72278, Epoch [500/500], Loss: 0.0142




Station 01241, Epoch [100/500], Loss: 0.0549
Station 01241, Epoch [200/500], Loss: 0.0327
Station 01241, Epoch [300/500], Loss: 0.0124
Station 01241, Epoch [400/500], Loss: 0.0304
Station 01241, Epoch [500/500], Loss: 0.0182




Station D4287, Epoch [100/500], Loss: 0.0799
Station D4287, Epoch [200/500], Loss: 0.0205
Station D4287, Epoch [300/500], Loss: 0.0226
Station D4287, Epoch [400/500], Loss: 0.0144
Station D4287, Epoch [500/500], Loss: 0.0138




Station 10628, Epoch [100/500], Loss: 0.0490
Station 10628, Epoch [200/500], Loss: 0.0176
Station 10628, Epoch [300/500], Loss: 0.0165
Station 10628, Epoch [400/500], Loss: 0.0163
Station 10628, Epoch [500/500], Loss: 0.0105




Station 10946, Epoch [100/500], Loss: 0.0569
Station 10946, Epoch [200/500], Loss: 0.0500
Station 10946, Epoch [300/500], Loss: 0.0337
Station 10946, Epoch [400/500], Loss: 0.0241
Station 10946, Epoch [500/500], Loss: 0.0346




Station D3137, Epoch [100/500], Loss: 0.0589
Station D3137, Epoch [200/500], Loss: 0.0339
Station D3137, Epoch [300/500], Loss: 0.0143
Station D3137, Epoch [400/500], Loss: 0.0267
Station D3137, Epoch [500/500], Loss: 0.0128




Station 10427, Epoch [100/500], Loss: 0.0443
Station 10427, Epoch [200/500], Loss: 0.0327
Station 10427, Epoch [300/500], Loss: 0.0280
Station 10427, Epoch [400/500], Loss: 0.0149
Station 10427, Epoch [500/500], Loss: 0.0235




Station 10382, Epoch [100/500], Loss: 0.0416
Station 10382, Epoch [200/500], Loss: 0.0224
Station 10382, Epoch [300/500], Loss: 0.0172
Station 10382, Epoch [400/500], Loss: 0.0216
Station 10382, Epoch [500/500], Loss: 0.0116




Station 72315, Epoch [100/500], Loss: 0.0660
Station 72315, Epoch [200/500], Loss: 0.0657
Station 72315, Epoch [300/500], Loss: 0.0212
Station 72315, Epoch [400/500], Loss: 0.0252
Station 72315, Epoch [500/500], Loss: 0.0212




Station 10962, Epoch [100/500], Loss: 0.0737
Station 10962, Epoch [200/500], Loss: 0.0421
Station 10962, Epoch [300/500], Loss: 0.0142
Station 10962, Epoch [400/500], Loss: 0.0391
Station 10962, Epoch [500/500], Loss: 0.0186




Station 10948, Epoch [100/500], Loss: 0.1055
Station 10948, Epoch [200/500], Loss: 0.0381
Station 10948, Epoch [300/500], Loss: 0.0465
Station 10948, Epoch [400/500], Loss: 0.0453
Station 10948, Epoch [500/500], Loss: 0.0321




Station 72698, Epoch [100/500], Loss: 0.0307
Station 72698, Epoch [200/500], Loss: 0.0668
Station 72698, Epoch [300/500], Loss: 0.0365
Station 72698, Epoch [400/500], Loss: 0.0207
Station 72698, Epoch [500/500], Loss: 0.0191




Station 10035, Epoch [100/500], Loss: 0.0498
Station 10035, Epoch [200/500], Loss: 0.0271
Station 10035, Epoch [300/500], Loss: 0.0269
Station 10035, Epoch [400/500], Loss: 0.0384
Station 10035, Epoch [500/500], Loss: 0.0367




Station 08487, Epoch [100/500], Loss: 0.0483
Station 08487, Epoch [200/500], Loss: 0.0446
Station 08487, Epoch [300/500], Loss: 0.0432
Station 08487, Epoch [400/500], Loss: 0.0277
Station 08487, Epoch [500/500], Loss: 0.0242




Station 10815, Epoch [100/500], Loss: 0.0468
Station 10815, Epoch [200/500], Loss: 0.0221
Station 10815, Epoch [300/500], Loss: 0.0372
Station 10815, Epoch [400/500], Loss: 0.0313
Station 10815, Epoch [500/500], Loss: 0.0116




Station D3257, Epoch [100/500], Loss: 0.0364
Station D3257, Epoch [200/500], Loss: 0.0313
Station D3257, Epoch [300/500], Loss: 0.0206
Station D3257, Epoch [400/500], Loss: 0.0263
Station D3257, Epoch [500/500], Loss: 0.0143




Station 10430, Epoch [100/500], Loss: 0.0321
Station 10430, Epoch [200/500], Loss: 0.0167
Station 10430, Epoch [300/500], Loss: 0.0298
Station 10430, Epoch [400/500], Loss: 0.0179
Station 10430, Epoch [500/500], Loss: 0.0195




Station 10381, Epoch [100/500], Loss: 0.0498
Station 10381, Epoch [200/500], Loss: 0.0202
Station 10381, Epoch [300/500], Loss: 0.0195
Station 10381, Epoch [400/500], Loss: 0.0164
Station 10381, Epoch [500/500], Loss: 0.0117




Station 10544, Epoch [100/500], Loss: 0.0767
Station 10544, Epoch [200/500], Loss: 0.0276
Station 10544, Epoch [300/500], Loss: 0.0157
Station 10544, Epoch [400/500], Loss: 0.0337
Station 10544, Epoch [500/500], Loss: 0.0144




Station 70273, Epoch [100/500], Loss: 0.0712
Station 70273, Epoch [200/500], Loss: 0.0674
Station 70273, Epoch [300/500], Loss: 0.0324
Station 70273, Epoch [400/500], Loss: 0.0254
Station 70273, Epoch [500/500], Loss: 0.0097




Station 10020, Epoch [100/500], Loss: 0.0205
Station 10020, Epoch [200/500], Loss: 0.0365
Station 10020, Epoch [300/500], Loss: 0.0272
Station 10020, Epoch [400/500], Loss: 0.0243
Station 10020, Epoch [500/500], Loss: 0.0198




Station 10961, Epoch [100/500], Loss: 0.0957
Station 10961, Epoch [200/500], Loss: 0.0731
Station 10961, Epoch [300/500], Loss: 0.0765
Station 10961, Epoch [400/500], Loss: 0.0590
Station 10961, Epoch [500/500], Loss: 0.0211




Station 10742, Epoch [100/500], Loss: 0.0528
Station 10742, Epoch [200/500], Loss: 0.0225
Station 10742, Epoch [300/500], Loss: 0.0279
Station 10742, Epoch [400/500], Loss: 0.0195
Station 10742, Epoch [500/500], Loss: 0.0235




Station 10554, Epoch [100/500], Loss: 0.0581
Station 10554, Epoch [200/500], Loss: 0.0183
Station 10554, Epoch [300/500], Loss: 0.0346
Station 10554, Epoch [400/500], Loss: 0.0302
Station 10554, Epoch [500/500], Loss: 0.0209




Station 10385, Epoch [100/500], Loss: 0.0540
Station 10385, Epoch [200/500], Loss: 0.0325
Station 10385, Epoch [300/500], Loss: 0.0213
Station 10385, Epoch [400/500], Loss: 0.0216
Station 10385, Epoch [500/500], Loss: 0.0156




Station 10637, Epoch [100/500], Loss: 0.0403
Station 10637, Epoch [200/500], Loss: 0.0209
Station 10637, Epoch [300/500], Loss: 0.0180
Station 10637, Epoch [400/500], Loss: 0.0091
Station 10637, Epoch [500/500], Loss: 0.0138




Station D0217, Epoch [100/500], Loss: 0.0553
Station D0217, Epoch [200/500], Loss: 0.0354
Station D0217, Epoch [300/500], Loss: 0.0348
Station D0217, Epoch [400/500], Loss: 0.0271
Station D0217, Epoch [500/500], Loss: 0.0208




Station 10384, Epoch [100/500], Loss: 0.0273
Station 10384, Epoch [200/500], Loss: 0.0417
Station 10384, Epoch [300/500], Loss: 0.0194
Station 10384, Epoch [400/500], Loss: 0.0150
Station 10384, Epoch [500/500], Loss: 0.0204




Station D2947, Epoch [100/500], Loss: 0.0409
Station D2947, Epoch [200/500], Loss: 0.0261
Station D2947, Epoch [300/500], Loss: 0.0280
Station D2947, Epoch [400/500], Loss: 0.0182
Station D2947, Epoch [500/500], Loss: 0.0139




Station 10379, Epoch [100/500], Loss: 0.0480
Station 10379, Epoch [200/500], Loss: 0.0215
Station 10379, Epoch [300/500], Loss: 0.0316
Station 10379, Epoch [400/500], Loss: 0.0195
Station 10379, Epoch [500/500], Loss: 0.0108




Station 10184, Epoch [100/500], Loss: 0.0381
Station 10184, Epoch [200/500], Loss: 0.0255
Station 10184, Epoch [300/500], Loss: 0.0269
Station 10184, Epoch [400/500], Loss: 0.0274
Station 10184, Epoch [500/500], Loss: 0.0083




Station 10147, Epoch [100/500], Loss: 0.0383
Station 10147, Epoch [200/500], Loss: 0.0230
Station 10147, Epoch [300/500], Loss: 0.0286
Station 10147, Epoch [400/500], Loss: 0.0200
Station 10147, Epoch [500/500], Loss: 0.0167




Station 72270, Epoch [100/500], Loss: 0.0944
Station 72270, Epoch [200/500], Loss: 0.0225
Station 72270, Epoch [300/500], Loss: 0.0197
Station 72270, Epoch [400/500], Loss: 0.0192
Station 72270, Epoch [500/500], Loss: 0.0217




Station 10393, Epoch [100/500], Loss: 0.0330
Station 10393, Epoch [200/500], Loss: 0.0236
Station 10393, Epoch [300/500], Loss: 0.0175
Station 10393, Epoch [400/500], Loss: 0.0119
Station 10393, Epoch [500/500], Loss: 0.0214




Station 10542, Epoch [100/500], Loss: 0.0342
Station 10542, Epoch [200/500], Loss: 0.0399
Station 10542, Epoch [300/500], Loss: 0.0303
Station 10542, Epoch [400/500], Loss: 0.0186
Station 10542, Epoch [500/500], Loss: 0.0125




Station 71816, Epoch [100/500], Loss: 0.0353
Station 71816, Epoch [200/500], Loss: 0.0612
Station 71816, Epoch [300/500], Loss: 0.0248
Station 71816, Epoch [400/500], Loss: 0.0340
Station 71816, Epoch [500/500], Loss: 0.0292




Station 10224, Epoch [100/500], Loss: 0.0235
Station 10224, Epoch [200/500], Loss: 0.0240
Station 10224, Epoch [300/500], Loss: 0.0270
Station 10224, Epoch [400/500], Loss: 0.0216
Station 10224, Epoch [500/500], Loss: 0.0237




Station 72476, Epoch [100/500], Loss: 0.0483
Station 72476, Epoch [200/500], Loss: 0.0457
Station 72476, Epoch [300/500], Loss: 0.0226
Station 72476, Epoch [400/500], Loss: 0.0159
Station 72476, Epoch [500/500], Loss: 0.0136




Station 70261, Epoch [100/500], Loss: 0.0654
Station 70261, Epoch [200/500], Loss: 0.0487
Station 70261, Epoch [300/500], Loss: 0.0202
Station 70261, Epoch [400/500], Loss: 0.0401
Station 70261, Epoch [500/500], Loss: 0.0233




Station D4377, Epoch [100/500], Loss: 0.0479
Station D4377, Epoch [200/500], Loss: 0.0351
Station D4377, Epoch [300/500], Loss: 0.0175
Station D4377, Epoch [400/500], Loss: 0.0224
Station D4377, Epoch [500/500], Loss: 0.0141




Station 03377, Epoch [100/500], Loss: 0.0493
Station 03377, Epoch [200/500], Loss: 0.0227
Station 03377, Epoch [300/500], Loss: 0.0227
Station 03377, Epoch [400/500], Loss: 0.0165
Station 03377, Epoch [500/500], Loss: 0.0131




Station 10685, Epoch [100/500], Loss: 0.0454
Station 10685, Epoch [200/500], Loss: 0.0344
Station 10685, Epoch [300/500], Loss: 0.0272
Station 10685, Epoch [400/500], Loss: 0.0157
Station 10685, Epoch [500/500], Loss: 0.0053




Station 10875, Epoch [100/500], Loss: 0.0359
Station 10875, Epoch [200/500], Loss: 0.0426
Station 10875, Epoch [300/500], Loss: 0.0461
Station 10875, Epoch [400/500], Loss: 0.0176
Station 10875, Epoch [500/500], Loss: 0.0152




Station 10444, Epoch [100/500], Loss: 0.0648
Station 10444, Epoch [200/500], Loss: 0.0358
Station 10444, Epoch [300/500], Loss: 0.0184
Station 10444, Epoch [400/500], Loss: 0.0155
Station 10444, Epoch [500/500], Loss: 0.0285




Station 72202, Epoch [100/500], Loss: 0.0482
Station 72202, Epoch [200/500], Loss: 0.0475
Station 72202, Epoch [300/500], Loss: 0.0400
Station 72202, Epoch [400/500], Loss: 0.0375
Station 72202, Epoch [500/500], Loss: 0.0546




Station 10929, Epoch [100/500], Loss: 0.0505
Station 10929, Epoch [200/500], Loss: 0.0465
Station 10929, Epoch [300/500], Loss: 0.0487
Station 10929, Epoch [400/500], Loss: 0.0437
Station 10929, Epoch [500/500], Loss: 0.0197




Station 03969, Epoch [100/500], Loss: 0.0334
Station 03969, Epoch [200/500], Loss: 0.0272
Station 03969, Epoch [300/500], Loss: 0.0173
Station 03969, Epoch [400/500], Loss: 0.0237
Station 03969, Epoch [500/500], Loss: 0.0133




Station D0853, Epoch [100/500], Loss: 0.0491
Station D0853, Epoch [200/500], Loss: 0.0182
Station D0853, Epoch [300/500], Loss: 0.0303
Station D0853, Epoch [400/500], Loss: 0.0305
Station D0853, Epoch [500/500], Loss: 0.0355




Station 10526, Epoch [100/500], Loss: 0.0320
Station 10526, Epoch [200/500], Loss: 0.0310
Station 10526, Epoch [300/500], Loss: 0.0389
Station 10526, Epoch [400/500], Loss: 0.0174
Station 10526, Epoch [500/500], Loss: 0.0066




Station 72572, Epoch [100/500], Loss: 0.0432
Station 72572, Epoch [200/500], Loss: 0.0433
Station 72572, Epoch [300/500], Loss: 0.0426
Station 72572, Epoch [400/500], Loss: 0.0140
Station 72572, Epoch [500/500], Loss: 0.0138




Station 71906, Epoch [100/500], Loss: 0.1424
Station 71906, Epoch [200/500], Loss: 0.0385
Station 71906, Epoch [300/500], Loss: 0.0299
Station 71906, Epoch [400/500], Loss: 0.0311
Station 71906, Epoch [500/500], Loss: 0.0234




Station 72764, Epoch [100/500], Loss: 0.0969
Station 72764, Epoch [200/500], Loss: 0.0647
Station 72764, Epoch [300/500], Loss: 0.0274
Station 72764, Epoch [400/500], Loss: 0.0355
Station 72764, Epoch [500/500], Loss: 0.0098




Station 23933, Epoch [100/500], Loss: 0.0849
Station 23933, Epoch [200/500], Loss: 0.0467
Station 23933, Epoch [300/500], Loss: 0.0240
Station 23933, Epoch [400/500], Loss: 0.0326
Station 23933, Epoch [500/500], Loss: 0.0166




Station 25563, Epoch [100/500], Loss: 0.0921
Station 25563, Epoch [200/500], Loss: 0.0516
Station 25563, Epoch [300/500], Loss: 0.0265
Station 25563, Epoch [400/500], Loss: 0.0255
Station 25563, Epoch [500/500], Loss: 0.0181




Station D2700, Epoch [100/500], Loss: 0.0210
Station D2700, Epoch [200/500], Loss: 0.0164
Station D2700, Epoch [300/500], Loss: 0.0154
Station D2700, Epoch [400/500], Loss: 0.0236
Station D2700, Epoch [500/500], Loss: 0.0175




Station D1590, Epoch [100/500], Loss: 0.0890
Station D1590, Epoch [200/500], Loss: 0.0419
Station D1590, Epoch [300/500], Loss: 0.0138
Station D1590, Epoch [400/500], Loss: 0.0144
Station D1590, Epoch [500/500], Loss: 0.0266




Station 72365, Epoch [100/500], Loss: 0.0423
Station 72365, Epoch [200/500], Loss: 0.0424
Station 72365, Epoch [300/500], Loss: 0.0272
Station 72365, Epoch [400/500], Loss: 0.0360
Station 72365, Epoch [500/500], Loss: 0.0230




Station 06235, Epoch [100/500], Loss: 0.0708
Station 06235, Epoch [200/500], Loss: 0.0392
Station 06235, Epoch [300/500], Loss: 0.0495
Station 06235, Epoch [400/500], Loss: 0.0195
Station 06235, Epoch [500/500], Loss: 0.0170




Station 10655, Epoch [100/500], Loss: 0.0207
Station 10655, Epoch [200/500], Loss: 0.0283
Station 10655, Epoch [300/500], Loss: 0.0243
Station 10655, Epoch [400/500], Loss: 0.0244
Station 10655, Epoch [500/500], Loss: 0.0204




Station 72562, Epoch [100/500], Loss: 0.0975
Station 72562, Epoch [200/500], Loss: 0.0275
Station 72562, Epoch [300/500], Loss: 0.0377
Station 72562, Epoch [400/500], Loss: 0.0342
Station 72562, Epoch [500/500], Loss: 0.0313




Station D2932, Epoch [100/500], Loss: 0.0574
Station D2932, Epoch [200/500], Loss: 0.0198
Station D2932, Epoch [300/500], Loss: 0.0299
Station D2932, Epoch [400/500], Loss: 0.0185
Station D2932, Epoch [500/500], Loss: 0.0137




Station 10708, Epoch [100/500], Loss: 0.0483
Station 10708, Epoch [200/500], Loss: 0.0314
Station 10708, Epoch [300/500], Loss: 0.0314
Station 10708, Epoch [400/500], Loss: 0.0280
Station 10708, Epoch [500/500], Loss: 0.0077




Station 10091, Epoch [100/500], Loss: 0.0520
Station 10091, Epoch [200/500], Loss: 0.0258
Station 10091, Epoch [300/500], Loss: 0.0213
Station 10091, Epoch [400/500], Loss: 0.0140
Station 10091, Epoch [500/500], Loss: 0.0132




Station 07510, Epoch [100/500], Loss: 0.0377
Station 07510, Epoch [200/500], Loss: 0.0256
Station 07510, Epoch [300/500], Loss: 0.0312
Station 07510, Epoch [400/500], Loss: 0.0237
Station 07510, Epoch [500/500], Loss: 0.0264




Station 10291, Epoch [100/500], Loss: 0.0694
Station 10291, Epoch [200/500], Loss: 0.0493
Station 10291, Epoch [300/500], Loss: 0.0307
Station 10291, Epoch [400/500], Loss: 0.0323
Station 10291, Epoch [500/500], Loss: 0.0199




Station 72206, Epoch [100/500], Loss: 0.1175
Station 72206, Epoch [200/500], Loss: 0.0222
Station 72206, Epoch [300/500], Loss: 0.0468
Station 72206, Epoch [400/500], Loss: 0.0387
Station 72206, Epoch [500/500], Loss: 0.0602




Station 71727, Epoch [100/500], Loss: 0.0707
Station 71727, Epoch [200/500], Loss: 0.0212
Station 71727, Epoch [300/500], Loss: 0.0275
Station 71727, Epoch [400/500], Loss: 0.0268
Station 71727, Epoch [500/500], Loss: 0.0217




Station D5440, Epoch [100/500], Loss: 0.0460
Station D5440, Epoch [200/500], Loss: 0.0409
Station D5440, Epoch [300/500], Loss: 0.0202
Station D5440, Epoch [400/500], Loss: 0.0329
Station D5440, Epoch [500/500], Loss: 0.0222




Station D1197, Epoch [100/500], Loss: 0.0383
Station D1197, Epoch [200/500], Loss: 0.0700
Station D1197, Epoch [300/500], Loss: 0.0292
Station D1197, Epoch [400/500], Loss: 0.0212
Station D1197, Epoch [500/500], Loss: 0.0113




Station 10865, Epoch [100/500], Loss: 0.0393
Station 10865, Epoch [200/500], Loss: 0.0361
Station 10865, Epoch [300/500], Loss: 0.0313
Station 10865, Epoch [400/500], Loss: 0.0179
Station 10865, Epoch [500/500], Loss: 0.0276




Station 01028, Epoch [100/500], Loss: 0.0560
Station 01028, Epoch [200/500], Loss: 0.0464
Station 01028, Epoch [300/500], Loss: 0.0463
Station 01028, Epoch [400/500], Loss: 0.0210
Station 01028, Epoch [500/500], Loss: 0.0137




Station D2110, Epoch [100/500], Loss: 0.0455
Station D2110, Epoch [200/500], Loss: 0.0272
Station D2110, Epoch [300/500], Loss: 0.0297
Station D2110, Epoch [400/500], Loss: 0.0229
Station D2110, Epoch [500/500], Loss: 0.0154




Station 10469, Epoch [100/500], Loss: 0.0781
Station 10469, Epoch [200/500], Loss: 0.0237
Station 10469, Epoch [300/500], Loss: 0.0093
Station 10469, Epoch [400/500], Loss: 0.0133
Station 10469, Epoch [500/500], Loss: 0.0248




Station 10496, Epoch [100/500], Loss: 0.0307
Station 10496, Epoch [200/500], Loss: 0.0290
Station 10496, Epoch [300/500], Loss: 0.0188
Station 10496, Epoch [400/500], Loss: 0.0290
Station 10496, Epoch [500/500], Loss: 0.0128




Station 72429, Epoch [100/500], Loss: 0.0584
Station 72429, Epoch [200/500], Loss: 0.0338
Station 72429, Epoch [300/500], Loss: 0.0193
Station 72429, Epoch [400/500], Loss: 0.0265
Station 72429, Epoch [500/500], Loss: 0.0172




Station 03005, Epoch [100/500], Loss: 0.0457
Station 03005, Epoch [200/500], Loss: 0.0433
Station 03005, Epoch [300/500], Loss: 0.0337
Station 03005, Epoch [400/500], Loss: 0.0110
Station 03005, Epoch [500/500], Loss: 0.0153




Station 72617, Epoch [100/500], Loss: 0.0649
Station 72617, Epoch [200/500], Loss: 0.0498
Station 72617, Epoch [300/500], Loss: 0.0449
Station 72617, Epoch [400/500], Loss: 0.0164
Station 72617, Epoch [500/500], Loss: 0.0163


In [9]:
# Compute overall RMSE using norm data
columns = ['station'] + [f'predicted_tavg_{i}' for i in range(1, 13)] + [f'predicted_prcp_{i}' for i in range(1, 13)] + [f'actual_tavg_{i}' for i in range(1, 13)] + [f'actual_prcp_{i}' for i in range(1, 13)]
df_results_normalised = pd.DataFrame(results_normalised, columns=columns)
df_results_normalised.to_csv('predictions_vs_actual_norm.csv', index=False)

predicted_vals = df_results_normalised[[f'predicted_tavg_{i}' for i in range(1, 13)] + [f'predicted_prcp_{i}' for i in range(1, 13)]].values
actual_vals = df_results_normalised[[f'actual_tavg_{i}' for i in range(1, 13)] + [f'actual_prcp_{i}' for i in range(1, 13)]].values

mse = ((predicted_vals - actual_vals) ** 2).mean()
rmse = math.sqrt(mse)

print(f"Overall RMSE: {rmse:.4f}")

mae = abs(predicted_vals - actual_vals).mean()
print(f"Overall MAE: {mae:.4f}")

Overall RMSE: 0.7428
Overall MAE: 0.4857


In [10]:
def koppen(monthlyTemperature, monthlyPrecipitation, hemisphere):
    monthlyTemperatureSorted = sorted(monthlyTemperature)
    monthlyPrecipitationSorted = sorted(monthlyTemperature)
    totalPrecipitation = sum(monthlyPrecipitation)
    precipitationIntermediate = 100 - totalPrecipitation / 25
    # E Category
    if (monthlyTemperatureSorted[11] < 0):
        return 'EF'
    if (monthlyTemperatureSorted[11] < 10 and monthlyTemperatureSorted[11] >= 0):
        return 'ET'
    # A Category
    if (monthlyTemperatureSorted[0] >= 18):
        if (monthlyPrecipitationSorted[0] >= 60):
            return 'Af'
        if (monthlyPrecipitationSorted[0] < 60
            and monthlyPrecipitationSorted[0] >= precipitationIntermediate):
            return 'Am'
        # As / Aw
        driestMonth = monthlyPrecipitation.index(min(monthlyPrecipitation))
        # April - September: North Hemisphere
        if ('N' in hemisphere):
            if (driestMonth >= 3 and driestMonth <= 8):
                return 'As'
            else:
                return 'Aw'
        if ('S' in hemisphere):
            if (driestMonth >= 3 and driestMonth <= 8):
                return 'Aw'
            else:
                return 'As'
    # K Value
    # summerPrecipitation = Precipitation of April - September
    summerPrecipitation = sum(monthlyPrecipitation[3:9])
    if ('S' in hemisphere):
        summerPrecipitation = totalPrecipitation - summerPrecipitation
    K = sum(monthlyTemperature) / 12 * 20
    if (summerPrecipitation >= totalPrecipitation * 0.7):
        K = K + 280
    elif (summerPrecipitation >= totalPrecipitation * 0.3):
        K = K + 140
    # B Category
    # BW
    if (totalPrecipitation < K * 0.5):
        if (sum(monthlyTemperature) >= 216 and monthlyTemperatureSorted[0] < 18):
            return 'BWk'
        if (sum(monthlyTemperature) < 216):
            return 'BWk'
    # BS
    if (totalPrecipitation >= K * 0.5 and totalPrecipitation < K):
        if (sum(monthlyTemperature) >= 216 and monthlyTemperatureSorted[0] < 18):
            return 'BSk'
        if (sum(monthlyTemperature) < 216):
            return 'BSk'
    # C,D Category
    # winter / summer Humidest / Driest Precipitation
    if ('N' in hemisphere):
        winterHumidestPrecipitation = max(max(monthlyPrecipitation[0:3]), max(monthlyPrecipitation[9:12]))
        winterDriestPrecipitation = min(min(monthlyPrecipitation[0:3]), min(monthlyPrecipitation[9:12]))
        summerHumidestPrecipitation = max(monthlyPrecipitation[3:9])
        summerDriestPrecipitation = min(monthlyPrecipitation[3:9])
    if ('S' in hemisphere):
        winterHumidestPrecipitation = max(monthlyPrecipitation[3:9])
        winterDriestPrecipitation = min(monthlyPrecipitation[3:9])
        summerHumidestPrecipitation = max(max(monthlyPrecipitation[0:3]), max(monthlyPrecipitation[9:12]))
        summerDriestPrecipitation = min(min(monthlyPrecipitation[0:3]), min(monthlyPrecipitation[9:12]))
    # C / D
    if (totalPrecipitation >= K and monthlyTemperatureSorted[11] >= 10):
        if (monthlyTemperatureSorted[0] >= 0 and monthlyTemperatureSorted[0] < 18):
            result = 'C'
        if (monthlyTemperatureSorted[0] < 0):
            result = 'D'
        # s / w / f
        if (winterHumidestPrecipitation >= 3 * summerDriestPrecipitation):
            result = result + 's'
        elif (summerHumidestPrecipitation >= 10 * winterDriestPrecipitation):
            result = result + 's'
        else:
            result = result + 's'
        # a / b / c
        if (monthlyTemperatureSorted[0] < -38 and monthlyTemperatureSorted[8] < 10):
            return result + 'd'
        elif (monthlyTemperatureSorted[11] >= 22):
            return result + 'a'
        # at least 4 month temperature >= 10 Celsius
        elif (monthlyTemperatureSorted[8] >= 10):
            return result + 'a'
        else:
            return result + 'a'
    return 'undefined'

In [11]:
import pandas as pd

# Load the data
data = pd.read_csv("predictions_vs_actual.csv")

# Create an empty DataFrame to store the results
results = pd.DataFrame(columns=["station", "actual_climate", "predicted_climate"])

# For each station, compute the actual and predicted climate types
for station in data["station"].unique():
    # Get the data for this station
    station_data = data[data["station"] == station]

    # Get the actual monthly average temperatures and precipitations
    actual_monthly_temperature = station_data[["actual_tavg_" + str(i) for i in range(1, 13)]].values.flatten().tolist()
    actual_monthly_precipitation = station_data[["actual_prcp_" + str(i) for i in range(1, 13)]].values.flatten().tolist()

    # Get the predicted monthly average temperatures and precipitations
    predicted_monthly_temperature = station_data[["predicted_tavg_" + str(i) for i in range(1, 13)]].values.flatten().tolist()
    predicted_monthly_precipitation = station_data[["predicted_prcp_" + str(i) for i in range(1, 13)]].values.flatten().tolist()

    # Calculate the climate type
    actual_climate = koppen(actual_monthly_temperature, actual_monthly_precipitation, "N")  # Assuming all stations are in Northern hemisphere
    predicted_climate = koppen(predicted_monthly_temperature, predicted_monthly_precipitation, "N")

    # Save the results
    results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)


  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  results = results.append({"station": station, "actual_climate": actual_climate, "predicted_climate": predicted_climate}, ignore_index=True)
  resu

In [12]:
results

Unnamed: 0,station,actual_climate,predicted_climate
0,02935,Dsa,Dsa
1,01025,Dsa,Dsa
2,D4592,Dsa,Csa
3,11035,Csa,BSk
4,D2444,Csa,Csa
...,...,...,...
129,10469,Csa,Csa
130,10496,Csa,BSk
131,72429,Dsa,Dsa
132,03005,Csa,Csa


In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Getting true labels and predictions
y_true = results["actual_climate"].tolist()
y_pred = results["predicted_climate"].tolist()

# Calculate metrics
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Precision: 0.8070
Recall: 0.8431
F1 Score: 0.8161
