# Climate data

In [16]:
import pandas as pd
import os

# Experiment settings that select which results file is loaded.
# The trailing numbers are presumably alternative settings -- TODO confirm.
train_size = 100 # 1000
test_size = 50 # 10
horizon = 10 # 10

# Results CSV for the climate data set, named after the settings above
path = os.path.join('results','climate_data',f'train_{train_size}_test_{test_size}_horizon_{horizon}_results.csv')

df = pd.read_csv(path)

# Label for this run: file name without its extension, tagged with the data set.
# splitext removes only the final extension, whereas split('.')[0] would cut the
# name at the first dot anywhere in it.
name_of_file = os.path.splitext(os.path.basename(path))[0]
name_of_file = name_of_file + "_climate"
df

Unnamed: 0,DateTime,Predicted,Actual,RollNumber
0,1878-06-02,0.050833,0.227900,1
1,1878-07-02,0.052786,0.187174,1
2,1878-08-01,0.053267,0.165230,1
3,1878-09-01,0.052982,0.153136,1
4,1878-10-01,0.052308,0.170925,1
...,...,...,...,...
405,1882-03-02,0.042736,0.039102,41
406,1882-04-02,0.042082,0.082189,41
407,1882-05-02,0.041534,-0.024801,41
408,1882-06-02,0.041066,-0.153214,41


In [17]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Per-group error metrics, one entry per 'RollNumber' value
maes = []
rmses = []
mapes = []

# Group by 'RollNumber' and calculate metrics for each group
for _, group in df.groupby('RollNumber'):
    y_true = group['Actual']
    y_pred = group['Predicted']

    # Calculate metrics
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    # NOTE: scikit-learn reports MAPE as a ratio (1.0 == 100%), and it can become
    # very large when 'Actual' values are close to zero -- interpret with care.
    mape = mean_absolute_percentage_error(y_true, y_pred)

    # Append metrics to lists
    maes.append(mae)
    rmses.append(rmse)
    mapes.append(mape)

# Mean and standard deviation of each metric across the groups
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mae_mean = np.mean(maes)
mae_std = np.std(maes)
rmse_mean = np.mean(rmses)
rmse_std = np.std(rmses)
mape_mean = np.mean(mapes)
mape_std = np.std(mapes)

# One-row summary table, labelled with the run name
results = pd.DataFrame({
    'Source': [name_of_file],
    'MAE': [mae_mean],
    'MAE_std': [mae_std],
    'RMSE': [rmse_mean],
    'RMSE_std': [rmse_std],
    'MAPE': [mape_mean],
    'MAPE_std': [mape_std]
})
print(results)

# Save the results; create the output folder first so that a fresh checkout
# (where 'error_metrics' does not exist yet) does not fail on to_csv
os.makedirs('error_metrics', exist_ok=True)
results.to_csv(os.path.join('error_metrics',f'{name_of_file}.csv'), index=False)

                                         Source       MAE   MAE_std      RMSE  \
0  train_100_test_50_horizon_10_results_climate  0.115589  0.025586  0.132742   

   RMSE_std      MAPE  MAPE_std  
0  0.028978  2.540228   1.90093  


# Weather data

In [24]:
import pandas as pd
import os

# Experiment settings that select which results file is loaded.
# The trailing numbers are presumably alternative settings -- TODO confirm.
train_size = 100 # 1000
test_size = 50 # 10
horizon = 10 # 10

# Results CSV for the weather data set, named after the settings above
path = os.path.join('results','weather_data',f'train_{train_size}_test_{test_size}_horizon_{horizon}_results.csv')

df = pd.read_csv(path)

# Get the name of the file without the .csv, tagged with the data set.
# splitext removes only the final extension, whereas split('.')[0] would cut the
# name at the first dot anywhere in it.
name_of_file = os.path.splitext(os.path.basename(path))[0]
name_of_file = name_of_file + "_weather"
df

Unnamed: 0,DateTime,Predicted,Actual,RollNumber
0,2024-03-05 04:00:00,4.126584,1.7,1
1,2024-03-05 05:00:00,4.082923,1.5,1
2,2024-03-05 06:00:00,4.081631,1.5,1
3,2024-03-05 07:00:00,4.104399,1.3,1
4,2024-03-05 08:00:00,4.140087,1.3,1
...,...,...,...,...
405,2024-03-07 01:00:00,3.998065,-0.3,41
406,2024-03-07 02:00:00,4.056930,-0.1,41
407,2024-03-07 03:00:00,4.113675,-1.4,41
408,2024-03-07 04:00:00,4.167965,-1.8,41


In [25]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Per-group error metrics, one entry per 'RollNumber' value
maes = []
rmses = []
mapes = []

# Group by 'RollNumber' and calculate metrics for each group
for _, group in df.groupby('RollNumber'):
    y_true = group['Actual']
    y_pred = group['Predicted']

    # Calculate metrics
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    # NOTE: scikit-learn reports MAPE as a ratio (1.0 == 100%), and it can become
    # very large when 'Actual' values are close to zero -- interpret with care.
    mape = mean_absolute_percentage_error(y_true, y_pred)

    # Append metrics to lists
    maes.append(mae)
    rmses.append(rmse)
    mapes.append(mape)

# Mean and standard deviation of each metric across the groups
# (np.std uses its default ddof=0, i.e. the population standard deviation)
mae_mean = np.mean(maes)
mae_std = np.std(maes)
rmse_mean = np.mean(rmses)
rmse_std = np.std(rmses)
mape_mean = np.mean(mapes)
mape_std = np.std(mapes)

# One-row summary table, labelled with the run name
results = pd.DataFrame({
    'Source': [name_of_file],
    'MAE': [mae_mean],
    'MAE_std': [mae_std],
    'RMSE': [rmse_mean],
    'RMSE_std': [rmse_std],
    'MAPE': [mape_mean],
    'MAPE_std': [mape_std]
})
print(results)

# Save the results; create the output folder first so that a fresh checkout
# (where 'error_metrics' does not exist yet) does not fail on to_csv
os.makedirs('error_metrics', exist_ok=True)
results.to_csv(os.path.join('error_metrics',f'{name_of_file}.csv'), index=False)

                                         Source       MAE   MAE_std      RMSE  \
0  train_100_test_50_horizon_10_results_weather  1.435407  0.892404  1.658501   

   RMSE_std      MAPE  MAPE_std  
0  0.981865  2.660205  2.977935  


# Combine all error metrics

In [3]:
# Combine every per-source metrics .csv in the error_metrics folder into one table
import glob
import os
import pandas as pd


def combine_error_metrics(metrics_dir='error_metrics', output_name='all_error_metrics.csv'):
    """Concatenate the per-source metric CSVs in ``metrics_dir`` and save the result.

    Parameters
    ----------
    metrics_dir : str
        Folder that holds the individual ``*.csv`` metric files.
    output_name : str
        File name (inside ``metrics_dir``) the combined table is written to.

    Returns
    -------
    pandas.DataFrame
        The combined table, or an empty DataFrame when there are no input
        files (in which case nothing is written).
    """
    # The combined file lives in the same folder as its inputs, so it must be
    # skipped here; otherwise every re-run would append the previous combined
    # rows to the new output. Sorting makes the row order reproducible.
    files = sorted(
        file
        for file in glob.glob(os.path.join(metrics_dir, '*.csv'))
        if os.path.basename(file) != output_name
    )

    if not files:
        # pd.concat raises an opaque ValueError on an empty list; report instead
        print(f'No per-source metric files found in {metrics_dir!r}; nothing written.')
        return pd.DataFrame()

    combined = pd.concat([pd.read_csv(file) for file in files], ignore_index=True)

    # save the results
    combined.to_csv(os.path.join(metrics_dir, output_name), index=False)
    return combined


results = combine_error_metrics()