# Time series forecasting challenge evaluation notebook
This notebook explains the evaluation process for the time series forecasting challenge. 

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Result directory of every participating group, keyed by group name.
# Each directory is expected to hold that group's df_predictions.csv.
group_dict = dict(
    group1='../results/2024_10_22_20_45_08',
    group2='../results/2024_10_22_20_53_45',
    group3='../results/2024_10_22_21_00_23',
)

In [3]:
# Load one group's predictions file to illustrate the expected layout:
# a 'time' column followed by one column per predicted time series.
df_predictions_example = pd.read_csv(f"{group_dict['group1']}/df_predictions.csv")
df_predictions_example

Unnamed: 0,time,0_requests,0_cpu,0_pods,0_cost,0_memory,1_requests,1_cpu,1_pods,1_cost,...,46_requests,46_cpu,46_pods,46_cost,46_memory,47_requests,47_cpu,47_pods,47_cost,47_memory
0,2073600,-0.569948,0.307525,-1.068566,0.156908,-0.383051,-0.620558,-0.493449,-0.135562,-0.043065,...,-0.361698,-0.033384,0.046268,0.338337,-0.336655,0.675356,-0.558039,0.980047,0.053053,-0.066575
1,2073660,-0.571036,0.307791,-1.067026,0.156898,-0.383195,-0.618849,-0.977991,-0.134642,-0.043206,...,-0.162882,-0.028468,0.118167,0.136422,-0.336594,0.673695,-0.551886,0.970694,0.052150,-0.066861
2,2073720,-0.572125,0.308054,-1.065478,0.156887,-0.383339,-0.617120,-0.673999,-0.133724,-0.043347,...,-0.161230,-0.023594,0.080355,0.075170,-0.336533,0.670912,-0.545884,0.960321,0.051069,-0.067147
3,2073780,-0.573216,0.308313,-1.063922,0.156876,-0.383482,-0.615372,-0.677903,-0.132809,-0.043488,...,-0.361176,-0.018760,0.060336,0.191304,-0.336472,0.667018,-0.540038,0.948945,0.049807,-0.067434
4,2073840,-0.574308,0.308568,-1.062359,0.156865,-0.383626,-0.613605,-0.972148,-0.131897,-0.043628,...,-0.522570,-0.013968,0.093630,0.015858,-0.336411,0.662026,-0.534353,0.936584,0.048363,-0.067720
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10075,2678100,-5.343069,0.088458,-1.820692,0.382736,-0.819162,-1.542976,-0.691043,-1.079461,0.025739,...,0.392584,-0.187300,0.091935,0.185184,-0.425671,0.427602,-0.764479,0.894399,-0.103221,-2.898250
10076,2678160,-5.341436,0.088491,-1.821044,0.382748,-0.819194,-1.540460,-1.175469,-1.077574,0.025827,...,-0.191541,-0.181683,0.091389,0.142549,-0.425525,0.431309,-0.757860,0.890014,-0.103321,-2.898549
10077,2678220,-5.339798,0.088525,-1.821390,0.382760,-0.819227,-1.537924,-0.871362,-1.075684,0.025914,...,-0.973275,-0.176106,0.079852,0.052476,-0.425379,0.433862,-0.751365,0.884550,-0.103588,-2.898848
10078,2678280,-5.338155,0.088557,-1.821733,0.382772,-0.819259,-1.535368,-0.875150,-1.073792,0.026002,...,0.392245,-0.170569,0.010458,0.185369,-0.425234,0.435263,-0.745001,0.878018,-0.104025,-2.899148


In [4]:
# This cell computes the MAE for each time series from each group.
# Result: df_results with one row per time series ('name') and one
# 'mae_<group>' column per entry of group_dict.

# Divisor used to express the number of evaluated rows in days.
# NOTE(review): assumes one row per minute (the timestamps shown in the example
# predictions file advance by 60 per row) - confirm against the data spec.
ROWS_PER_DAY = 1440

# For now, just read directly from the open data and take the last part of it
# according to the length of the predictions. The file is identical for every
# group, so it is read once here instead of once per loop iteration.
rt_test = '../data/data.csv'
df_test_full = pd.read_csv(rt_test)

# Placeholder for all results
all_results = []

# Loop through each group
for group, rt in group_dict.items():
    # Load predictions for the current group
    rt_preds = f'{rt}/df_predictions.csv'
    df_preds = pd.read_csv(rt_preds)
    if len(df_preds) == 0:
        # Without this guard, iloc[-0:] below would select the *entire* test file.
        raise ValueError(f'No prediction rows found in {rt_preds}')
    print('Prediction timestamps min, max:')
    print(df_preds['time'].min(), df_preds['time'].max())
    time_preds = df_preds.pop('time')

    # The evaluation window is the tail of the data, as long as the predictions
    df_test = df_test_full.iloc[-len(df_preds):]
    print('Test timestamps min, max:')
    print(df_test['time'].min(), df_test['time'].max())
    time_test = df_test['time']
    # drop() returns a new frame, so the shared df_test_full is never modified
    df_test = df_test.drop(columns='time')

    # Check that time columns are the same so we are comparing the correct timestamps.
    # np.array_equal also returns False (instead of warning/raising) when the two
    # arrays differ in length, e.g. when there are more predictions than test rows.
    if np.array_equal(time_preds.to_numpy(), time_test.to_numpy()):
        print('Timestamps OK')
    else:
        print('Time values not equal!')
        raise ValueError(
            f'Timestamps of predictions for {group} do not match the test data'
        )

    print(f'Computing results for df_test.shape={df_test.shape}, which is {df_test.shape[0]/ROWS_PER_DAY} days')

    # List to store the MAE for each column in the current group
    mae_list = []

    # Calculate MAE for each time series in the test set.
    # sklearn compares the values positionally, so the differing row indices of
    # df_test (tail of the data file) and df_preds (0..n-1) do not matter here.
    for col in tqdm(df_test.columns, desc=f'Processing {group}'):
        test_arr = df_test[col]
        pred_arr = df_preds[col]

        mae = mean_absolute_error(test_arr, pred_arr)

        mae_list.append(mae)

    # Store results for this group with column names
    group_results = pd.DataFrame({
        'name': list(df_test.columns),
        f'mae_{group}': mae_list
    })

    # Append to list of all results
    all_results.append(group_results)

# Combine the per-group results into one DataFrame, aligned on the 'name' column
# (index-based alignment, so the result does not depend on row order per group).
df_results = (
    pd.concat([res.set_index('name') for res in all_results], axis=1)
    .rename_axis('name')
    .reset_index()
)
df_results

Prediction timestamps min, max:
2073600 2678340
Test timestamps min, max:
2073600 2678340
Timestamps OK
Computing results for df_test.shape=(10080, 240), which is 7.0 days


Processing group1: 100%|██████████| 240/240 [00:00<00:00, 1935.82it/s]


Prediction timestamps min, max:
2073600 2678340
Test timestamps min, max:
2073600 2678340
Timestamps OK
Computing results for df_test.shape=(10080, 240), which is 7.0 days


Processing group2: 100%|██████████| 240/240 [00:00<00:00, 1896.46it/s]


Prediction timestamps min, max:
2073600 2678340
Test timestamps min, max:
2073600 2678340
Timestamps OK
Computing results for df_test.shape=(10080, 240), which is 7.0 days


Processing group3: 100%|██████████| 240/240 [00:00<00:00, 1772.14it/s]


Unnamed: 0,name,mae_group1,mae_group2,mae_group3
0,0_requests,2.661987,0.393483,3.309371
1,0_cpu,0.571495,0.570985,0.746590
2,0_pods,1.551293,1.314947,1.765719
3,0_cost,0.231298,0.083981,0.128984
4,0_memory,1.516709,0.957484,0.940815
...,...,...,...,...
235,47_requests,0.567523,0.526054,0.787461
236,47_cpu,0.741789,0.842104,0.638259
237,47_pods,0.525094,0.519007,0.699135
238,47_cost,0.425862,0.407568,0.473375


In [5]:
# Add ranks for each group by MAE: within every time series (row), the group
# with the lowest MAE gets rank 1.
group_cols = [col for col in df_results.columns if col.startswith('mae_')]

# method='min' gives tied groups the same (lowest) integer rank. The pandas
# default, method='average', yields fractional ranks such as 1.5 for ties,
# which the integer cast below would silently truncate.
df_ranks = (
    df_results[group_cols]
    .rank(axis=1, method='min', ascending=True)
    .astype(int)
    .add_suffix('_rank')  # mae_<group> -> mae_<group>_rank
)

df_results_with_rank = pd.concat([df_results, df_ranks], axis=1)
df_results_with_rank

Unnamed: 0,name,mae_group1,mae_group2,mae_group3,mae_group1_rank,mae_group2_rank,mae_group3_rank
0,0_requests,2.661987,0.393483,3.309371,2,1,3
1,0_cpu,0.571495,0.570985,0.746590,2,1,3
2,0_pods,1.551293,1.314947,1.765719,2,1,3
3,0_cost,0.231298,0.083981,0.128984,3,1,2
4,0_memory,1.516709,0.957484,0.940815,3,2,1
...,...,...,...,...,...,...,...
235,47_requests,0.567523,0.526054,0.787461,2,1,3
236,47_cpu,0.741789,0.842104,0.638259,2,3,1
237,47_pods,0.525094,0.519007,0.699135,2,1,3
238,47_cost,0.425862,0.407568,0.473375,2,1,3


In [6]:
# Calculate the mean rank of each group over all time series.
# The groups are taken from group_dict so that adding or removing a group
# does not require editing this cell.
mean_ranks = {
    group: df_results_with_rank[f'mae_{group}_rank'].mean()
    for group in group_dict
}

# Create a DataFrame of the mean ranks, sorted ascending so that the best
# group (lowest mean rank) comes first
df_mean_ranks = pd.DataFrame(list(mean_ranks.items()), columns=['group', 'mean_rank'])
df_mean_ranks = df_mean_ranks.sort_values(by='mean_rank')
df_mean_ranks

Unnamed: 0,group,mean_rank
1,group2,1.65
0,group1,1.883333
2,group3,2.466667


In [7]:
# Announce the final standings; df_mean_ranks is already ordered best-first,
# so the row position directly gives the place.
standings = zip(df_mean_ranks['group'], df_mean_ranks['mean_rank'])
for place, (group_name, avg_rank) in enumerate(standings, start=1):
    print(f'Place {place}: {group_name} with mean rank {avg_rank:.3f}')

Place 1: group2 with mean rank 1.650
Place 2: group1 with mean rank 1.883
Place 3: group3 with mean rank 2.467
