In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [8]:
# Function to calculate overall RMSE and top x% RMSE
def calculate_rmse(df, top_percent=10, start_row=None, end_row=None, override_threshold=None):
    """Compute the RMSE between 'Predicted' and 'Raw', overall and for high-'Raw' rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Predicted', 'Actual' and 'Raw'.
    top_percent : float, default 10
        Percentage (0-100) used to derive the cut-off for the "top" subset:
        the threshold is the ``100 - top_percent`` percentile of 'Raw'.
        Ignored when ``override_threshold`` is given.
    start_row, end_row : int or None
        Optional positional bounds applied as ``df.iloc[start_row:end_row]``
        (end exclusive). Either bound may be supplied on its own.
    override_threshold : float or None
        Fixed 'Raw' cut-off used instead of the percentile-based one.

    Returns
    -------
    tuple
        ``(overall_rmse, top_rmse)``. ``top_rmse`` is NaN when no row has
        'Raw' >= threshold.

    Raises
    ------
    ValueError
        If no rows remain after slicing and dropping NaN rows.

    Notes
    -----
    Rows with NaN in 'Actual' are dropped as well, although 'Actual' itself
    does not enter the error computation. The threshold in use is printed as
    a side effect.
    """
    # Apply the slice when *either* bound is given; a None bound leaves that
    # side of the slice open.
    if start_row is not None or end_row is not None:
        df = df.iloc[start_row:end_row]

    # Remove rows with NaN values
    df = df.dropna(subset=['Predicted', 'Actual', 'Raw'])
    if df.empty:
        raise ValueError(
            "No rows with non-NaN 'Predicted', 'Actual' and 'Raw' values to score"
        )

    # Calculate overall RMSE
    overall_rmse = np.sqrt(mean_squared_error(df['Predicted'], df['Raw']))

    # Threshold for the "top" subset: explicit override wins, otherwise the
    # (100 - top_percent)th percentile of 'Raw'.
    if override_threshold is None:
        top_percent_threshold = np.percentile(df['Raw'], 100 - top_percent)
    else:
        top_percent_threshold = override_threshold
    print(f"Threshold: {top_percent_threshold}")

    top_percent_data = df[df['Raw'] >= top_percent_threshold]
    if top_percent_data.empty:
        # Nothing reaches the threshold: report NaN instead of letting the
        # metric fail on empty input.
        top_rmse = np.nan
    else:
        top_rmse = np.sqrt(
            mean_squared_error(top_percent_data['Predicted'], top_percent_data['Raw'])
        )

    return overall_rmse, top_rmse

In [9]:
# Stems of the CSV files to evaluate; each is loaded from the butterworth
# dataset directory inside the loop below.
files = [
    "l3-cutoff-005",
    "l3-cutoff-01",
    "l3-cutoff-02",
    "l3-cutoff-03",
    "l3-cutoff-04",
    "l3-cutoff-05",
]
top_percent = 0
# A non-None value makes calculate_rmse use this fixed 'Raw' cut-off rather
# than the percentile derived from top_percent.
override_threshold = 0.25

for file in files:
    file_path = f'../../source/l3_prediction_dataset/butterworth/{file}.csv'
    df = pd.read_csv(file_path)

    # start_row / end_row stay at their None defaults, so no row range is applied.
    overall_rmse, top_rmse = calculate_rmse(
        df, top_percent, override_threshold=override_threshold
    )

    print(f"Overall RMSE for {file}: {overall_rmse}")

    # Label the second figure according to how the threshold was chosen.
    if override_threshold is None:
        print(f"Top {top_percent}% RMSE for {file}: {top_rmse}\n")
    else:
        print(f"RMSE for {file} with threshold {override_threshold}: {top_rmse}\n")

Threshold: 0.25
Overall RMSE for l3-cutoff-005: 0.05313209438809719
Top 0% RMSE for l3-cutoff-005: 0.08229585914596002

Threshold: 0.25
Overall RMSE for l3-cutoff-01: 0.045828484298954676
Top 0% RMSE for l3-cutoff-01: 0.07912585004180664

Threshold: 0.25
Overall RMSE for l3-cutoff-02: 0.050754842387351146
Top 0% RMSE for l3-cutoff-02: 0.06601860916877725

Threshold: 0.25
Overall RMSE for l3-cutoff-03: 0.0538750874853876
Top 0% RMSE for l3-cutoff-03: 0.0825766942173343

Threshold: 0.25
Overall RMSE for l3-cutoff-04: 0.057998510173104374
Top 0% RMSE for l3-cutoff-04: 0.10536812105179869

Threshold: 0.25
Overall RMSE for l3-cutoff-05: 0.05951853410729308
Top 0% RMSE for l3-cutoff-05: 0.10636169182504353

