In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [5]:
# Root-mean-square error between two equal-length numeric sequences
def _rmse(predicted, reference):
    diff = np.asarray(predicted, dtype=float) - np.asarray(reference, dtype=float)
    return np.sqrt(np.mean(diff ** 2))


# Function to calculate overall RMSE and top x% RMSE
def calculate_rmse(df, top_percent=10, start_row=None, end_row=None):
    """Compute the RMSE of 'Predicted' against 'Raw', overall and for the top x%.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Predicted', 'Actual' and 'Raw'.
    top_percent : float, default 10
        Size, in percent (0-100), of the upper tail of 'Raw' values that the
        second RMSE is restricted to.
    start_row, end_row : int or None
        Optional positional bounds, applied as ``df.iloc[start_row:end_row]``
        before anything else. Either bound may be given on its own.

    Returns
    -------
    (overall_rmse, top_rmse)
        RMSE over all retained rows, and RMSE over the rows whose 'Raw' value
        is at or above the ``100 - top_percent`` percentile of 'Raw'.

    Raises
    ------
    ValueError
        If ``top_percent`` is outside [0, 100], or if no rows remain after
        slicing and dropping NaN values.

    Notes
    -----
    The percentile threshold is printed as a side effect.
    Rows with NaN in 'Actual' are dropped even though 'Actual' does not enter
    either RMSE. NOTE(review): confirm that this filtering is intended.
    """
    if not 0 <= top_percent <= 100:
        raise ValueError(f"top_percent must be between 0 and 100, got {top_percent}")

    # Filter the rows if a range is provided. A single bound is honoured too:
    # iloc treats a None bound as "open-ended" on that side.
    if start_row is not None or end_row is not None:
        df = df.iloc[start_row:end_row]

    # Remove rows with NaN values
    df = df.dropna(subset=['Predicted', 'Actual', 'Raw'])
    if df.empty:
        raise ValueError(
            "No rows left to score after applying the row range and dropping NaN values"
        )

    # Calculate overall RMSE
    overall_rmse = _rmse(df['Predicted'], df['Raw'])

    # Calculate RMSE for the top x% of the highest 'Raw' data points
    top_percent_threshold = np.percentile(df['Raw'], 100 - top_percent)
    print(f"Top {top_percent}% threshold: {top_percent_threshold}")

    # The threshold never exceeds the maximum of 'Raw', so this selection
    # always contains at least one row.
    top_percent_data = df[df['Raw'] >= top_percent_threshold]
    top_rmse = _rmse(top_percent_data['Predicted'], top_percent_data['Raw'])

    return overall_rmse, top_rmse

In [6]:
# CSV base names (without extension) found in the butterworth dataset directory
files = [
    "l3-cutoff-005",
    "l3-cutoff-01",
    "l3-cutoff-02",
    "l3-cutoff-03",
    "l3-cutoff-04",
    "l3-cutoff-05",
]
top_percent = 20

# For each file: read its CSV, score it, and report both RMSE figures
for file in files:
    file_path = f'../../source/l3_prediction_dataset/butterworth/{file}.csv'
    df = pd.read_csv(file_path)

    # No row range is passed, so every row of the file is considered
    overall_rmse, top_rmse = calculate_rmse(df, top_percent)

    print(f"Overall RMSE for {file}: {overall_rmse}")
    print(f"Top {top_percent}% RMSE for {file}: {top_rmse}\n")

Top 20% threshold: 0.22240000000000001
Overall RMSE for l3-cutoff-005: 0.05313209438809719
Top 20% RMSE for l3-cutoff-005: 0.07562412234376661

Top 20% threshold: 0.224
Overall RMSE for l3-cutoff-01: 0.045828484298954676
Top 20% RMSE for l3-cutoff-01: 0.07273187362399709

Top 20% threshold: 0.24080000000000001
Overall RMSE for l3-cutoff-02: 0.050754842387351146
Top 20% RMSE for l3-cutoff-02: 0.06650885120046622

Top 20% threshold: 0.22240000000000001
Overall RMSE for l3-cutoff-03: 0.0538750874853876
Top 20% RMSE for l3-cutoff-03: 0.07784078560900304

Top 20% threshold: 0.22240000000000001
Overall RMSE for l3-cutoff-04: 0.057998510173104374
Top 20% RMSE for l3-cutoff-04: 0.09212075121423789

Top 20% threshold: 0.22240000000000001
Overall RMSE for l3-cutoff-05: 0.05951853410729308
Top 20% RMSE for l3-cutoff-05: 0.09147402434572985

