<a href="https://colab.research.google.com/github/saadoonhammad/IEEECOINS_Data_Imputation/blob/main/IEEE_COINS_Linear_Spline_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Kaleido is needed for plotly's static image export (fig.write_image), used when saving the plots.
pip install -U kaleido

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
import plotly.graph_objects as go

# ---------------------------------------------------------------------------
# Gap-imputation benchmark: linear vs. cubic-spline interpolation.
#
# For every matching CSV in `input_folder_path`, the September slice of the
# 'temp_value_imp' series is taken as ground truth, a contiguous block of
# `gap_size` values starting at row `start_gap` is blanked out, and the block
# is re-filled with pandas' linear and spline (order 3) interpolation.
# Per (file, gap size, method) the script writes:
#   * a CSV with timestamp / original / imputed values   -> output_folder_csv
#   * a PNG plot of ground truth vs. imputed region       -> output_folder_path
# and finally two summary CSVs with RMSE / MAPE / MAE     -> output_folder_path
# ---------------------------------------------------------------------------
input_folder_path = '/path/to/your/data'
output_folder_path = '/path/to/your/data/results'
output_folder_csv = '/path/to/your/data/csvs'

file_patterns = ['c01m045e01', 'c05m124e01', 'c05m105e08']
gap_sizes = [150, 300, 400, 600]
start_gap = 500

# Both output locations must exist before anything is written to them.
os.makedirs(output_folder_path, exist_ok=True)
os.makedirs(output_folder_csv, exist_ok=True)

linear_results = []
spline_results = []

# Sorted so the row order of the summary CSVs is reproducible across runs
# (os.listdir returns entries in arbitrary order).
for file_name in sorted(os.listdir(input_folder_path)):
    if not (file_name.endswith('.csv') and any(pattern in file_name for pattern in file_patterns)):
        continue

    file_path = os.path.join(input_folder_path, file_name)
    base_name = file_name.split('.')[0]

    # The year is taken from the file name; files without a year tag are
    # treated as 2021.
    if '_2022' in file_name:
        year = '2022'
    elif '_2023' in file_name:
        year = '2023'
    else:
        year = '2021'
    year_number = int(year)

    data = pd.read_csv(file_path)
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    # .copy() so the assignment below modifies an independent frame rather
    # than a slice of the one returned by read_csv.
    data = data[['timestamp', 'temp_value_imp']].copy()
    # Force every timestamp into the year derived from the file name.
    # NOTE(review): Timestamp.replace raises ValueError for 29 February when
    # the target year is not a leap year - confirm the inputs never contain it.
    data['timestamp'] = data['timestamp'].apply(lambda ts: ts.replace(year=year_number))

    # NOTE(review): the upper bound is compared against midnight of 30 Sept,
    # so readings later on that day are excluded. Kept as-is to preserve the
    # existing evaluation window - confirm whether this is intended.
    in_september = (data['timestamp'] >= f'{year}-09-01') & (data['timestamp'] <= f'{year}-09-30')
    test_df_original = data[in_september].reset_index(drop=True)
    value_col = test_df_original.columns.get_loc('temp_value_imp')

    for gap_size in gap_sizes:
        gap_end = start_gap + gap_size

        # The gap must be followed by at least one observed value; otherwise
        # it would run to (or past) the end of the series, leaving nothing to
        # interpolate towards and possibly fewer than `gap_size` rows to score.
        if len(test_df_original) <= gap_end:
            print(
                f"Skipping {file_name} (gap {gap_size}): only "
                f"{len(test_df_original)} rows in the test window, need more than {gap_end}."
            )
            continue

        gap = slice(start_gap, gap_end)

        test_df = test_df_original.copy()
        test_df.iloc[gap, value_col] = np.nan

        # Rows of the artificial gap (values are NaN here); used for the
        # timestamps of the exported CSV and the shaded plot region.
        missing_data = test_df.iloc[gap]
        original_values = test_df_original['temp_value_imp'].iloc[gap]

        test_interp_linear = test_df.copy()
        test_interp_spline = test_df.copy()
        test_interp_linear['temp_value_imp'] = test_interp_linear['temp_value_imp'].interpolate(method='linear')
        test_interp_spline['temp_value_imp'] = test_interp_spline['temp_value_imp'].interpolate(method='spline', order=3)

        for method_name, df_interp in zip(['Linear', 'Spline'], [test_interp_linear, test_interp_spline]):
            imputed_values = df_interp['temp_value_imp'].iloc[gap]

            rmse = root_mean_squared_error(original_values, imputed_values)
            mape = mean_absolute_percentage_error(original_values, imputed_values)
            mae = mean_absolute_error(original_values, imputed_values)

            # Save imputed vs original values for future plotting
            result_df = pd.DataFrame({
                'timestamp': missing_data['timestamp'].values,
                'original_value': original_values.values,
                'imputed_value': imputed_values.values
            })
            result_filename = f"{base_name}_gap{gap_size}_{method_name.lower()}_imputed.csv"
            result_df.to_csv(os.path.join(output_folder_csv, result_filename), index=False)

            row = {
                'Method': method_name,
                'File Name': file_name,
                'Gap Size': gap_size,
                'RMSE': rmse,
                'MAPE': mape,
                'MAE': mae
            }
            if method_name == 'Linear':
                linear_results.append(row)
            else:
                spline_results.append(row)

            # Plot: full ground-truth series, imputed segment on top, and a
            # shaded band marking the blanked-out block.
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=test_df_original['timestamp'],
                y=test_df_original['temp_value_imp'],
                mode='lines',
                name='Original (Ground Truth)',
                line=dict(color='blue')
            ))
            fig.add_trace(go.Scatter(
                x=missing_data['timestamp'],
                y=imputed_values.values,
                mode='lines',
                name='Imputed Region',
                line=dict(color='red', width=2)
            ))
            fig.add_vrect(
                x0=missing_data['timestamp'].iloc[0],
                x1=missing_data['timestamp'].iloc[-1],
                fillcolor="gray",
                opacity=0.2,
                line_width=0,
                annotation_text="Missing Block",
                annotation_position="top left"
            )
            fig.update_layout(
                title=f'{method_name} Interpolation | {file_name} | Gap Size: {gap_size} values',
                width=1000,
                height=800,
                xaxis_title="Timestamp",
                yaxis_title="Temperature",
                legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99)
            )
            plot_file_name = f"{base_name}_gap{gap_size}_{method_name.lower()}.png"
            fig.write_image(os.path.join(output_folder_path, plot_file_name), scale=2)

# Summary tables: one row per (file, gap size) for each method.
linear_df = pd.DataFrame(linear_results)
spline_df = pd.DataFrame(spline_results)

linear_csv_path = os.path.join(output_folder_path, 'linear_interpolation_results.csv')
spline_csv_path = os.path.join(output_folder_path, 'spline_interpolation_results.csv')

linear_df.to_csv(linear_csv_path, index=False)
spline_df.to_csv(spline_csv_path, index=False)

print("\n Linear & Spline interpolation complete. CSVs and plots saved.")


✅ Linear & Spline interpolation complete. CSVs and plots saved.
