In [1]:
import matplotlib
matplotlib.use('Agg')  # Use non-GUI backend to prevent errors

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

# Relative directory that remove_spike() joins with each plot filename when saving figures.
PLOT_FOLDER = "static/plots"  # Folder to save plots

def remove_spike(files, window=2, threshold=0.1, abnormal_max=5, abnormal_min=0):
    """Remove spikes from the "Hs" series of each file and save a comparison plot.

    For every path in ``files`` the CSV is read, surrounding whitespace is
    stripped from the column names, and the "Timestamp" / "Hm0" columns are
    renamed to "Date" / "Hs". Rows whose "Hs" lies outside
    ``[abnormal_min, abnormal_max]`` are dropped; the remaining rows are kept
    only if they differ from a centered rolling mean by at most ``threshold``.
    Rows where the rolling mean is undefined (window edges) fail that
    comparison and are therefore dropped as well.

    A PNG showing the range-checked series and the filtered series is written
    to ``PLOT_FOLDER`` (created if missing). A file that fails at any step is
    reported on stdout and skipped; it does not abort the remaining files.

    Args:
        files: Iterable of CSV paths to process.
        window: Size of the centered rolling-mean window.
        threshold: Largest allowed absolute deviation from the rolling mean.
        abnormal_max: Upper bound (inclusive) for an acceptable "Hs" value.
        abnormal_min: Lower bound (inclusive) for an acceptable "Hs" value.

    Returns:
        tuple: ``(plot_files, filtered_data)``. ``plot_files`` lists the saved
        plot file names (not full paths). ``filtered_data`` is the filtered
        DataFrame of the last file whose filtering step succeeded, or ``None``
        if none did.
    """
    plot_files = []  # Names of the plots that were actually written
    filtered_data = None  # Filtered frame of the most recent successful file

    for file in files:
        fig = None  # Lets the finally-clause know whether a figure was opened
        try:
            print(f"Processing file: {file}")  # Debugging line
            print(f"Applied Parameters -> Window: {window}, Threshold: {threshold}, Max: {abnormal_max}, Min: {abnormal_min}")  # Debugging line

            # Read data. Headers may carry padding (e.g. " Hm0"), so normalise
            # them before renaming; otherwise the rename silently matches nothing.
            dat = pd.read_csv(file, header=0)
            dat.columns = dat.columns.str.strip()
            dat = dat.rename(columns={"Timestamp": "Date", "Hm0": "Hs"})

            missing = {"Date", "Hs"} - set(dat.columns)
            if missing:
                raise ValueError(f"missing required column(s): {sorted(missing)}")

            dat["Date"] = pd.to_datetime(dat["Date"])

            # Remove abnormal values; copy so the column assignments below
            # operate on an independent frame rather than a slice.
            in_range = (dat["Hs"] >= abnormal_min) & (dat["Hs"] <= abnormal_max)
            dat = dat.loc[in_range].copy()

            # Compute rolling mean and filter data
            dat['rolling_mean'] = dat["Hs"].rolling(window=window, center=True).mean()
            dat['diff'] = np.abs(dat["Hs"] - dat['rolling_mean'])
            filtered_data = dat[dat['diff'] <= threshold]  # Filter spikes

            # Open the figure only once the data is ready, so a bad file
            # never leaves a figure behind.
            fig, ax = plt.subplots(figsize=(16, 6))
            ax.plot(dat["Date"], dat["Hs"], label="Raw Data", color='red')
            ax.plot(filtered_data["Date"], filtered_data["Hs"], label="Filtered Data", color='blue')

            # Customize plot
            ax.set_xlabel("Date")
            ax.set_ylabel("Hs")
            ax.legend()
            ax.set_title(f"Spike Removal - {os.path.basename(file)}")

            # Save plot to file (explicitly this figure, not the "current" one)
            plot_filename = f"plot_{os.path.basename(file)}.png"
            plot_path = os.path.join(PLOT_FOLDER, plot_filename)
            os.makedirs(PLOT_FOLDER, exist_ok=True)
            fig.savefig(plot_path)

            plot_files.append(plot_filename)
        except Exception as e:
            # Best effort: report the failure and move on to the next file.
            print(f"Error processing {file}: {e}")
        finally:
            if fig is not None:
                plt.close(fig)  # Close figure to free memory, even on failure

    return plot_files, filtered_data  # Return list of saved plot filenames and filtered data


In [9]:
# Quick look at one input file: print the header exactly as pandas parsed it,
# then the frame after renaming the timestamp and " Hm0" columns.
file = "Ratnagiri_201004010027.his"
raw_frame = pd.read_csv(file, header=0)
print(raw_frame.columns)

# The " Hm0" key includes the leading space present in the parsed header.
dat = raw_frame.rename(columns={"Timestamp": "Date", " Hm0": "Hs"})

print(dat)

Index(['Timestamp', ' Hm0', ' TI', ' TE', ' T1', ' Tz', ' T3', ' T4', ' Tref',
       ' Tsea', ' Bat'],
      dtype='object')
                     Date   Hm0     TI     TE    T1    Tz    T3    T4   Tref  \
0     2010-04-01 00:27:00  0.68   7.88   7.10  5.69  5.12  4.18  7.46  29.15   
1     2010-04-01 00:57:00  0.71   7.55   6.80  5.54  5.05  4.23  7.68  29.15   
2     2010-04-01 01:27:00  0.71   7.66   6.95  5.66  5.11  4.19  7.49  29.15   
3     2010-04-01 01:57:00  0.70   7.88   7.13  5.71  5.11  4.11  7.18  29.10   
4     2010-04-01 02:27:00  0.66   7.47   6.73  5.48  4.96  4.09  7.19  29.10   
...                   ...   ...    ...    ...   ...   ...   ...   ...    ...   
1435  2010-04-30 21:57:00  0.75  11.04   9.69  6.60  5.51  4.06  7.05  29.75   
1436  2010-04-30 22:27:00  0.70  10.36   9.01  6.22  5.29  4.03  7.04  29.70   
1437  2010-04-30 22:57:00  0.75  11.93  10.68  7.33  6.00  4.25  7.44  29.70   
1438  2010-04-30 23:27:00  0.69  11.61  10.36  7.05  5.76  4.10  7.08  29.