# In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

def detect_peak(file, *, interval_duration=20, threshold=3):
    """Flag the samples of each signal column that rise above the baseline.

    For every signal, a piecewise-linear baseline is interpolated through the
    mean of the signal in consecutive ``interval_duration``-wide windows of
    the ``'Time'`` column. The noise level is the absolute deviation from
    that baseline found at the 98 % position of the sorted deviations (so the
    largest 2 % do not set the noise). A sample is flagged when its deviation
    above the baseline is at least ``threshold`` times that noise level.

    Parameters
    ----------
    file : pandas.DataFrame
        Despite its name this is a DataFrame, not a path. Its first six
        columns are taken as the signals, in the order Australia, USA,
        Iceland, Italy, UK, Germany, and it must contain a ``'Time'`` column.
        The frame is modified in place.
    interval_duration : float, optional
        Width of the averaging windows, in the units of ``'Time'``.
        Must be positive.
    threshold : float, optional
        Minimum ratio of (signal - baseline) to the noise level for a sample
        to count as a peak.

    Returns
    -------
    pandas.DataFrame
        The same object as ``file`` with the columns ``peak_australia``,
        ``peak_usa``, ``peak_iceland``, ``peak_italy``, ``peak_uk`` and
        ``peak_germany`` appended; each holds 1 for a peak and 0 otherwise.

    Raises
    ------
    ValueError
        If ``interval_duration`` is not positive.
    """
    if interval_duration <= 0:
        raise ValueError("interval_duration must be positive")

    data = file
    peak_columns = (
        'peak_australia',
        'peak_usa',
        'peak_iceland',
        'peak_italy',
        'peak_uk',
        'peak_germany',
    )

    # Capture the signal names BEFORE the peak columns are appended, and
    # address the peak columns by name afterwards. Writing to a fixed
    # positional offset only works when the frame has one exact width.
    signal_columns = list(data.columns[:len(peak_columns)])
    time = data['Time'].values

    for peak_name in peak_columns:
        data[peak_name] = 0

    # Nothing to analyse: return the frame with the (empty) peak columns.
    if time.size == 0:
        return data

    # The averaging windows depend only on the time axis, so they are built
    # once here instead of once per signal.
    window_starts = np.arange(
        time.min(), time.max() + interval_duration, interval_duration
    )[:-1]
    window_masks = [
        (time >= start) & (time < start + interval_duration)
        for start in window_starts
    ]
    window_centres = window_starts + interval_duration / 2

    # Position in the sorted absolute deviations used as the noise level;
    # the largest 2 % of deviations are thereby kept out of the estimate.
    noise_fraction = 0.98

    for signal_name, peak_name in zip(signal_columns, peak_columns):
        A = data[signal_name]

        # Baseline A_base
        if len(window_centres) >= 2:
            mean_values = [np.mean(A[mask]) for mask in window_masks]
            f_interp = interp1d(
                window_centres,
                mean_values,
                kind="linear",
                fill_value="extrapolate",
            )
            A_base = f_interp(time)
        else:
            # Fewer than two windows cannot define a line; use a flat
            # baseline at the mean of the whole signal instead.
            A_base = np.full(time.shape, np.mean(A))

        # Noise A_noise
        dA = A - A_base
        sorted_dA = np.sort(np.abs(dA))
        A_noise = sorted_dA[int(noise_fraction * len(sorted_dA))]

        # Peak detection. The division is kept (rather than comparing with
        # threshold * A_noise) so that a zero noise level gives NaN for a
        # zero deviation, which compares as "no peak".
        peaks = dA / A_noise >= threshold

        # 1 - peak, 0 - no peak
        data[peak_name] = np.where(peaks, 1, 0)

    return data


    

# Folder holding the raw CSV recordings.
folder_path = 'grb_csv'

# Only entries of that folder whose name ends in '.csv' are processed.
csv_files = [entry for entry in os.listdir(folder_path) if entry.endswith('.csv')]

# Results are written to a 'processed' sub-folder, created when missing.
output_folder = os.path.join(folder_path, 'processed')
os.makedirs(output_folder, exist_ok=True)

# Labels for the eight columns that remain once the first CSV column is dropped.
column_labels = [
    'NWC (Australia)',
    'NAA (USA)',
    'NRK (Iceland)',
    'skip this column!',
    'ICV (Italy)',
    'GQD (UK)',
    'DHO (Germany)',
    'NN',
]

for file_name in csv_files:
    file_path = os.path.join(folder_path, file_name)

    # Load the file, discard its first column and label the rest.
    data = pd.read_csv(file_path)
    data = data.drop(columns=data.columns[0])
    data.columns = column_labels

    # The columns at positions 3 and 7 are not kept.
    data = data.drop(columns=[column_labels[3], column_labels[7]])

    # Keep rows 9000..20999 only. Per the original note this window is meant
    # to be +- 2 min around the GRB start (presumably Time is in seconds;
    # verify against the recording format).
    data1 = data.iloc[9000:21000].copy()

    # Renumber the kept rows from 1 so that Time starts at 0.02.
    data1.index = pd.RangeIndex(1, len(data1) + 1)
    data1['Time'] = data1.index * 0.02

    # Tag every row with the name of the file it came from.
    data1['data'] = file_name

    data2 = detect_peak(data1)

    # Write the result under the same file name in the output folder.
    output_file_path = os.path.join(output_folder, file_name)
    data2.to_csv(output_file_path, index=False)

print(f"file processing completed; results saved to the folder: {output_folder}")


# Output: file processing completed; results saved to the folder: grb_csv/processed
