# In [2]:
import os
import pandas as pd
from scipy.signal import butter, filtfilt

def butter_bandpass_filter(data, high_cutoff, sampling_freq):
    """Low-pass filter a signal with a 4th-order Butterworth filter.

    Despite the name, the filter designed here is a low-pass
    (``btype='lowpass'``), not a band-pass: there is no lower cutoff.
    The coefficients are applied with ``filtfilt``, i.e. once forward and
    once backward over the data.

    Args:
        data: One-dimensional sequence of samples to filter.
        high_cutoff: Cutoff frequency, in the same units as
            ``sampling_freq``.
        sampling_freq: Sampling frequency of ``data``.

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.
    """
    # butter() expects the cutoff as a fraction of the Nyquist frequency
    # (half the sampling rate).
    normalized_cutoff = high_cutoff / (0.5 * sampling_freq)
    numerator, denominator = butter(N=4, Wn=normalized_cutoff, btype='lowpass')
    return filtfilt(numerator, denominator, data)

def apply_moving_average(data, window_size):
    """Smooth ``data`` with a trailing rolling mean.

    Args:
        data: Object exposing a pandas-style ``rolling`` method (the
            script passes DataFrame columns).
        window_size: Number of samples in the rolling window.

    Returns:
        The rolling mean of ``data``. Because ``min_periods=1``, the first
        entries are averaged over however many samples are available
        instead of being NaN.
    """
    rolling_window = data.rolling(window=window_size, min_periods=1)
    return rolling_window.mean()

# Filter parameters
sampling_freq = 250  # this is sampling frequency for IIST BCI Dataset
high_cutoff = 50  # low-pass cutoff, in the same units as sampling_freq
window_size = 5  # number of samples in the moving-average window

# Input and output directories
input_dir = "C:/Users/sgt17/Desktop/Marathi-English Vocal/MUMMY_CSV_Files"  # Path to the folder containing input CSV files
output_dir = "C:/Users/sgt17/Desktop/Marathi-English Vocal/MUMMY_A-N_Filtered"  # Path to the folder where filtered CSV files will be saved

# Create the output directory if it doesn't exist. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

# Filter every CSV file found in the input directory and write the result,
# under the same file name, to the output directory.
for filename in os.listdir(input_dir):
    # Skip anything that is not a CSV file
    if not filename.endswith('.csv'):
        continue

    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename)

    # Load the recording and discard the 1st and 10th columns (positions 0
    # and 9), which are not EEG data
    eeg_data = pd.read_csv(input_path)
    eeg_data = eeg_data.drop(columns=eeg_data.columns[[0, 9]])

    # Columns are independent, so each one is low-pass filtered (removing
    # content above high_cutoff) and then smoothed with the moving average
    # in a single pass over the columns.
    for col in eeg_data.columns:
        eeg_data[col] = butter_bandpass_filter(eeg_data[col], high_cutoff, sampling_freq)
        eeg_data[col] = apply_moving_average(eeg_data[col], window_size)

    # Save the filtered data without the DataFrame index
    eeg_data.to_csv(output_path, index=False)
