## Filtering process

#### Chebyshev type I low-pass

In [None]:
import pandas as pd
import numpy as np
from scipy import signal
from pathlib import Path
import matplotlib.pyplot as plt

class ChebyshevFilterCSI:
    """Zero-phase Chebyshev Type I low-pass filter for CSI DataFrame columns.

    The filter is designed once in ``__init__`` and then applied forwards and
    backwards along the row axis, so the filtered signal has no phase shift.

    Attributes:
        cutoff_freq: Cutoff passed to ``scipy.signal.cheby1`` (SciPy's digital
            convention: a fraction of the Nyquist frequency, 0 < value < 1).
        filter_order: Order of the designed filter.
        ripple_db: Maximum pass-band ripple in dB.
        b, a: Transfer-function coefficients (used for the response plot).
        sos: Second-order-section form of the same design (used to filter).
    """

    def __init__(self, cutoff_freq=0.05, filter_order=3, ripple_db=0.5):
        """Design the filter.

        Args:
            cutoff_freq: Normalized cutoff frequency (0 < cutoff_freq < 1).
            filter_order: Filter order.
            ripple_db: Pass-band ripple in dB.

        Raises:
            ValueError: Propagated from SciPy when the design parameters are
                invalid (for example a cutoff outside the open interval (0, 1)).
        """
        self.cutoff_freq = cutoff_freq
        self.filter_order = filter_order
        self.ripple_db = ripple_db
        self.b, self.a = signal.cheby1(filter_order, ripple_db, cutoff_freq, btype='low')
        # The (b, a) polynomial form loses precision as the order grows, so the
        # actual filtering runs on second-order sections of the same design.
        self.sos = signal.cheby1(filter_order, ripple_db, cutoff_freq, btype='low', output='sos')

    def apply_filter(self, data):
        """Filter ``data`` forwards and backwards along axis 0 (zero phase).

        Args:
            data: Array-like with samples along axis 0 (1-D, or 2-D with one
                column per signal).

        Returns:
            ``numpy.ndarray`` with the same shape as ``data``.

        Raises:
            ValueError: Propagated from SciPy when the input has too few
                samples for the edge padding used by the forward-backward pass.
        """
        return signal.sosfiltfilt(self.sos, np.asarray(data), axis=0)

    def process_csi_data(self, df, amplitude_cols=None, phase_cols=None):
        """Return a copy of ``df`` with amplitude and phase columns filtered.

        Args:
            df: Input DataFrame; it is not modified.
            amplitude_cols: Columns to filter as amplitude. ``None`` selects
                every column whose name starts with ``'amp_'``.
            phase_cols: Columns to filter as phase. ``None`` selects every
                column whose name starts with ``'phase_'``.

        Returns:
            A new DataFrame with the selected columns replaced by their
            filtered values; all other columns are copied unchanged.
        """
        if amplitude_cols is None:
            amplitude_cols = [col for col in df.columns if col.startswith('amp_')]
        if phase_cols is None:
            phase_cols = [col for col in df.columns if col.startswith('phase_')]

        # Normalise to plain lists: a tuple would be read by pandas as a single
        # column key, and a pandas Index has no unambiguous truth value.
        amplitude_cols = list(amplitude_cols)
        phase_cols = list(phase_cols)

        print(f"Processing {len(amplitude_cols)} amplitude columns and {len(phase_cols)} phase columns")

        processed_df = df.copy()

        # NOTE(review): phase columns are low-passed exactly as stored; if they
        # hold wrapped phase, unwrapping upstream may be needed - confirm.
        for label, cols in (("amplitude", amplitude_cols), ("phase", phase_cols)):
            if not cols:
                continue
            print(f"Applying Chebyshev filter to {label} data...")
            processed_df[cols] = self.apply_filter(df[cols].to_numpy(dtype=float))

        return processed_df

    def visualize_frequency_response(self):
        """Plot magnitude and phase of the designed (single-pass) filter.

        ``apply_filter`` runs the filter forwards and backwards, so the response
        actually applied to the data has this magnitude squared and zero phase.
        """
        w, h = signal.freqz(self.b, self.a)
        # freqz returns rad/sample; divide by pi so the axis matches both the
        # axis label and the units of ``cutoff_freq``.
        freq = w / np.pi
        # Floor the magnitude so an exact zero of the response cannot hit log10(0).
        magnitude_db = 20 * np.log10(np.maximum(np.abs(h), np.finfo(float).tiny))

        plt.figure(figsize=(12, 8))

        # Magnitude
        plt.subplot(2, 1, 1)
        plt.plot(freq, magnitude_db)
        plt.axvline(self.cutoff_freq, color='red', linestyle='--', alpha=0.7)
        plt.title(f'Frequency Response - Chebyshev Type I Filter\n'
                  f'Cutoff Freq: {self.cutoff_freq}, Order: {self.filter_order}, Ripple: {self.ripple_db}dB')
        plt.ylabel('Magnitude (dB)')
        plt.grid(True, alpha=0.3)
        plt.ylim(-60, 5)

        # Phase
        plt.subplot(2, 1, 2)
        plt.plot(freq, np.unwrap(np.angle(h)) * 180 / np.pi)
        plt.axvline(self.cutoff_freq, color='red', linestyle='--', alpha=0.7)
        plt.ylabel('Phase (degrees)')
        plt.xlabel('Normalized Frequency (π rad/sample)')
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    def compare_before_after(self, original_data, filtered_data, test_column, title="Comparison Before vs After"):
        """Plot and print a before/after comparison for one signal.

        Draws a 2x2 figure (time series, histograms, box plots, difference)
        and prints mean, standard deviation, RMSE and the relative reduction
        in standard deviation.

        Args:
            original_data: 1-D array-like with the unfiltered samples.
            filtered_data: 1-D array-like with the filtered samples; must have
                the same shape as ``original_data``.
            test_column: Name shown in the figure title and the printed header.
            title: Prefix of the figure title.

        Raises:
            ValueError: If the two inputs do not have the same shape.
        """
        # Accept lists as well as arrays; the arithmetic below needs ndarrays.
        original = np.asarray(original_data, dtype=float)
        filtered = np.asarray(filtered_data, dtype=float)
        if original.shape != filtered.shape:
            raise ValueError(
                f"original_data and filtered_data must have the same shape, "
                f"got {original.shape} and {filtered.shape}"
            )

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f"{title} - {test_column}", fontsize=16)

        # Temporal graph
        time = np.arange(len(original))
        axes[0, 0].plot(time, original, 'b-', alpha=0.7, label='Original', linewidth=1)
        axes[0, 0].plot(time, filtered, 'r-', alpha=0.7, label='Filtered', linewidth=1)
        axes[0, 0].set_title('Temporal Comparison')
        axes[0, 0].set_xlabel('Sample')
        axes[0, 0].set_ylabel('Value')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # Histogram
        axes[0, 1].hist(original, bins=50, alpha=0.7, label='Original', color='blue', density=True)
        axes[0, 1].hist(filtered, bins=50, alpha=0.7, label='Filtered', color='red', density=True)
        axes[0, 1].set_title('Value Distribution')
        axes[0, 1].set_xlabel('Value')
        axes[0, 1].set_ylabel('Density')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

        # Box plot. Tick labels are set separately because the ``labels``
        # keyword of boxplot is deprecated in recent Matplotlib releases.
        axes[1, 0].boxplot([original, filtered])
        axes[1, 0].set_xticklabels(['Original', 'Filtered'])
        axes[1, 0].set_title('Box Plot - Distribution Analysis')
        axes[1, 0].set_ylabel('Value')
        axes[1, 0].grid(True, alpha=0.3)

        # Differences between filtered and original data
        difference = filtered - original
        axes[1, 1].plot(time, difference, 'g-', alpha=0.7)
        axes[1, 1].set_title('Difference (Filtered - Original)')
        axes[1, 1].set_xlabel('Sample')
        axes[1, 1].set_ylabel('Difference')
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Statistics
        original_std = np.std(original)
        filtered_std = np.std(filtered)
        print(f"\nComparison Statistics - {test_column}:")
        print(f"Original - Mean: {np.mean(original):.4f}, Std: {original_std:.4f}")
        print(f"Filtered - Mean: {np.mean(filtered):.4f}, Std: {filtered_std:.4f}")
        print(f"RMSE: {np.sqrt(np.mean(difference ** 2)):.4f}")
        # A constant original signal has zero spread, so the ratio is undefined.
        if original_std > 0:
            print(f"Noise reduction: {((original_std - filtered_std) / original_std * 100):.2f}%")
        else:
            print("Noise reduction: n/a (original signal has zero standard deviation)")


# Run configuration for the Chebyshev cell (same defaults as the original script)
input_file = Path('csi_data_processed.csv')
output_file = Path('csi_data_chebyshev.csv')
cutoff_freq, filter_order, ripple_db = 0.05, 3, 0.5
sample_size = 1000

# Only run the pipeline when the input CSV is present
if input_file.exists():
    print(f"Loading data from: {input_file}")

    try:
        # Read the CSV and report its size
        df = pd.read_csv(input_file)
        print(f"Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

        # Columns are picked by name prefix
        amplitude_cols = [name for name in df.columns if name.startswith('amp_')]
        phase_cols = [name for name in df.columns if name.startswith('phase_')]

        print(f"Found {len(amplitude_cols)} amplitude columns and {len(phase_cols)} phase columns")

        # Build the filter from the configuration above
        chebyshev_filter = ChebyshevFilterCSI(cutoff_freq=cutoff_freq, filter_order=filter_order, ripple_db=ripple_db)

        # Show the filter's frequency response
        print("Visualizing filter frequency response...")
        chebyshev_filter.visualize_frequency_response()

        # Filter every amplitude and phase column
        print("Applying Chebyshev Type I filter...")
        processed_df = chebyshev_filter.process_csi_data(df, amplitude_cols, phase_cols)

        # Write the filtered table to disk
        processed_df.to_csv(output_file, index=False)
        print(f"Processed data saved to: {output_file}")

        # Before/after plots need at least one amplitude column
        if amplitude_cols:
            print("Generating comparative visualizations...")

            # Compare the first amplitude column, limited to the leading samples
            test_column = amplitude_cols[0]
            sample_size_to_plot = min(len(df), sample_size)

            original_data = df[test_column].values[:sample_size_to_plot]
            filtered_data = processed_df[test_column].values[:sample_size_to_plot]

            chebyshev_filter.compare_before_after(
                original_data,
                filtered_data,
                test_column,
                title="Chebyshev Type I Filter - CSI",
            )

        print("Processing complete!")

    except Exception as e:
        print(f"Error executing the notebook: {e}")
else:
    print(f"Error: File {input_file} not found!")

#### Butterworth low-pass

In [None]:
import pandas as pd
import numpy as np
from scipy import signal
from pathlib import Path
import matplotlib.pyplot as plt

class ButterworthFilterCSI:
    """Zero-phase Butterworth low-pass filter for CSI DataFrame columns.

    The filter is designed once in ``__init__`` and then applied forwards and
    backwards along the row axis, so the filtered signal has no phase shift.

    Attributes:
        cutoff_freq: Cutoff passed to ``scipy.signal.butter`` (SciPy's digital
            convention: a fraction of the Nyquist frequency, 0 < value < 1).
        filter_order: Order of the designed filter.
        b, a: Transfer-function coefficients (used for the response plot).
        sos: Second-order-section form of the same design (used to filter).
    """

    def __init__(self, cutoff_freq=0.03, filter_order=2):
        """Design the filter.

        Args:
            cutoff_freq: Normalized cutoff frequency (0 < cutoff_freq < 1).
            filter_order: Filter order.

        Raises:
            ValueError: Propagated from SciPy when the design parameters are
                invalid (for example a cutoff outside the open interval (0, 1)).
        """
        self.cutoff_freq = cutoff_freq
        self.filter_order = filter_order
        self.b, self.a = signal.butter(filter_order, cutoff_freq, btype='low')
        # The (b, a) polynomial form loses precision as the order grows, so the
        # actual filtering runs on second-order sections of the same design.
        self.sos = signal.butter(filter_order, cutoff_freq, btype='low', output='sos')

    def apply_filter(self, data):
        """Filter ``data`` forwards and backwards along axis 0 (zero phase).

        Args:
            data: Array-like with samples along axis 0 (1-D, or 2-D with one
                column per signal).

        Returns:
            ``numpy.ndarray`` with the same shape as ``data``.

        Raises:
            ValueError: Propagated from SciPy when the input has too few
                samples for the edge padding used by the forward-backward pass.
        """
        return signal.sosfiltfilt(self.sos, np.asarray(data), axis=0)

    def process_csi_data(self, df, amplitude_cols=None, phase_cols=None):
        """Return a copy of ``df`` with amplitude and phase columns filtered.

        Args:
            df: Input DataFrame; it is not modified.
            amplitude_cols: Columns to filter as amplitude. ``None`` selects
                every column whose name starts with ``'amp_'``.
            phase_cols: Columns to filter as phase. ``None`` selects every
                column whose name starts with ``'phase_'``.

        Returns:
            A new DataFrame with the selected columns replaced by their
            filtered values; all other columns are copied unchanged.
        """
        if amplitude_cols is None:
            amplitude_cols = [col for col in df.columns if col.startswith('amp_')]
        if phase_cols is None:
            phase_cols = [col for col in df.columns if col.startswith('phase_')]

        # Normalise to plain lists: a tuple would be read by pandas as a single
        # column key, and a pandas Index has no unambiguous truth value.
        amplitude_cols = list(amplitude_cols)
        phase_cols = list(phase_cols)

        print(f"Processing {len(amplitude_cols)} amplitude columns and {len(phase_cols)} phase columns")

        processed_df = df.copy()

        # NOTE(review): phase columns are low-passed exactly as stored; if they
        # hold wrapped phase, unwrapping upstream may be needed - confirm.
        for label, cols in (("amplitude", amplitude_cols), ("phase", phase_cols)):
            if not cols:
                continue
            print(f"Applying Butterworth filter to {label} data...")
            processed_df[cols] = self.apply_filter(df[cols].to_numpy(dtype=float))

        return processed_df

    def visualize_frequency_response(self):
        """Plot magnitude and phase of the designed (single-pass) filter.

        ``apply_filter`` runs the filter forwards and backwards, so the response
        actually applied to the data has this magnitude squared and zero phase.
        """
        w, h = signal.freqz(self.b, self.a)
        # freqz returns rad/sample; divide by pi so the axis matches both the
        # axis label and the units of ``cutoff_freq``.
        freq = w / np.pi
        # Floor the magnitude so an exact zero of the response cannot hit log10(0).
        magnitude_db = 20 * np.log10(np.maximum(np.abs(h), np.finfo(float).tiny))

        plt.figure(figsize=(12, 8))

        # Magnitude
        plt.subplot(2, 1, 1)
        plt.plot(freq, magnitude_db)
        plt.axvline(self.cutoff_freq, color='red', linestyle='--', alpha=0.7)
        plt.title(f'Frequency Response - Butterworth Filter\n'
                  f'Cutoff Freq: {self.cutoff_freq}, Order: {self.filter_order}')
        plt.ylabel('Magnitude (dB)')
        plt.grid(True, alpha=0.3)
        plt.ylim(-60, 5)

        # Phase
        plt.subplot(2, 1, 2)
        plt.plot(freq, np.unwrap(np.angle(h)) * 180 / np.pi)
        plt.axvline(self.cutoff_freq, color='red', linestyle='--', alpha=0.7)
        plt.ylabel('Phase (degrees)')
        plt.xlabel('Normalized Frequency (π rad/sample)')
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    def compare_before_after(self, original_data, filtered_data, test_column, title="Comparison Before vs After"):
        """Plot and print a before/after comparison for one signal.

        Draws a 2x2 figure (time series, histograms, box plots, difference)
        and prints mean, standard deviation, RMSE and the relative reduction
        in standard deviation.

        Args:
            original_data: 1-D array-like with the unfiltered samples.
            filtered_data: 1-D array-like with the filtered samples; must have
                the same shape as ``original_data``.
            test_column: Name shown in the figure title and the printed header.
            title: Prefix of the figure title.

        Raises:
            ValueError: If the two inputs do not have the same shape.
        """
        # Accept lists as well as arrays; the arithmetic below needs ndarrays.
        original = np.asarray(original_data, dtype=float)
        filtered = np.asarray(filtered_data, dtype=float)
        if original.shape != filtered.shape:
            raise ValueError(
                f"original_data and filtered_data must have the same shape, "
                f"got {original.shape} and {filtered.shape}"
            )

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f"{title} - {test_column}", fontsize=16)

        # Temporal graph
        time = np.arange(len(original))
        axes[0, 0].plot(time, original, 'b-', alpha=0.7, label='Original', linewidth=1)
        axes[0, 0].plot(time, filtered, 'r-', alpha=0.7, label='Filtered', linewidth=1)
        axes[0, 0].set_title('Temporal Comparison')
        axes[0, 0].set_xlabel('Sample')
        axes[0, 0].set_ylabel('Value')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # Histogram
        axes[0, 1].hist(original, bins=50, alpha=0.7, label='Original', color='blue', density=True)
        axes[0, 1].hist(filtered, bins=50, alpha=0.7, label='Filtered', color='red', density=True)
        axes[0, 1].set_title('Value Distribution')
        axes[0, 1].set_xlabel('Value')
        axes[0, 1].set_ylabel('Density')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

        # Box plot. Tick labels are set separately because the ``labels``
        # keyword of boxplot is deprecated in recent Matplotlib releases.
        axes[1, 0].boxplot([original, filtered])
        axes[1, 0].set_xticklabels(['Original', 'Filtered'])
        axes[1, 0].set_title('Box Plot - Distribution Analysis')
        axes[1, 0].set_ylabel('Value')
        axes[1, 0].grid(True, alpha=0.3)

        # Difference
        difference = filtered - original
        axes[1, 1].plot(time, difference, 'g-', alpha=0.7)
        axes[1, 1].set_title('Difference (Filtered - Original)')
        axes[1, 1].set_xlabel('Sample')
        axes[1, 1].set_ylabel('Difference')
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Statistics
        original_std = np.std(original)
        filtered_std = np.std(filtered)
        print(f"\nComparison Statistics - {test_column}:")
        print(f"Original - Mean: {np.mean(original):.4f}, Std: {original_std:.4f}")
        print(f"Filtered - Mean: {np.mean(filtered):.4f}, Std: {filtered_std:.4f}")
        print(f"RMSE: {np.sqrt(np.mean(difference ** 2)):.4f}")
        # A constant original signal has zero spread, so the ratio is undefined.
        if original_std > 0:
            print(f"Noise reduction: {((original_std - filtered_std) / original_std * 100):.2f}%")
        else:
            print("Noise reduction: n/a (original signal has zero standard deviation)")


# Run configuration for the Butterworth cell
input_file = Path('csi_data_processed.csv')
output_file = Path('csi_data_butterworth.csv')
cutoff_freq, filter_order = 0.03, 2
sample_size = 1000

# Only run the pipeline when the input CSV is present
if input_file.exists():
    print(f"Loading data from: {input_file}")

    try:
        # Read the CSV and report its size
        df = pd.read_csv(input_file)
        print(f"Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

        # Columns are picked by name prefix
        amplitude_cols = [name for name in df.columns if name.startswith('amp_')]
        phase_cols = [name for name in df.columns if name.startswith('phase_')]

        print(f"Found {len(amplitude_cols)} amplitude columns and {len(phase_cols)} phase columns")

        # Build the filter from the configuration above
        butterworth_filter = ButterworthFilterCSI(cutoff_freq=cutoff_freq, filter_order=filter_order)

        # Show the filter's frequency response
        print("Visualizing filter frequency response...")
        butterworth_filter.visualize_frequency_response()

        # Filter every amplitude and phase column
        print("Applying Butterworth filter...")
        processed_df = butterworth_filter.process_csi_data(df, amplitude_cols, phase_cols)

        # Write the filtered table to disk
        processed_df.to_csv(output_file, index=False)
        print(f"Processed data saved to: {output_file}")

        # Before/after plots need at least one amplitude column
        if amplitude_cols:
            print("Generating comparative visualizations...")

            # Compare the first amplitude column, limited to the leading samples
            test_column = amplitude_cols[0]
            sample_size_to_plot = min(len(df), sample_size)

            original_data = df[test_column].values[:sample_size_to_plot]
            filtered_data = processed_df[test_column].values[:sample_size_to_plot]

            butterworth_filter.compare_before_after(
                original_data,
                filtered_data,
                test_column,
                title="Butterworth Filter - CSI",
            )

        print("Processing complete!")

    except Exception as e:
        print(f"Error executing the notebook: {e}")
else:
    print(f"Error: File {input_file} not found!")