# Removing spectral data that is too noisy

In [1]:
import numpy as np
import pandas as pd


Loading the data (the ID-number columns need to be dropped)

In [2]:
# Names of the spectral sensors to process. Commented-out entries are
# deliberately left out of the list.
spectral_data = [
    "SPEK_L1",
    # "SPEK_R1_1",
    # "SPEK_R1_2",
    # "SPEK_L2",
    # "SPEK_R2_1",
    # "SPEK_R2_2",
    "SPEK_L3",
    # "SPEK_R3_1",  # did not load properly (memory problem)
    "SPEK_R3_2",
    "SPEK_LP",
    # "SPEK_RP_1",
    "SPEK_RP_2",
]

In [3]:
# Read back the per-sensor CSV files holding the spectral data.
# Builds spek_data: a dict mapping sensor name -> DataFrame.

spek_data = {}
for sensor in spectral_data:
    temp_df = pd.read_csv(f'D:/capstone/processed/MA_NR-78635_{sensor}.csv')
#     temp_df = temp_df.set_index('PRIMARY')
    # Drop the ID columns. `columns=` is used because passing the axis
    # positionally (`drop(labels, 1)`) is rejected by pandas >= 2.0.
    temp_df = temp_df.drop(columns=['MA_NR', 'WSG_ID', 'WZD_ID', 'ST_ID', 'AT_ID'])

    # Count missing values once and report only the columns that have any.
    na_counts = temp_df.isna().sum()
    print(f'{sensor} - shape: {temp_df.shape}, NAs: {na_counts.loc[na_counts > 0]}')
    spek_data[sensor] = temp_df

SPEK_L1 - shape: (27206, 1230), NAs: Series([], dtype: int64)
SPEK_L3 - shape: (27206, 1230), NAs: Series([], dtype: int64)
SPEK_R3_2 - shape: (27206, 1230), NAs: Series([], dtype: int64)
SPEK_LP - shape: (27206, 513), NAs: Series([], dtype: int64)
SPEK_RP_2 - shape: (27206, 513), NAs: Series([], dtype: int64)


# Checking noise levels in the spectral data

Low-amplitude cut-off set at 0.09 (agreed after a discussion with Christian)

In [4]:
def amp_cut_off(dataframe, amp_thresh = 0.09):
    """
    Tell whether every value of a dataframe is below the amplitude threshold.

    dataframe : object exposing a `.values` array (e.g. a pandas DataFrame)
    amp_thresh : cut-off that the absolute values are compared against
    Returns a boolean: True if all absolute values are strictly below
    amp_thresh, False as soon as one value reaches or exceeds it
    """
    # Compare against the caller-supplied threshold rather than a fixed 0.09,
    # so that a non-default amp_thresh actually takes effect.
    return (np.abs(dataframe.values) < amp_thresh).all()

If the DataFrame contains values above 0.09, check its signal-to-noise ratio

In [5]:
def signaltonoise(a, axis=None, ddof=0):
    """
    Returns the signal-to-noise ratio of input, defined as mean / standard deviation

    a : array-like (converted with np.asanyarray)
    axis : axis along which mean and std are computed:
           0 gives one ratio per column, 1 one ratio per row,
           None a single ratio over all values
           (the notebook labels 0 the "freq axis" and 1 the "gear axis";
           verify against the data layout)
    ddof : Degrees of freedom correction passed to std
    Returns the mean to standard deviation ratio(s) along axis as an array;
    entries whose standard deviation is 0 are reported as 0
    """
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=ddof)
    # np.where evaluates both branches, so m/sd is computed even where sd == 0.
    # Silence the resulting divide-by-zero / invalid warnings: those entries
    # are replaced by 0 right below.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = m / sd
    return np.where(sd == 0, 0, ratio)

Mute (set to zero) all frequencies whose amplitude is below the noise level, to increase the SNR

In [6]:
def threshold(data, amp_thresh=0.09):
    """
    Mute every value whose absolute amplitude is below amp_thresh.

    data : pandas DataFrame of numeric amplitudes (left unmodified)
    amp_thresh : cut-off; values with |value| < amp_thresh are set to 0
    Returns a new DataFrame with the same index and columns as data
    """
    # The absolute value is compared so that this agrees with amp_cut_off:
    # a large negative value is above the noise level and must be kept.
    cutoff_data = data.mask(data.abs() < amp_thresh, 0)
    return cutoff_data

Overall function to apply to all spectral sensors

In [9]:
def clean_spectral(sensors, amp_thresh=0.09, axis=None, cutoff=False,
                   snr_thresh=0.2, data=None):
    """
    Select the sensors whose spectral data carries a usable signal.

    A sensor is kept when its data is not entirely below amp_thresh (in
    absolute value) and its signal-to-noise ratio is above snr_thresh.

    sensors : list of sensor names (str), used as keys into data
    amp_thresh : amplitude cut-off used for the low-amplitude check and,
                 when cutoff is True, for muting
    axis : axis passed to signaltonoise; None computes the SNR on the entire
           dataframe. The ratio must reduce to a single value, otherwise
           ValueError is raised
    cutoff : True sets all amplitudes below amp_thresh to 0 before the
             checks; False leaves the data untouched
    snr_thresh : minimum signal-to-noise ratio for a sensor to be kept
    data : dict mapping sensor name -> DataFrame; defaults to the
           notebook-level spek_data
    Returns (sensors, signal_sensors): the input list and the kept sensors
    """
    if data is None:
        data = spek_data  # notebook-level dict filled by the loading cell
    signal_sensors = []

    for sensor in sensors:
        sensor_df = data[sensor]

        if cutoff:
            # Work on a thresholded copy: the stored dataframe is not
            # overwritten, so repeated calls always start from the same data.
            sensor_df = threshold(sensor_df, amp_thresh=amp_thresh)

        if amp_cut_off(sensor_df, amp_thresh):
            continue  # the signal is too low amplitude everywhere

        snr = np.asarray(signaltonoise(sensor_df, axis))
        if snr.size != 1:
            # A per-row / per-column SNR cannot be compared to one threshold.
            raise ValueError(
                f'signal-to-noise ratio for {sensor} has {snr.size} values; '
                'use axis=None to get a single ratio per sensor'
            )
        if snr.item() > snr_thresh:
            signal_sensors.append(sensor)
    return sensors, signal_sensors
            
        

In [10]:
# Baseline run: data used as loaded (cutoff=False), SNR computed over each whole dataframe (axis=None).
clean_spectral(spectral_data, amp_thresh=0.09, axis=None, cutoff=False)

(['SPEK_L1', 'SPEK_L3', 'SPEK_R3_2', 'SPEK_LP', 'SPEK_RP_2'],
 ['SPEK_L3', 'SPEK_R3_2', 'SPEK_LP', 'SPEK_RP_2'])

In [11]:
# Same call with cutoff=True (amplitudes below the threshold are set to 0 first).
# Fewer sensors pass in the recorded output, so the cutoff is judged too conservative.
clean_spectral(spectral_data, amp_thresh=0.09, axis=None, cutoff=True)

(['SPEK_L1', 'SPEK_L3', 'SPEK_R3_2', 'SPEK_LP', 'SPEK_RP_2'],
 ['SPEK_LP', 'SPEK_RP_2'])