
# Generación de Dataset


Descargar el dataset **Facebook comment volume** https://archive.ics.uci.edu/ml/datasets/Facebook+Comment+Volume+Dataset. 




# Importar librerías

Importar aquellas librerías que serán utilizadas en el trabajo.

$$P_{rms}=\sqrt{\frac{1}{N}\sum\limits_{i=1}^{N} {x(i)}^2} $$

In [5]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
import math


# Cargar datos
Cargar los datos de entrenamiento.

In [112]:
# Folder with the raw recordings; it is listed and its files are read by the
# processing loop further down in this cell.
PATH_EXPERIMENT = "./archive/2nd_test/2nd_test/"
# Path of a dataset CSV. Not referenced in the visible part of this notebook;
# presumably where the generated feature table is stored (TODO confirm).
PATH_DATASET = "./Dataset/dataset_2.csv"

from cmath import sqrt
from fileinput import filename
import os
from os.path import isfile, join
from matplotlib.pyplot import axis
from numpy import divide

# onlyfiles = [f for f in listdir(PATH_EXPERIMENT) if isfile(join(PATH_EXPERIMENT, f))]
# Feature switches: a feature is computed only when its flag is True.
# Features computed for each single bearing (one column per bearing).
single_features = {
    'aMean': True,
    'std': False,
    'irq': False,
    'skew': False,
    'kurtosis': False,
    'f0': False,
    'Pf0': False,
    'Xrms': True,
    'zeroX': False,
    'p2p': False,
    'crest': False,
    'clearance': False,
    'shape': False,
    'impulse': False,
}
# Features computed between pairs of bearings.
multi_features = {'covar': False, 'autocor': False}
n_bearings = 4  # Number of bearings

## Create list of feature names
enabled_single = [name for name, enabled in single_features.items() if enabled]
enabled_multi = [name for name, enabled in multi_features.items() if enabled]

# Single channel: "<feature>_<bearing>", grouped bearing by bearing.
df_columns = [
    name + '_' + str(bearing)
    for bearing in range(n_bearings)
    for name in enabled_single
]
# Multi channel: "<feature>_<first><second>" for every pair with first < second.
df_columns += [
    name + '_' + str(first) + str(second)
    for first in range(n_bearings)
    for second in range(first + 1, n_bearings)
    for name in enabled_multi
]

print('Columns of the dataset: ', df_columns)

def GetIQR(df):
    """Return the interquartile range (Q3 - Q1) of every column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One signal per column.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_columns)`` with one IQR per column, in column
        order. For a four-column frame this is the ``(1, 4)`` shape that was
        previously hard-coded.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    # -1 lets numpy infer the column count instead of assuming 4 channels.
    return np.asarray(iqr).reshape(1, -1)

def GetFundFreq(df, Fs, n_harmonics = 1, ignore_dc = False):
    """Find the strongest spectral components of every column of ``df``.

    The magnitude of the real FFT is computed along the rows, and for each
    column the bins are ranked by decreasing magnitude.

    Parameters
    ----------
    df : 2-D array-like
        One signal per column.
    Fs : float
        Sampling frequency, used to convert FFT bins to frequencies.
    n_harmonics : int
        Number of top-ranked bins returned per column.
    ignore_dc : bool
        If True and the strongest bin is the DC bin (frequency 0.0), that
        bin is skipped and the following ``n_harmonics`` bins are returned.

    Returns
    -------
    list
        One sublist per column, each holding ``(magnitude, frequency)``
        tuples ordered from strongest to weakest.
    """
    samples = np.array(df)
    spectrum = np.abs(np.fft.rfft(samples, axis=0))
    bin_freqs = np.fft.rfftfreq(samples.shape[0], d=1/Fs)

    def top_peaks(magnitudes):
        # Rank (magnitude, frequency) pairs from strongest to weakest.
        ranked = sorted(zip(magnitudes, bin_freqs), reverse=True)
        start = 1 if (ignore_dc and ranked[0][1] == 0.0) else 0
        return ranked[start:start + n_harmonics]

    return [top_peaks(channel) for channel in spectrum.T]

def GetZeroCrossings(df_in):
    """Count the sign changes in every column of ``df_in``.

    A crossing is counted wherever the sign bit of two consecutive samples
    differs.

    Returns a list with one integer count per column, in column order.
    """
    return [
        len(np.flatnonzero(np.diff(np.signbit(df_in[name]))))
        for name in df_in.columns
    ]

def GetRMS(df_in):
    """Return the root-mean-square value of every column of ``df_in``.

    For each column the squared samples are summed, divided by the number
    of rows of the frame, and the square root is taken.

    Returns a list with one value per column, in column order.
    """
    n_samples = df_in.shape[0]
    return [
        np.abs(np.sqrt(np.sum(np.square(df_in[name])) / n_samples))
        for name in df_in.columns
    ]
    


def GetCovar(df_in):
    """Return the pairwise covariances between the columns of ``df_in``.

    Parameters
    ----------
    df_in : 2-D array-like
        One signal per column (``rowvar=False``).

    Returns
    -------
    numpy.ndarray
        1-D array with the strictly upper-triangular entries of the
        covariance matrix in row-major order, i.e. for four columns:
        cov_01, cov_02, cov_03, cov_12, cov_13, cov_23. Empty when there is
        only one column.
    """
    # atleast_2d: np.cov collapses to a 0-d array for a single column.
    covm = np.atleast_2d(np.cov(df_in, rowvar=False))
    # Size the index set from the matrix instead of assuming 4 channels.
    return covm[np.triu_indices(covm.shape[0], k=1)]

def AddNewRow(df_out, df_in, Fs=20000):
    """Compute the enabled features of one recording and append them as a row.

    Which features are computed is controlled by the module-level
    ``single_features`` and ``multi_features`` switches. The resulting values
    are laid out in the order of the module-level ``df_columns`` list: all
    enabled per-channel features of channel 0, then channel 1, and so on,
    followed by the enabled cross-channel features.

    Parameters
    ----------
    df_out : pandas.DataFrame
        Feature table built so far (columns ``df_columns``).
    df_in : pandas.DataFrame
        One recording, one channel per column.
    Fs : float, optional
        Sampling frequency passed to ``GetFundFreq``. Defaults to 20000, the
        value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        ``df_out`` with one additional row.
    """
    abs_mean = np.array(df_in.abs().mean(axis=0)).flatten()
    X_rms = GetRMS(df_in)    # Also used for crest and shape factors
    col_max = np.array(df_in.max(axis=0))   # Per-channel maximum

    # One entry per enabled feature, each holding one value per channel.
    # The order of the checks below must match the key order of
    # single_features, because df_columns is generated from that order.
    rows = []
    if single_features['aMean']:   # Absolute mean of each channel
        rows.append(abs_mean)
    if single_features['std']:
        rows.append(np.array(df_in.std(axis=0)).flatten())
    if single_features['irq']:
        rows.append(GetIQR(df_in))
    if single_features['skew']:
        rows.append(skew(df_in, axis=0))
    if single_features['kurtosis']:
        rows.append(kurtosis(df_in, axis=0))
    if single_features['f0'] or single_features['Pf0']:
        # The spectrum is needed by both features. 'Pf0' no longer depends on
        # 'f0' being enabled, since df_columns lists them independently.
        freqs = GetFundFreq(df_in, Fs, n_harmonics=1, ignore_dc=True)
        if single_features['f0']:
            rows.append([peaks[0][1] for peaks in freqs])
        if single_features['Pf0']:
            # CHECK: is the plain magnitude correct here, or should it be the
            # magnitude squared?
            rows.append([20*math.log10(peaks[0][0]) for peaks in freqs])
    if single_features['Xrms']:
        rows.append(X_rms)
    if single_features['zeroX']:
        rows.append(GetZeroCrossings(df_in))
    if single_features['p2p']:
        # Peak-to-peak is max - min; |max| + |min| overestimates it for a
        # signal that never crosses zero.
        rows.append(col_max - np.array(df_in.min(axis=0)))
    if single_features['crest']:
        rows.append(np.divide(col_max, X_rms))
    if single_features['clearance']:
        # Use the per-channel maximum; np.max(df_in) without an axis is not
        # guaranteed to reduce column by column.
        sqrt_abs_mean = np.array(np.sqrt(df_in.abs()).sum(axis=0)) / df_in.shape[0]
        rows.append(np.divide(col_max, np.square(sqrt_abs_mean)))
    if single_features['shape']:
        rows.append(np.divide(X_rms, abs_mean))
    if single_features['impulse']:
        rows.append(np.divide(col_max, abs_mean))

    if rows:
        # Column-major flattening groups the values channel by channel.
        f_vals = np.vstack(rows).astype(float).flatten(order='F')
    else:
        f_vals = np.empty(0)

    # Now append cross-channel features
    if multi_features['covar']:
        f_vals = np.append(f_vals, GetCovar(df_in))
    if multi_features['autocor']:
        pass    # Not implemented

    new_row = pd.DataFrame(f_vals.reshape(1, len(df_columns)), columns=df_columns)
    df_out = pd.concat([df_out, new_row], axis=0)

    return df_out
    


files_to_process = 1 # if -1, process all files in path folder

# os.listdir returns names in arbitrary order; sort them so that the same
# files are processed, in the same order, on every run.
file_names = sorted(x for x in os.listdir(PATH_EXPERIMENT) if x.endswith('.39'))
df_in = None
df_out = pd.DataFrame(columns=df_columns)
# Never ask for more files than are available.
if files_to_process == -1 or files_to_process > len(file_names):
    files_to_process = len(file_names)

for i in range(files_to_process):
    # os.path.join picks the right separator for the current platform.
    pathText = os.path.join(PATH_EXPERIMENT, file_names[i])
    df_in = pd.read_csv(pathText, delimiter='\t', header=None)
    df_out = AddNewRow(df_out, df_in)

df_out = df_out.reset_index(drop=True)
print(df_out.head())
print("shape:",df_out.shape)



# Layout of our dataset:
# #sample | mean_0 | std_0 | .... | mean_1 | std_1 | ..... | mean_3 | std_3 | ... ||| failure_0 | failure_1 | ... | failure_3
                                                                #                       None    | outer_race| ... | None


Columns of the dataset:  ['aMean_0', 'Xrms_0', 'aMean_1', 'Xrms_1', 'aMean_2', 'Xrms_2', 'aMean_3', 'Xrms_3']
[0.07417899856512623] 0.07417899856512623
[0.07417899856512623, 0.09094388696428075] 0.09094388696428075
[0.07417899856512623, 0.09094388696428075, 0.10940413919878957] 0.10940413919878957
[0.07417899856512623, 0.09094388696428075, 0.10940413919878957, 0.05410346803954669] 0.05410346803954669
    aMean_0    Xrms_0   aMean_1    Xrms_1   aMean_2    Xrms_2   aMean_3  \
0  0.058332  0.074179  0.071832  0.090944  0.083244  0.109404  0.043065   

     Xrms_3  
0  0.054103  
shape: (1, 8)


In [18]:
# pyplot (not the top-level matplotlib package) provides figure() and show().
import matplotlib.pyplot as plt

# NOTE: this definition rebinds the name GetFundFreq. The feature-extraction
# cell calls GetFundFreq with n_harmonics / ignore_dc, which this plotting
# version does not accept, so re-run that cell before computing features.
def GetFundFreq(df, Fs):
    """Plot the magnitude spectrum of every column of ``df``.

    One figure holds a spectrum plot per column; a second figure holds a
    scatter plot of the strongest bins of every column (frequency on the
    x axis, magnitude on the y axis).

    Parameters
    ----------
    df : 2-D array-like
        One signal per column.
    Fs : float
        Sampling frequency, used to convert FFT bins to frequencies.

    Returns
    -------
    None
    """
    signal = np.array(df)
    fft_abs = np.abs(np.fft.rfft(signal, axis=0))
    freq = np.fft.rfftfreq(signal.shape[0], d=1/Fs)
    harmonics_to_get = 6
    fig = plt.figure()
    fig2 = plt.figure()
    axes_top = fig2.add_axes([0, 0, 1.2, 1.2])
    # The left edge of each spectrum axes is the 1-based column number, so
    # the plots are placed side by side.
    for i, column in enumerate(fft_abs.T, start=1):
        axes = fig.add_axes([i, 0, 0.8, 1.2])
        axes.set_xlabel("frequency, Hz")
        axes.set_ylabel("Amplitude, units")
        axes.plot(freq, column)
        # Get top N frequencies
        top = sorted(zip(column, freq), reverse=True)[:harmonics_to_get]
        magnitudes, frequencies = zip(*top)
        axes_top.scatter(frequencies, magnitudes)
    plt.show()
    # print(harmonics)
    # one = np.asarray(harmonics[0]).T
    # two = np.asarray(harmonics[1]).T
    # fig = plt.figure()
    # axes = fig.add_axes([0, 0, 1.2, 1.2])
    # axes.scatter(one[:][1], one[:][0] )
    # axes.scatter(two[:][1], two[:][0] )
    

# GetFundFreq(df_in, 20000)

# Scratch check of the sum-of-squares expression. `val` is overwritten on
# every row, so the printed number belongs to the last row only.
test = np.array([[0, 1, 2, 3], [1, 2, 3, 3]])
val = 0
for row in test:
    squared = np.square(row)
    val = np.sum(squared)
print(val)

23
