In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas
from sklearn.externals import joblib
from sklearn import metrics
import scipy.stats as stats
from scipy.io import loadmat
from scipy.io import savemat


# Compact array printing: two decimals, fixed-point instead of scientific notation
np.set_printoptions(suppress=True, precision=2)

import matplotlib as mpl
# Global plot styling (line width and legend layout), applied in a single call
mpl.rcParams.update({
    'lines.linewidth': 2,
    'legend.handlelength': 3,
    'legend.borderpad': 0.3,
    'legend.numpoints': 1,
})

# Seed the global NumPy generator so that the random NaN imputation performed
# further down produces the same data on every Restart & Run All.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Folder holding the .mat recordings. expanduser('~') replaces
# os.getenv('HOME') + ..., which raised TypeError when HOME was not set.
# The final '' keeps the trailing path separator of the original string.
datadir = os.path.join(os.path.expanduser('~'), 'Dropbox', 'Dados_bozzi', '')


# Data Loading 

In [2]:
# Noise recording: keep only the 'trecho_ruido' variable of the .mat file
fname = datadir + '/ruido.mat'
noise = loadmat(fname)['trecho_ruido']

# Signal recording: keep only the 'sinal' variable of the .mat file
fname = datadir + '/sinal.mat'
signal = loadmat(fname)['sinal']


# Density Based Random Generator 

In [3]:
def random_from_dist(xmodel, ymodel, ndata, fPlot=False, fMethod=True):
    """Draw ``ndata`` random values following a tabulated density.

    Parameters
    ----------
    xmodel : array-like, 1-D
        Grid on which the density was evaluated. With ``fMethod=True`` the
        distance between the first two points is used as the bin width for
        every point, so the grid is expected to be evenly spaced.
    ymodel : array-like, 1-D
        Non-negative density values at ``xmodel`` (need not be normalised).
    ndata : int
        Number of values to draw.
    fPlot : bool, optional
        If True, draw a normalised histogram of the generated values together
        with the rescaled model curve, using the module-level ``plt``.
    fMethod : bool, optional
        True: inverse-CDF sampling; the returned values are ``xmodel`` shifted
        by half a bin width. False: ``np.random.choice`` over ``xmodel`` with
        probabilities proportional to ``ymodel``.

    Returns
    -------
    numpy.ndarray
        Array with ``ndata`` sampled values.
    """
    xmodel = np.asarray(xmodel)
    # Force float weights so that integer inputs do not integer-divide to zero
    # under Python 2 when they are normalised.
    ymodel = np.asarray(ymodel, dtype=float)
    if fMethod:
        # 2.0 keeps the half bin width exact for integer grids under Python 2.
        bin_midpoints = xmodel + (xmodel[1] - xmodel[0]) / 2.0
        cdf = np.cumsum(ymodel)
        cdf = cdf / cdf[-1]
        values = np.random.rand(ndata)
        # side='right' returns the first bin whose CDF is strictly above the
        # draw, so a draw of exactly 0.0 cannot select a leading bin of zero
        # probability. Draws are < 1 and cdf[-1] == 1, so the index is valid.
        value_bins = np.searchsorted(cdf, values, side='right')
        random_from_cdf = bin_midpoints[value_bins]
    else:
        random_from_cdf = np.random.choice(xmodel, ndata, p=ymodel / ymodel.sum())
    if fPlot:
        # `normed` was removed from hist() in matplotlib 3.1; `density` is the
        # replacement with the same meaning.
        a = plt.hist(random_from_cdf, bins=xmodel, density=True)
        plt.plot(xmodel, ymodel * a[0].sum() / ymodel.sum())
    return random_from_cdf

# Noise Processing

A análise é feita considerando todo o sinal, sem janelamento. Para cada canal, as amostras NaN devem ser substituídas por amostras geradas por um gerador aleatório baseado na PDF estimada das amostras restantes.

In [5]:
# Impute the NaN samples of every noise channel (whole record, no windowing).
new_noise = np.array(noise)  # copy, so the loaded `noise` array is not modified

# The per-channel density plots are off by default. Previously a bare `continue`
# in the loop made the plotting code unreachable; set this flag to True to
# draw the before/after PDFs.
fPlotNoise = False

Nx = 4
# Number of subplot rows needed for all channels (ceil avoids a spare empty
# row when the channel count is a multiple of Nx).
Ny = int(np.ceil(noise.shape[1] / float(Nx)))
if fPlotNoise:
    plt.figure(figsize=(6*Nx, 5*Ny))

for ich in range(noise.shape[1]):
    # estimate density
    test_data = noise[:, ich]
    idx_nan = np.isnan(test_data)
    test_data = test_data[~idx_nan]
    # The KDE is fitted on a random 40% subsample of the valid samples
    idx = np.arange(test_data.shape[0])
    np.random.shuffle(idx)
    test_data = test_data[idx[:int(test_data.shape[0] * 0.40)]]
    X = np.linspace(test_data.min(), test_data.max(), 1000)
    ymodel = stats.gaussian_kde(test_data)(X)
    newvalues = random_from_dist(X, ymodel, idx_nan.sum(), False, False)
    # Replace values
    new_noise[idx_nan, ich] = newvalues
    # Print: std of the valid samples, std after imputation, relative change (%)
    std_before = noise[~idx_nan, ich].std()
    std_after = new_noise[:, ich].std()
    # Single-argument print() calls give the same text on Python 2 and 3.
    print('%s  Channel:  %d' % ('==' * 10, ich + 1))
    print('\t%s' % std_before)
    print('\t%s' % std_after)
    print('\t%s' % (100 * np.abs(std_before - std_after) / std_before))

    if not fPlotNoise:
        continue

    # Plot
    plt.subplot(Ny, Nx, ich+1)
    # Old data
    test_data = noise[:, ich]
    idx_nan = np.isnan(test_data)
    test_data = test_data[~idx_nan]
    X = np.linspace(test_data.min(), test_data.max(), 1000)
    # `gaussian_kde` is only reachable through the `stats` module in this notebook
    ymodel = stats.gaussian_kde(test_data)(X)
    plt.plot(X, ymodel/ymodel.sum(), 'r-', label='Antes')
    # New Data
    yapply = stats.gaussian_kde(new_noise[:, ich])(X)
    plt.plot(X, yapply/yapply.sum(), 'k--', label='Depois')
    # Kullback-Leibler divergence between the two estimated densities
    print('\tKL:  %s' % stats.entropy(ymodel, yapply))
    plt.yscale('log')
    plt.ylabel('PDF')
    plt.xlabel('V')
    plt.grid(True)
    plt.legend(loc='best')  # the curve labels were set but never displayed
    plt.title('Canal %i'%(ich+1))


	0.00337419573831
	0.00337658241327
	0.0707331507733
	0.00350307184277
	0.00350195719045
	0.0318192824074
	0.00351447818007
	0.00351421709829
	0.00742874954294
	0.00366502315532
	0.00366734989288
	0.0634849348321
	0.0037457729526
	0.00374619543569
	0.0112789293914
	0.00389035409052
	0.0038914662475
	0.0285875515717
	0.00394285073019
	0.00394014517919
	0.068619158521
	0.00401480617705
	0.00401427078019
	0.0133355595194
	0.00409719228493
	0.00409183932947
	0.130649359037
	0.00407022564718
	0.00407672805414
	0.159755441674
	0.0040981422761
	0.00410269896093
	0.111189034536
	0.0041068782041
	0.00410323755414
	0.0886476244805
	0.00400593165076
	0.0040087851935
	0.0712329362465
	0.00389435788345
	0.00389679637564
	0.0626160272068
	0.0038974598714
	0.0039021964836
	0.121530749534
	0.00387864468155
	0.00388464955082
	0.154818751509
	0.00377575840381
	0.00377528574149
	0.0125183411818
	0.00380069702238
	0.00380472126539
	0.105881710294
	0.00369355032507
	0.00370308155637
	0.25805066825
	0.00368

<matplotlib.figure.Figure at 0x7f33c2416250>

# Signal Processing


A análise é feita para os janelamentos de 32 ms, 200 ms e 1 s. Considerando as taxas de amostragem, essas janelas produzem variáveis aleatórias com 1000, 6250 e 31250 amostras, respectivamente.

## Window: 32ms 

In [4]:
# Impute the NaN samples of `signal` using 32 ms windows.
# NOTE: the 200 ms and 1 s cells below repeat this logic with a different
# `nsamples`; a shared helper function would remove the duplication.
new_32ms_signal = np.array(signal)  # copy, so the loaded `signal` is not modified

nsamples = 1000  # samples per 32 ms window


# Relative change (%) of the std caused by the imputation:
# one row per channel, one column per random variable.
std_perc = np.zeros((signal.shape[1], nsamples))
for ich in range(signal.shape[1]):
    # Window: after the transpose, row i holds the i-th sample of every window.
    # Floor division keeps the window count an integer under Python 3 as well.
    wsignal = np.array(signal[:, ich])
    wsignal = np.reshape(wsignal, (wsignal.shape[0] // nsamples, nsamples)).T

    # Loop over random variables
    for ivar in range(nsamples):
        idx_nan = np.isnan(wsignal[ivar])
        if not idx_nan.any():
            # Nothing to impute: skip the KDE fit, std_perc stays at 0
            continue
        test_data = wsignal[ivar, ~idx_nan]
        std_before = test_data.std()
        X = np.linspace(test_data.min(), test_data.max(), 1000)
        ymodel = stats.gaussian_kde(test_data)(X)
        newvalues = random_from_dist(X, ymodel, idx_nan.sum(), False, False)
        # Replace values
        wsignal[ivar, idx_nan] = newvalues
        std_perc[ich, ivar] = 100 * np.abs(std_before - wsignal[ivar].std()) / std_before
    # Undo the windowing to get the channel back in its original sample order
    new_32ms_signal[:, ich] = np.reshape(wsignal.T, (signal.shape[0],))

## Window: 200ms

In [5]:
# Impute the NaN samples of `signal` using 200 ms windows.
# NOTE: same logic as the 32 ms and 1 s cells with a different `nsamples`;
# a shared helper function would remove the duplication.
new_200ms_signal = np.array(signal)  # copy, so the loaded `signal` is not modified

nsamples = 6250  # samples per 200 ms window


# Relative change (%) of the std caused by the imputation:
# one row per channel, one column per random variable.
std_perc = np.zeros((signal.shape[1], nsamples))
for ich in range(signal.shape[1]):
    # Window: after the transpose, row i holds the i-th sample of every window.
    # Floor division keeps the window count an integer under Python 3 as well.
    wsignal = np.array(signal[:, ich])
    wsignal = np.reshape(wsignal, (wsignal.shape[0] // nsamples, nsamples)).T

    # Loop over random variables
    for ivar in range(nsamples):
        idx_nan = np.isnan(wsignal[ivar])
        if not idx_nan.any():
            # Nothing to impute: skip the KDE fit, std_perc stays at 0
            continue
        test_data = wsignal[ivar, ~idx_nan]
        std_before = test_data.std()
        X = np.linspace(test_data.min(), test_data.max(), 1000)
        ymodel = stats.gaussian_kde(test_data)(X)
        newvalues = random_from_dist(X, ymodel, idx_nan.sum(), False, False)
        # Replace values
        wsignal[ivar, idx_nan] = newvalues
        std_perc[ich, ivar] = 100 * np.abs(std_before - wsignal[ivar].std()) / std_before
    # Undo the windowing to get the channel back in its original sample order
    new_200ms_signal[:, ich] = np.reshape(wsignal.T, (signal.shape[0],))

## Window 1s

In [6]:
# Impute the NaN samples of `signal` using 1 s windows.
# NOTE: same logic as the 32 ms and 200 ms cells with a different `nsamples`;
# a shared helper function would remove the duplication.
new_1s_signal = np.array(signal)  # copy, so the loaded `signal` is not modified

nsamples = 31250  # samples per 1 s window


# Relative change (%) of the std caused by the imputation:
# one row per channel, one column per random variable.
std_perc = np.zeros((signal.shape[1], nsamples))
for ich in range(signal.shape[1]):
    # Window: after the transpose, row i holds the i-th sample of every window.
    # Floor division keeps the window count an integer under Python 3 as well.
    wsignal = np.array(signal[:, ich])
    wsignal = np.reshape(wsignal, (wsignal.shape[0] // nsamples, nsamples)).T

    # Loop over random variables
    for ivar in range(nsamples):
        idx_nan = np.isnan(wsignal[ivar])
        if not idx_nan.any():
            # Nothing to impute: skip the KDE fit, std_perc stays at 0
            continue
        test_data = wsignal[ivar, ~idx_nan]
        std_before = test_data.std()
        X = np.linspace(test_data.min(), test_data.max(), 1000)
        ymodel = stats.gaussian_kde(test_data)(X)
        newvalues = random_from_dist(X, ymodel, idx_nan.sum(), False, False)
        # Replace values
        wsignal[ivar, idx_nan] = newvalues
        std_perc[ich, ivar] = 100 * np.abs(std_before - wsignal[ivar].std()) / std_before
    # Undo the windowing to get the channel back in its original sample order
    new_1s_signal[:, ich] = np.reshape(wsignal.T, (signal.shape[0],))

In [8]:
# Write each imputed array to its own .mat file inside `datadir`:
# (file name, variable name inside the file, array)
for out_name, mat_key, imputed in (
        ('/novo_ruido.mat', 'ruido', new_noise),
        ('/novo_sinal_32ms.mat', 'sinal', new_32ms_signal),
        ('/novo_sinal_200ms.mat', 'sinal', new_200ms_signal),
        ('/novo_sinal_1s.mat', 'sinal', new_1s_signal)):
    savemat(datadir + out_name, {mat_key: imputed})