# COURSE: Master Python for scientific programming by solving projects
## PROJECT: Denoising noisy signals
#### TEACHER: Mike X Cohen, sincxpress.com
##### COURSE URL: udemy.com/course/maspy_x/?couponCode=202201

In [None]:
# import all necessary modules
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import detrend
import pandas as pd
import copy

# Smoothing via running-mean filter

In [None]:
# simulate a smooth multi-harmonic signal plus a noise-corrupted copy of it

N = 10001
time = np.linspace(0,4*np.pi,N)

# clean signal: cos(t) + cos(2t)^2 + cos(3t)^3, accumulated one term at a time
signal = np.zeros(N)
for harmonic in (1,2,3):
  signal = signal + np.cos(harmonic*time)**harmonic

# corrupted copy: add one standard-normal random number to every sample
noisysignal = signal + np.random.randn(N)

# overlay both traces, with x-ticks every pi/2
plt.plot(time,noisysignal, time,signal)
plt.xticks(np.arange(time[0],time[-1],np.pi/2))
plt.xlabel('Time (rad.)')
plt.legend(['Noisy','Original'])
plt.show()

In [None]:
# tangent on copy: plain assignment does not copy a list, it only binds a
# second name to the same object, so a change made through b shows up in a too
# (b = copy.deepcopy(a) would create an independent list instead)

a = [4,3]
b = a
b[0] = 5

# both names print the modified list
print(a, b, sep='\n')


In [None]:
# running-mean filter: every sample becomes the average of its neighbourhood
k = 15 # half-width; a full window spans 2*k+1 samples

# results go into a separate array so each window averages unfiltered data only
filtsignal = np.zeros(N)

for i in range(N):
  # clip the window where it would run past either end of the signal
  lowbnd = max(0,i-k)
  uppbnd = min(N,i+k+1) # +1 because a slice end is exclusive; keeps the window centred on i

  filtsignal[i] = np.mean(noisysignal[lowbnd:uppbnd])


# and plot
plt.plot(time,filtsignal, time,signal)
plt.legend(['Filtered','Original'])
plt.xlabel('Time (rad.)')
plt.xticks(np.arange(time[0],time[-1],np.pi/2))
plt.show()

In [None]:
# mean-smoothing function, to be called later
def meansmooth(signalIn,k):
  """Smooth a 1-D signal with a centred running-mean filter.

  Each output sample is the average of the input samples from i-k to i+k
  (inclusive). Near the edges the window is clipped to the part that lies
  inside the signal, so the output has the same length as the input.

  Parameters
  ----------
  signalIn : 1-D array-like of numbers
      Signal to smooth. It is not modified.
  k : int
      Half-width of the window, in samples (k=0 returns the data unchanged).

  Returns
  -------
  numpy.ndarray
      Float array of the same length as `signalIn`.
  """
  data = np.asarray(signalIn,dtype=float)

  # use the length of the input itself, not a variable from the calling scope
  npnts = len(data)

  # float output so that averages of integer input are not truncated
  filtsignal = np.zeros(npnts)
  for i in range(npnts):
    # +1 on the upper bound because a slice end is exclusive
    filtsignal[i] = np.mean(data[ max(0,i-k) : min(npnts,i+k+1) ])
  return filtsignal


# sweep over a range of kernel half-widths
kvals = np.arange(5,41)

# for every k: smooth the noisy signal, then take the correlation coefficient
# between the smoothed result and the noise-free signal
signalCorrs = [ np.corrcoef(meansmooth(noisysignal,ki),signal)[0,1] for ki in kvals ]


# show how the fit depends on the kernel length
plt.plot(kvals,signalCorrs,'ks-',markerfacecolor='w',markersize=8)
plt.xlabel('Filter kernel length')
plt.ylabel('Correlation value')
plt.title('Fit of filtered signal to original')
plt.show()

# Smoothing via Gaussian convolution

In [None]:
# create signal
srate = 512 # number of samples per unit of time
time  = np.arange(-2,2+1/srate,1/srate) # time axis in steps of 1/srate, starting at -2
pnts  = len(time)

# cubic curve plus a step at time zero, passed through scipy's detrend
signal = detrend(time**3 + np.sign(time))
# noisy copy: standard-normal random numbers scaled by 1.1
noisysignal = signal + np.random.randn(pnts)*1.1

plt.plot(time,noisysignal, time,signal)
plt.xlabel('Time (s)')
plt.ylabel('Activity')
plt.legend(['Noisy','Original'])
# limits span the whole time axis; the first sample is index 0, not 1
plt.xlim(time[[0,-1]])
plt.show()

In [None]:
# build a Gaussian smoothing kernel

s = .005 # width parameter of the Gaussian, in the same units as x
k = 10   # number of kernel points on each side of the centre

# kernel axis: 2*k+1 points centred on zero, spaced 1/srate apart
x = np.arange(-k,k+1)/srate
gkern = np.exp( -(x**2) / (2*s**2) )

# show the kernel; the title reports its length and width
plt.plot(x,gkern,'o-')
plt.xlabel('Time (s)')
plt.title('n=%g, s=%g'%(2*k+1,s))
plt.show()

In [None]:
# run convolution
# scale the kernel to unit sum so that filtering does not rescale the signal
gkern = gkern / gkern.sum()
# mode='same' keeps the output as long as the longer input (here the signal)
filtsig = np.convolve(noisysignal,gkern,mode='same')

# plot for comparison
plt.plot(time,noisysignal, time,filtsig, time,signal)
plt.xlabel('Time (s)')
plt.ylabel('Activity')
plt.legend(['Noisy','Filtered','Original'])
# limits span the whole time axis; the first sample is index 0, not 1
plt.xlim(time[[0,-1]])
plt.show()

In [None]:
# length vs. width of kernel

# parameter ranges
krange = np.arange(3,300,20)     # kernel half-lengths (points on each side of the centre)
srange = np.linspace(.001,.5,60) # Gaussian width parameters

# initialize: one SSE value and one stored kernel per (length, width) pair
sseMat = np.zeros((len(krange),len(srange)))
allkernels = [[0]*len(srange) for _ in krange]

for ki,khalf in enumerate(krange):
  # the kernel axis depends only on the length, so build it once per length
  x = np.arange(-khalf,khalf+1)/srate

  for si,s in enumerate(srange):
    # create Gaussian
    gkern = np.exp(-x**2 / (2*s**2))

    # filter the signal with the unit-sum version of the kernel
    filtsig = np.convolve(noisysignal,gkern/gkern.sum(),mode='same')

    # compute SSE between the filtered and the noise-free signal
    sseMat[ki,si] = np.sum((filtsig-signal)**2)

    # store the (un-normalized) Gaussians for later
    allkernels[ki][si] = gkern


In [None]:
# image of the SSE matrix (rows: kernel size, columns: sigma)

# colour scale is clipped to 0-400; extent labels the axes with parameter values
plt.imshow(sseMat,vmin=0,vmax=400,
           extent=[srange[0],srange[-1],krange[-1],krange[0]])

# rescale so the image is drawn square despite the different axis ranges
plt.gca().set_aspect(1./plt.gca().get_data_ratio())
plt.colorbar()
# raw string: '\s' is not a valid escape sequence in a normal string literal
plt.xlabel(r'$\sigma$')
plt.ylabel('Kernel size')
plt.title('SSE vs.original signal')
plt.show()

In [None]:
# plot a few non-randomly selected Gaussian kernels

fig,ax = plt.subplots(4,4,figsize=(10,8))

# 4 equally spaced points on the grid
sidx = np.linspace(0,len(srange)-1,4).astype(int)
kidx = np.linspace(0,len(krange)-1,4).astype(int)

# columns step through the widths, rows step through the kernel lengths
for si in range(4):
  for kj in range(4):
    ax[kj,si].plot(allkernels[kidx[kj]][sidx[si]])
    ax[kj,si].set_xticks([])
    ax[kj,si].set_ylim([0,1.1])
    # raw string: '\s' is not a valid escape sequence in a normal string literal
    ax[kj,si].set_title(r'k=%g, $\sigma$=%.2f'%(krange[kidx[kj]],srange[sidx[si]]))
    # draw every panel square
    ax[kj,si].set_aspect(1/ax[kj,si].get_data_ratio())

plt.show()

# Despeckling via median filter

In [None]:
# simulate a repeating ramp-like signal and contaminate it with large spikes

pnts = 1234
signal = np.mod(np.linspace(0,5,pnts)**2,5)

# number of spikes: 10% of the signal length; positions are random and may repeat
p = int(.1*pnts)
spiketimes = np.random.randint(0,pnts,p)

# overwrite those samples with random values between 10 and 110
signal[spiketimes] = 100*np.random.rand(p) + 10

plt.plot(signal)
plt.show()

In [None]:
# try mean-smoothing
k = 15 # half-width; a full window spans 2*k+1 samples

# filter into a separate array: writing the averages back into `signal` would
# feed already-averaged samples into the later windows and would overwrite the
# spiky data that the cells below still work on
meansmoothed = np.zeros(pnts)
for i in range(pnts):
  lowbnd = max(0,i-k)
  uppbnd = min(pnts,i+k+1) # +1 because a slice end is exclusive
  meansmoothed[i] = np.mean(signal[lowbnd:uppbnd])

# and plot
plt.plot(meansmoothed)
plt.title('Mean smoothing')
plt.show()

In [None]:
# inspect the distribution of data values to choose a threshold for outliers

plt.hist(signal,bins=80)
# cut the y-axis at 10 counts so that sparsely filled bins remain visible
plt.ylim([0,10])
plt.title('Distribution of data values')
plt.xlabel('Data value')
plt.ylabel('Count')
plt.show()

In [None]:
# cut-off above which a sample counts as unusually large
thresh = 10

# indices of all samples that exceed the cut-off
abovethresh = signal>thresh
suprathreshpoints = np.where(abovethresh)[0]

# report the indices and the percentage of samples they represent
print(suprathreshpoints)
print(100*len(suprathreshpoints)/pnts)


In [None]:
# visual check: the signal with the supra-threshold samples marked in red
spikevalues = signal[suprathreshpoints]
plt.plot(signal)
plt.plot(suprathreshpoints,spikevalues,'ro')

# show only samples 200 to 400
plt.xlim(200,400)
plt.show()

In [None]:
# apply median filter

k = 3 # half-width; a full window spans 2*k+1 samples

# only the supra-threshold samples are replaced; all others stay untouched
for i in suprathreshpoints:
  lowbnd = max(0,i-k)
  # +1 because a slice end is exclusive; an odd-length window is centred on i
  # and its median is an actual sample rather than the mean of two samples
  uppbnd = min(pnts,i+k+1)
  signal[i] = np.median(signal[lowbnd:uppbnd])

plt.plot(signal)
plt.title('Median filter with k=' + str(k))
plt.show()

# Denoise these biomedical data!

In [None]:
# upload the data file through the Colab file picker; the google.colab package
# cannot be imported everywhere, so fall back to an empty upload result instead
# of stopping the notebook with an ImportError
try:
  from google.colab import files
except ImportError:
  uploaded = {}
  print('google.colab is not available - skipping the upload step')
else:
  uploaded = files.upload()

In [None]:
# read the comma-separated file into a table with columns 'noisy' and 'orig'
import pandas as pd
df = pd.read_csv(
  'EKG_signals.csv',
  sep=',',
  names=['noisy','orig'],
)
# bare expression: shows the table when this is the last line of a notebook cell
df

In [None]:
# line plot of every column in the table
df.plot(fontsize=15)
plt.ylabel('Amplitude (a.u.)')
plt.xlabel('Time (a.u.)')
plt.show()

In [None]:
# start the 'filtered' column as an independent copy of the noisy data
df['filtered'] = df['noisy'].copy(deep=True)

# show the table with its new column
print(df)

In [None]:
# median filter for low values

# threshold
thresh = 300

# work on plain arrays so that all indexing below is positional
noisy = df['noisy'].to_numpy()
# float copy, so medians that fall between two integers are stored exactly
filtered = df['filtered'].to_numpy(dtype=float,copy=True)

# find all data points BELOW the threshold
# (the variable name is kept from the earlier cells although it now holds
#  sub-threshold points)
suprathreshpoints = np.where(noisy<thresh)[0]

# apply median filter
k = 7 # half-width; a full window spans 2*k+1 samples
for i in suprathreshpoints:
  lowbnd = max(0,i-k)
  uppbnd = min(len(noisy),i+k+1) # +1 because a slice end is exclusive
  filtered[i] = np.median(noisy[lowbnd:uppbnd])

# write the column back in one assignment; element-wise df['filtered'][i] = ...
# is chained assignment, which pandas does not guarantee to write through to df
df['filtered'] = filtered


In [None]:
# compare the filtered column with the original one
df.loc[:,['filtered','orig']].plot()
plt.ylabel('Amplitude (a.u.)')
plt.xlabel('Time (a.u.)')
plt.show()

In [None]:
# now for a mean-smoothing filter
k = 5 # half-width; a full window spans 2*k+1 samples

# NOTE(review): the windows are read from the 'noisy' column, so this pass
# replaces whatever 'filtered' held before instead of smoothing it further -
# confirm that this is intended
noisy = df['noisy'].to_numpy()
npnts = len(noisy)

smoothed = np.zeros(npnts)
for i in range(npnts):
  lowbnd = max(0,i-k)
  uppbnd = min(npnts,i+k+1) # +1 because a slice end is exclusive
  smoothed[i] = np.mean(noisy[lowbnd:uppbnd])

# write the column back in one assignment; element-wise df['filtered'][i] = ...
# is chained assignment, which pandas does not guarantee to write through to df
df['filtered'] = smoothed


In [None]:
# compare the filtered column with the original one
df.loc[:,['filtered','orig']].plot()
plt.ylabel('Amplitude (a.u.)')
plt.xlabel('Time (a.u.)')
plt.show()

# Bonus: Highlight plot areas

In [None]:
# random-walk time series: running sum of standard-normal steps
x = np.cumsum(np.random.randn(1000))

# line plot on a single axis
fig,ax = plt.subplots(1)
ax.plot(x)

# shade the x-range 100-300 from the bottom to the top of the current y-range
xlim = [100,300]
ylim = ax.get_ylim()
ax.fill_between(xlim,*ylim,facecolor='m',alpha=.2)
# put the y-limits back to what they were before the patch was added
ax.set_ylim(ylim)
plt.show()

In [None]:
# shade the area enclosed by two random walks
x1 = np.cumsum(np.random.randn(1000))
x2 = np.cumsum(np.random.randn(1000))

fig,ax = plt.subplots(1)
for series in (x1,x2):
  ax.plot(series)

# sample indices 300..600 serve both as x-coordinates and as the selection
xlim = np.arange(300,601)
ax.fill_between(xlim,x1[xlim],x2[xlim],facecolor='k',alpha=.2)

plt.show()


In [None]:
# Note: As I mention in the video, ax.fill_between is easier than explicitly defining polygons
# for this particular task. But you may want to know how to create arbitrary polygons, hence the 
# code cells below. 

In [None]:
from matplotlib.patches import Polygon

# triangle described by its three corners, one (x,y) pair per row
y = np.array([ (1,1),(2,3),(3,1) ])
p = Polygon(y,alpha=.3,facecolor='m')

# draw the patch on an empty axis spanning 0-4 in both directions
fig, ax = plt.subplots()
ax.add_patch(p)
ax.set_xlim([0,4])
ax.set_ylim([0,4])
plt.show()

In [None]:
# random-walk time series: running sum of standard-normal steps
x = np.cumsum(np.random.randn(1000))

# line plot on a single axis
fig,ax = plt.subplots(1)
ax.plot(x)

# rectangle covering x = 100-300 over the current y-range; corners are listed
# in order around the outline: bottom-left, top-left, top-right, bottom-right
xlim = [100,300]
ylim = ax.get_ylim()
y = np.array([ [xlim[ix],ylim[iy]] for ix,iy in ((0,0),(0,1),(1,1),(1,0)) ])
p = Polygon(y,alpha=.2,facecolor='m')
ax.add_patch(p)

plt.show()

In [None]:
# generate a time series and its mirror image
xp = np.cumsum(np.random.randn(1000))
# negate the series created in this cell (not a variable left over from an earlier cell)
xm = -xp

# draw line
fig,ax = plt.subplots(1)
ax.plot(xp)
ax.plot(xm)

# create and add patch: walk along xp from sample 300 to 600, then back along
# xm from sample 600 to 300, so that the outline closes on itself
xlim = np.arange(300,601)
xlim = np.hstack((xlim,xlim[::-1]))
# the return path uses the same samples as the forward path, reversed, so each
# y-value lines up with its x-coordinate
ylim = np.hstack((xp[300:601],xm[300:601][::-1]))
y = np.vstack((xlim,ylim)).T
p = Polygon(y,facecolor='k',alpha=.2) # input y is Nx2
ax.add_patch(p)

plt.show()