# Cross correlation exercises

In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.signal

## Exercise - Finding the signals in the noise

Here are four more examples, each pairing a signal with a noisy dataset. Can you tell which of these datasets have signals in them, and where in each dataset those signals are?

In [2]:
# Let's generate the datasets. DON'T CHANGE THIS.
num_data_samples = 4096
sample_rate = 32
times = np.arange(num_data_samples) / sample_rate


def _make_signal(gaussian_width, base_frequency, modulation_depth, modulation_frequency):
    """Build one Gaussian-windowed, frequency-modulated sine template.

    Parameters
    ----------
    gaussian_width : float
        Divides the squared time offset as ``2 * gaussian_width`` in the
        envelope exponent, so it plays the role of the envelope's variance
        (its standard deviation is ``sqrt(gaussian_width)``).
    base_frequency : float
        Constant part of the instantaneous frequency.
    modulation_depth : float
        Amplitude of the sinusoidal frequency modulation (0 gives a pure tone).
    modulation_frequency : float
        Frequency of the sinusoidal modulation applied to the instantaneous
        frequency.

    Returns
    -------
    numpy.ndarray
        The samples of the windowed signal between ``48 * sample_rate`` and
        ``80 * sample_rate``, i.e. the stretch centred on the envelope peak
        at ``times == 64``.
    """
    inst_frequency = base_frequency + modulation_depth * np.sin(
        2 * np.pi * modulation_frequency * times)
    # Integrate the instantaneous frequency into a phase. The first sample is
    # pinned to phase 0 and every later sample adds its own phase step, which
    # is exactly what a cumulative sum produces.
    phase_steps = 2 * np.pi * inst_frequency * 1. / sample_rate
    phase_steps[0] = 0.
    phases = np.cumsum(phase_steps)
    envelope = np.exp( - (times - 64)**2 / (2 * gaussian_width))
    windowed = np.sin(phases) * envelope
    return windowed[48*sample_rate:80*sample_rate]


def _inject(data, signal, start_low, start_high, sign=1.):
    """Add ``sign * signal`` into ``data`` in place at a random start index.

    The start index is drawn with ``np.random.randint(start_low, start_high)``
    (upper bound exclusive) and is deliberately not returned, so the location
    stays hidden from the person doing the exercise.
    """
    start = np.random.randint(start_low, start_high)
    data[start:start + len(signal)] += sign * signal


# The four signal templates: (gaussian_width, base, modulation depth, modulation frequency).
signal_1 = _make_signal(10, 2., 0., 0.1)
signal_2 = _make_signal(1., 2., 1., 0.1)
signal_3 = _make_signal(8., 2., 4., 0.1)
signal_4 = _make_signal(10., 2., 1., 0.5)

# Each template is 32 * sample_rate samples long, so a start index below
# 96 * sample_rate always leaves room for it inside num_data_samples.

# Dataset 1: unit-variance noise with signal_1 added once.
data_1 = np.random.normal(size=[num_data_samples])
_inject(data_1, signal_1, 0, sample_rate*96)

# Dataset 2: quieter noise with signal_2 added twice, once starting in the
# first 30 * sample_rate samples and once starting between 60 and 90 * sample_rate.
data_2 = np.random.normal(size=[num_data_samples])*0.5
_inject(data_2, signal_2, 0, sample_rate*30)
_inject(data_2, signal_2, sample_rate*60, sample_rate*90)

# Dataset 3: unit-variance noise with signal_3 subtracted (sign-flipped).
data_3 = np.random.normal(size=[num_data_samples])
_inject(data_3, signal_3, 0, sample_rate*96, sign=-1.)

# Dataset 4: low-amplitude noise with signal_4 added once.
data_4 = np.random.normal(size=[num_data_samples]) * 0.2
_inject(data_4, signal_4, 0, sample_rate*96)


In [3]:
# SUMMARIZING THE DATA SETS - Use these when tackling this exercise
# Each line pairs a template signal with the noisy dataset to search for it.
# Every name is simply rebound to itself, so nothing is modified here; the
# cell exists only to list what is available.
signal_1, data_1 = signal_1, data_1  # DATA SET 1
signal_2, data_2 = signal_2, data_2  # DATA SET 2
signal_3, data_3 = signal_3, data_3  # DATA SET 3
signal_4, data_4 = signal_4, data_4  # DATA SET 4


### Exercise extension - autocorrelation

For each of the 4 signals, cross-correlate the signal with itself (a cross-correlation between a signal and itself is called an "auto-correlation"). The auto-correlation shows the shape of the peak you are looking for when you cross-correlate the signal with the noisy data. How does this change for the different signals and why?

## Exercise - Which signal is in the noise?

Each of these four datasets has one of the signals hidden in the noise, but which signal is in which dataset?

In [4]:
# NOTE(review): this import would normally sit with the other imports in the
# first cell; it is kept here so this cell still runs on its own.
import random


def _hide_random_signal(candidates, noise_scale=0.5):
    """Return a noisy dataset with one randomly chosen template added to it.

    Parameters
    ----------
    candidates : sequence of numpy.ndarray
        Templates to choose from; exactly one is picked with ``random.choice``.
    noise_scale : float, optional
        Factor multiplying the unit-variance Gaussian noise.

    Returns
    -------
    numpy.ndarray
        Array of ``num_data_samples`` noise samples with the chosen template
        added at a random position. Neither the choice nor the position is
        returned, so the answer stays hidden.
    """
    hidden = random.choice(candidates)
    data = np.random.normal(size=[num_data_samples]) * noise_scale
    # Size the start range and the slice from the template actually chosen,
    # so the addition still lines up if the templates ever differ in length.
    start = np.random.randint(0, num_data_samples - len(hidden))
    data[start:start + len(hidden)] += hidden
    return data


_signal_templates = [signal_1, signal_2, signal_3, signal_4]

data_11 = _hide_random_signal(_signal_templates)
data_12 = _hide_random_signal(_signal_templates)
data_13 = _hide_random_signal(_signal_templates)
data_14 = _hide_random_signal(_signal_templates)



In [6]:
# SUMMARY - There are 4 datasets below, they each contain one of the signals, but which one contains which signal?
#           The signals are the same as above.
# NOTE: the bare names below only reference the arrays built in the previous
# cell. A notebook cell echoes just its final expression, so only data_14 is
# shown in the output.
data_11
data_12
data_13
data_14

array([ 0.65623137,  0.28081833,  0.281017  , ...,  0.6814975 ,
       -0.31098926, -0.32647514])