# CNN Classifier for Plant Waves Using Recurrence Plots
----

#### PREPROCESSING: Create Recurrence Plot Images 
In this step, we compute the recurrence plots, render them as images, reshape them to 97x97, and store them on disk with file names that encode the class label and the sample index.

In [1]:
import numpy as np
import torch
import pickle
import os
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
from scipy import signal
from PIL import Image
from tqdm import tqdm

%run ../src/utils/constants.py

Load raw data that has been split into train, test, and validation sets for the 81k and 191k cases.

In [2]:
# Locations of the pickled dataset splits (stratified, `Neutral` class excluded per the
# file names). DATASETS_DIR is provided by the constants script run in the import cell.
# Only the training split is referenced for the 191k case.
path_to_pickle_train81k = DATASETS_DIR / "train-81k-stratified-without-neutral.pkl"
path_to_pickle_train191k = DATASETS_DIR / "train-191k-stratified-without-neutral.pkl"
path_to_pickle_test81k = DATASETS_DIR / "test-81k-stratified-without-neutral.pkl"
path_to_pickle_val81k = DATASETS_DIR / "val-81k-stratified-without-neutral.pkl"

In [3]:
# Load the 81k training split.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(path_to_pickle_train81k, 'rb') as train_81k_file:
    train_81k = pickle.load(train_81k_file)

In [4]:
# Load the 191k training split.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(path_to_pickle_train191k, 'rb') as train_191k_file:
    train_191k = pickle.load(train_191k_file)

In [5]:
# Load the 81k test split.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(path_to_pickle_test81k, 'rb') as test_81k_file:
    test_81k = pickle.load(test_81k_file)

In [6]:
# Load the 81k validation split.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load trusted files.
with open(path_to_pickle_val81k, 'rb') as val_81k_file:
    val_81k = pickle.load(val_81k_file)

While ignoring the class `Neutral`, separate the wave slices from their labels and create a Tensor dataset. Adjust the iterable of the `for segment in ...` loop in the cell below to build a specific dataset (train, test, or val). The signal is downsampled by a factor of `10` to speed up the computation of the recurrence plots.

In [7]:
# Keep every `downsample_factor`-th value of each wave (plain strided slicing)
# and collect the waves and their labels in two parallel lists.
downsample_factor = 10
wav_slices, labels = [], []
for segment in val_81k:  # adjust this iterable for specific dataset.
    wave, label = segment[0], segment[1]  # element 0 holds the wave, element 1 its label
    wav_slices.append(wave[::downsample_factor])
    labels.append(label)

In [8]:
# Sanity check: shape of the first downsampled wave slice.
print(wav_slices[0].shape)

torch.Size([1000])


In [9]:
# Stack the per-sample sequences into dense arrays first, then convert them to
# tensors with fixed dtypes: float32 for the waves, long for the labels.
wav_array = np.array(wav_slices)
label_array = np.array(labels)
wav_slices = torch.tensor(wav_array, dtype=torch.float32)
labels = torch.tensor(label_array, dtype=torch.long)

# Pair every wave with its label.
dataset = TensorDataset(wav_slices, labels)
print(wav_slices)

tensor([[ 2.3000e+01,  5.4000e+01,  6.1000e+01,  ..., -5.2000e+01,
         -1.0000e+02, -1.4200e+02],
        [-1.6400e+03, -2.4120e+03, -3.1230e+03,  ..., -2.6430e+03,
         -2.2570e+03, -1.5440e+03],
        [-4.0000e+00,  1.9000e+01,  3.9000e+01,  ..., -5.9000e+01,
         -7.1000e+01, -7.3000e+01],
        ...,
        [-1.3300e+02, -1.8600e+02, -2.1100e+02,  ...,  7.7000e+01,
          1.3700e+02,  1.7300e+02],
        [-1.4300e+03, -2.1830e+03, -2.4600e+03,  ...,  1.7590e+03,
          2.2750e+03,  2.3290e+03],
        [ 0.0000e+00, -2.0000e+00, -4.0000e+00,  ..., -1.0000e+01,
         -5.0000e+00, -1.0000e+00]])


Normalize the samples using per-sample normalization.

In [10]:
data_tensor, labels_tensor = dataset.tensors

# Per-sample statistics: reduce along dim=1 and keep that dimension so the
# results broadcast back over each row.
mean = data_tensor.mean(dim=1, keepdim=True)
std_dev = data_tensor.std(dim=1, keepdim=True)

eps = 1e-8  # smoothing term to prevent zero division
standardized_data = (data_tensor - mean) / (std_dev + eps)

# Rebuild the dataset with the standardized waves and the unchanged labels.
dataset = TensorDataset(standardized_data, labels_tensor)

Compute the recurrence plots and save them to disk as `.png` files. First, define the output folder names and create the folders.

In [11]:
# Sub-folder names, one per dataset split; they are joined onto
# RECURRENCEPLOT_IMAGE_DIR when the folders are created and the images saved.
train_81k_folder = "train-81k"
train_191k_folder = "train-191k"  # for now, ignore this case.
test_81k_folder = "test-81k"
val_81k_folder = "val-81k"

In [12]:
# Create one output folder per 81k split (the 191k folder is intentionally left out).
# `exist_ok=True` replaces the separate exists-check: the cell is safe to re-run and
# there is no window between checking and creating. Unlike the silent skip of an
# exists-check, it raises FileExistsError if the path exists but is not a directory.
for split_folder in (train_81k_folder, test_81k_folder, val_81k_folder):
    os.makedirs(RECURRENCEPLOT_IMAGE_DIR / split_folder, exist_ok=True)

Compute the recurrence plot by (1) taking the absolute pairwise differences between all values of the time series, and (2) thresholding them into a binary matrix: an entry is 1 where the difference is below the threshold and 0 otherwise.

In [13]:
def recurrence_plot(time_series, threshold=0.1):
    """Build the binary recurrence matrix of a time series.

    Args:
        time_series: Sequence of values; for a 1-D series of length n the
            result is an n x n matrix.
        threshold: Two values count as recurrent when their absolute
            difference is strictly below this value.

    Returns:
        Integer NumPy array whose entry (i, j) is 1 if
        |time_series[i] - time_series[j]| < threshold and 0 otherwise.
    """
    pairwise_diff = np.subtract.outer(time_series, time_series)  # every x_i - x_j
    is_recurrent = np.abs(pairwise_diff) < threshold
    return is_recurrent.astype(int)

Adjust the target folder (e.g. `train_81k_folder`) in the cell below so that the recurrence plots are saved to the folder that matches the dataset built above.

In [14]:
# Folder that receives the images of the split currently held in `dataset`.
output_dir = RECURRENCEPLOT_IMAGE_DIR / val_81k_folder  # adjust to the folder of the split being processed
# Create the folder here as well, so the cell also works for a folder that was
# not created beforehand (e.g. train_191k_folder).
os.makedirs(output_dir, exist_ok=True)

for count, (wav_slice, label) in enumerate(tqdm(dataset)):
    # NOTE(review): the "mfcc" tag looks like a leftover from another pipeline; it is
    # kept unchanged because existing files / downstream loaders may rely on it - confirm.
    file_name = f"{count}_mfcc_class_{label}.png"
    save_path = os.path.join(output_dir, file_name)

    # Skip images that already exist *before* doing any work, so a re-run does
    # not recompute the O(n^2) recurrence matrix for samples that are done.
    if os.path.exists(save_path):
        continue

    rec_matrix = recurrence_plot(wav_slice, threshold=0.15)

    fig, ax = plt.subplots()
    ax.imshow(rec_matrix, cmap='binary_r', interpolation='none')
    ax.axis("off")
    fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)  # release the figure; thousands are created in this loop

100%|██████████████████████████████████████| 2600/2600 [04:12<00:00, 10.28it/s]
