<a href="https://colab.research.google.com/github/ormorteey/STA-237-Time-Series/blob/main/EDA_%26_Scalogram_STA_237_Project_%5BTime_Series_Anomaly_Detection_Using_Computer_Vision%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
import pywt
import os

In [3]:
%load_ext rpy2.ipython

In [4]:
%%capture
%%R
# Signify cell is an R cell
# Silence Output

h = install.packages("pacman")
h = library(pacman)

In [5]:
%%R
# load packages for data manipulation in R
p_load("tidyverse", "googledrive")

In [None]:
# Silence Output
%%capture
%%R

# killing auth request
drive_deauth()
drive_user()

# retrieving fulll url ID of files
public_file = drive_get(as_id(c("10i8tM37aqHvD-YH2JK1qZAXzYdawIsgh","1eJ4uXiFqGqsXJ2ut5vex_Lg3BLlUAPjT","1B6pbF90ryhJ-kpu0nQ3FKt6RcSavgCMS")) )

# download the files
save_output = 1:3 %>% map(~drive_download(public_file[.,], overwrite = T))

In [None]:
# pulls brian's repo to see latest content from brian
!rm -r /content/time-series-image-embedding/
!git clone https://github.com/briancknight/time-series-image-embedding.git


In [None]:
# file location for HDF5 & csv datasets
airbus_train_path = 'dftrain.h5'
airbus_valid_path = 'dfvalid.h5'
airbus_valid_groundtruth_path = 'dfvalid_groundtruth.csv'
# read hdf files: training data and validation data and ground truth [labels]
train_df = pd.read_hdf(airbus_train_path)
valid_df = pd.read_hdf(airbus_valid_path)
valid_groundtruth_df = pd.read_csv(airbus_valid_groundtruth_path)


In [None]:
# Data loading validation & shape check
print(valid_groundtruth_df.head())
print(train_df.head())
print(valid_df.tail())
print(valid_groundtruth_df.shape)
print(train_df.shape)
print(valid_df.shape)

In [None]:
def prepare_dataset(df, rows = 10, cols = 10):
  
  # cast df as numpy array
  df_tensor = df.to_numpy()
  # reshape df to tensor to rows of df with 120 series with length 512
  df_tensor = np.reshape(df_tensor, (df.shape[0],120, 512))
  # diagnostics: shape of dataframe
  print(df.shape)
  # diagnostics: shape of df tensor
  print(df_tensor.shape)
  # get continuous wavelet transform scales
  SC_scales = np.array([2**(i/4) for i in range(1,65)])

  print(SC_scales.shape)

  # create empty scalograms
  df_scalograms = np.empty([rows, cols, 64, 512])
  # fill up scalogram
  for ii in np.arange(rows):
    for jj in np.arange(cols):
      df_scalograms[ii,jj,:,:] =  get_scalogram(df_tensor[ii,jj], SC_scales)
  reshape_list = df_scalograms.shape
  print(df_scalograms.shape)
  df_scalograms = df_scalograms.reshape(reshape_list[0] * reshape_list[1], reshape_list[2], reshape_list[3])

  return(df_scalograms)


In [None]:
def get_scalogram(x, SC_scales):
  cwtmatr, freqs = pywt.cwt(x, SC_scales, 'mexh')
  return(cwtmatr)

def scalogram_plotter(x, show = False):
  plt.imshow(x, cmap = "jet", aspect = "auto")
  if show == True:
      plt.show()

def prepare_labels(label_Arr, rows = None, cols = None ):

  label_Arr = label_Arr.reshape([rows, 1])
  label_Arr = label_Arr.reshape([rows, 1])
  label_tensor = np.tile(label_Arr, cols).reshape([rows * cols, 1])
  return(label_tensor)

In [None]:
# takes approximately 5 minutes for compute

rows, cols = 20, 10
validation_examples = prepare_dataset(valid_df, rows, cols)
print(validation_examples.shape)
train_examples = prepare_dataset(train_df, rows, cols)
print(train_examples.shape)

In [None]:
scalogram_plotter(validation_examples[1,:,:],  True)
scalogram_plotter(validation_examples[0,:,:],  True)
# train examples
scalogram_plotter(train_examples[1,:,:],  True)
scalogram_plotter(train_examples[0,:,:],  True)

In [None]:
# plot more random scalograms

# plt.figure(figsize= (10,10))
# rng = np.random.default_rng(12345)
# counter_list = [rng.integers(low=0, high=valid_tensor.shape[0]) for ii in np.arange(2)]

# for jj in np.arange(len(counter_list)):
#   ax = plt.subplot(2,1, jj + 1)
#   scalogram_plotter(valid_tensor[jj,:,:],  True)
#   plt.axis("off")

In [None]:
validation_labels = valid_groundtruth_df['anomaly'].head(validation_examples.shape[0]).to_numpy()
validation_labels.shape

In [None]:
#create validation_labels
validation_labels = valid_groundtruth_df['anomaly'].head(rows).to_numpy()
validation_labels = prepare_labels(validation_labels[:rows], rows, cols )
print(validation_labels.shape)

# create train labels
train_labels = np.ones([train_examples.shape[0], 1])
train_labels.shape

In [None]:
# create dataset to serve the model
train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
validation_dataset = tf.data.Dataset.from_tensor_slices((validation_examples, validation_labels))

In [None]:
print(train_dataset)
print(validation_dataset)

In [None]:
# Configure dataset batch size and shuffler
BATCH_SIZE = 120
# SHUFFLE_BUFFER_SIZE = 100

train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
validation_dataset = validation_dataset.batch(BATCH_SIZE)

In [None]:
# Configuring dataset performance
AUTOTUNE = tf.data.AUTOTUNE

train_dataset = train_dataset.cache().prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.cache().prefetch(buffer_size=AUTOTUNE)

In [1]:
import tensorflow as tf

num_classes = 1

model = tf.keras.Sequential([
                         
  tf.keras.layers.InputLayer(input_shape = (64,512,1)),                          
  tf.keras.layers.AveragePooling2D(pool_size=(1, 8), strides=(1, 8), padding='valid'),
  tf.keras.layers.Conv2D(64, kernel_size=2 , strides=2, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Conv2D(128, kernel_size=(2,2), strides=(2,2), activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Dense(327768,  activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Dense(300, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Dense(327768, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Conv2DTranspose(128, kernel_size=1, strides=1, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Conv2DTranspose(64, kernel_size=2 , strides=2, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
  tf.keras.layers.Conv2DTranspose(1, kernel_size=2 , strides=2, activation=tf.keras.layers.LeakyReLU(alpha=0.3))

])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 average_pooling2d (AverageP  (None, 64, 64, 1)        0         
 ooling2D)                                                       
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 64)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 128)       32896     
                                                                 
 dense (Dense)               (None, 16, 16, 327768)    42282072  
                                                                 
 dense_1 (Dense)             (None, 16, 16, 300)       98330700  
                                                                 
 dense_2 (Dense)             (None, 16, 16, 327768)    98658168  
                                                        

In [None]:
# num_classes = 2

# model = tf.keras.Sequential([
#   tf.keras.layers.Rescaling(1./255),
#   tf.keras.layers.Conv2D(32, 3, activation='relu'),
#   tf.keras.layers.MaxPooling2D(),
#   tf.keras.layers.Conv2D(32, 3, activation='relu'),
#   tf.keras.layers.MaxPooling2D(),
#   tf.keras.layers.Conv2D(32, 3, activation='relu'),
#   tf.keras.layers.MaxPooling2D(),
#   tf.keras.layers.Flatten(),
#   tf.keras.layers.Dense(128, activation='relu'),
#   tf.keras.layers.Dense(num_classes)
# ])

In [None]:
model.compile(
  optimizer='adam',
  loss=tf.losses.BinaryCrossentropy(from_logits=True),
  metrics=['accuracy'])

In [None]:
model.fit(
  train_dataset,
  validation_data=validation_dataset,
  epochs=50
)

In [None]:
results = model.evaluate(validation_dataset)
print( results)