# In [None]:
from keras import Input, Model
from keras.layers import Conv3D, MaxPooling3D, Conv3DTranspose, ConvLSTM3D
import os
import cv2
import glob
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tqdm import tqdm
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import random
import shutil
from skimage.metrics import structural_similarity as compare_ssim
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    average_precision_score,
    precision_score,
    recall_score,
    f1_score
)



# GPU session setup. It is currently disabled because it is wrapped in a
# string literal; remove the surrounding triple quotes to run it when a GPU
# is available.
"""
# Set the TensorFlow session to use GPU
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
K.set_session(sess)
"""


# PARAMETERS
# Module-level list intended to collect per-video optical-flow volumes.
optical_flows = []
# Frame size in pixels; frames are handled as (height, width, channels) arrays.
height, width = 180, 320
# Number of channels per frame and per optical-flow volume.
desired_no_channels = 3
# Batch size handed to the training entry point.
batch_size = 2
# Number of frames every loaded video must have to be kept.
max_frames = 200
# Number of videos loaded per training step.
max_videos = 1
# Number of videos requested per evaluation step.
max_videos_eval = 3
# Number of load-and-fit iterations of the incremental training loop.
epochs=10
# Number of consecutive frames in one model input window.
window_size=8
# Number of full windows that fit into one video of max_frames frames.
num_windows = max_frames // window_size
input_shape = (window_size, height, width, desired_no_channels)  # shape of one window; no leading num_windows axis
# Substring looked for in frame file names to mark a frame as anomalous.
anomaly_label = "anomaly"


def resize_frames(frames, target_size):
    """Load image files and resize each of them to ``target_size``.

    Args:
        frames: Iterable of image file paths.
        target_size: Size passed unchanged to ``cv2.resize`` (OpenCV orders
            this argument as ``(width, height)``).

    Returns:
        List of resized images in input order. Files that ``cv2.imread``
        cannot load are reported on stdout and left out of the result.
    """
    loaded = []
    for frame_path in frames:
        image = cv2.imread(frame_path)
        if image is not None:
            loaded.append(cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR))
        else:
            print(f"Error loading frame: {frame_path}")
    return loaded


def get_movie_chunk(path, max_frames, start_frame=0):
    """Load a slice of at most ``max_frames`` frames from a frame directory.

    Args:
        path: Directory holding the ``.tif`` frames of one video.
        max_frames: Maximum number of frames to load.
        start_frame: Index (in sorted file-name order) of the first frame.

    Returns:
        ``None`` when ``start_frame`` lies at or beyond the number of frames,
        otherwise a NumPy array of the loaded frames resized with the size
        argument ``(320, 180)``.
    """
    tif_names = sorted(name for name in os.listdir(path) if name.endswith('.tif'))
    total = len(tif_names)
    if start_frame >= total:
        return None
    stop = min(start_frame + max_frames, total)
    chunk_paths = [os.path.join(path, name) for name in tif_names[start_frame:stop]]
    return np.array(resize_frames(chunk_paths, (320, 180)))


def get_movie(path, max_frames):
    """Load one video (a directory of ``.tif`` frames) as ``max_frames`` frames.

    Frames are taken in sorted file-name order. A video with more than
    ``max_frames`` frames is truncated; a shorter one is padded by repeating
    its frames cyclically from the start until ``max_frames`` is reached.

    Args:
        path: Directory holding the ``.tif`` frames of one video.
        max_frames: Number of frames the returned video should contain.

    Returns:
        NumPy array of the resized frames, or an empty array when the
        directory contains no ``.tif`` file. The array can hold fewer than
        ``max_frames`` frames if some files could not be read.
    """
    frame_files = sorted(name for name in os.listdir(path) if name.endswith('.tif'))
    num_frames = len(frame_files)
    if num_frames == 0:
        return np.array([])

    if num_frames < max_frames:
        # Build the padded list from scratch. The previous code appended the
        # padded list to the original one, producing num_frames + max_frames
        # frames, so every short video failed the later shape check.
        repeats, remainder = divmod(max_frames, num_frames)
        frame_files = frame_files * repeats + frame_files[:remainder]
    else:
        frame_files = frame_files[:max_frames]

    frame_paths = [os.path.join(path, name) for name in frame_files]
    # cv2.resize takes the size as (width, height).
    return np.array(resize_frames(frame_paths, (320, 180)))


def _fit_labels_to_length(frame_labels, length):
    """Truncate or cyclically repeat ``frame_labels`` to exactly ``length`` items."""
    if not frame_labels:
        return []
    repeats, remainder = divmod(length, len(frame_labels))
    return frame_labels * repeats + frame_labels[:remainder]


def get_data(paths, max_videos, start_video):
    """Load the next batch of up to ``max_videos`` videos with frame labels.

    All video directories found under ``paths`` form one ordered list. Loading
    starts at ``start_video`` and stops once ``max_videos`` usable videos have
    been collected or the list is exhausted.

    Args:
        paths: Dataset directories; each sub-entry is treated as one video.
        max_videos: Maximum number of usable videos to return.
        start_video: Path of the first video to load, normally the cursor
            returned by the previous call. A falsy value (``""`` or ``None``)
            or an unknown path starts from the first video.

    Returns:
        Tuple ``(dataset, labels, next_start_video)``:
        ``dataset`` is a stacked array of the loaded videos, or ``[]`` when
        none was usable; ``labels`` is a flat list holding ``max_frames``
        frame labels per returned video, in the same order as the frames;
        ``next_start_video`` is the path of the video following the last one
        examined, or ``None`` when the list is exhausted.
    """
    # Sorted listing keeps the video order stable between calls, so the
    # returned cursor always refers to the same position.
    video_paths = [
        os.path.join(dataset_path, video_directory)
        for dataset_path in paths
        for video_directory in sorted(os.listdir(dataset_path))
    ]

    start_index = 0
    if start_video:
        print(f"File path of {start_video}")
        try:
            start_index = video_paths.index(start_video)
        except ValueError:
            # Best effort: an unknown cursor restarts from the first video.
            print(f"Start video not found, starting from the first video: {start_video}")

    dataset = []
    labels = []
    next_index = start_index
    for video_path in video_paths[start_index:]:
        if len(dataset) >= max_videos:
            break  # Stop once the desired number of videos is reached
        next_index += 1
        video = get_movie(video_path, max_frames)  # Process the entire video at once
        if video is None or len(video) == 0:
            continue  # Skip if the video is empty
        if video.shape != (max_frames, height, width, desired_no_channels):
            continue  # Skip videos that cannot be stacked with the others
        dataset.append(video)
        # Labels follow the same truncate / cyclic-repeat rule as the frames,
        # and are only kept for videos that are actually returned.
        video_labels = extract_labels_from_frames(video_path, anomaly_label)
        labels.extend(_fit_labels_to_length(video_labels, max_frames))

    dataset = np.stack(dataset) if dataset else []
    next_start_video = video_paths[next_index] if next_index < len(video_paths) else None
    return dataset, labels, next_start_video


def extract_labels_from_frames(video_path, substring_to_find):
    """Derive one 0/1 label per ``.tif`` frame from its file name.

    Args:
        video_path: Directory holding the frames of one video.
        substring_to_find: Text whose presence in a frame's file name marks
            the frame with label 1.

    Returns:
        List of labels (1 or 0), ordered by sorted frame path.
    """
    labels = []
    for frame_path in sorted(glob.glob(os.path.join(video_path, "*.tif"))):
        frame_name = os.path.basename(frame_path)
        labels.append(int(substring_to_find in frame_name))
    return labels


def calculate_max_dimensions(paths):
    """Scan every video under ``paths`` for the largest frame size and length.

    Args:
        paths: Dataset directories; each sub-entry is treated as a directory
            of ``.tif`` frames.

    Returns:
        Tuple ``(max_height, max_width, max_frames)``. ``max_frames`` counts
        ``.tif`` files, including ones that could not be read; unreadable
        files are reported on stdout and ignored for the size.
    """
    tallest = 0
    widest = 0
    longest = 0
    for dataset_path in paths:
        for video_directory in os.listdir(dataset_path):
            video_path = os.path.join(dataset_path, video_directory)
            tif_names = [name for name in os.listdir(video_path) if name.endswith('.tif')]
            longest = max(longest, len(tif_names))
            for tif_name in tif_names:
                image = cv2.imread(os.path.join(video_path, tif_name), cv2.IMREAD_UNCHANGED)
                if image is None:
                    print(f"Error loading frame: {tif_name}")
                    continue
                tallest = max(tallest, image.shape[0])
                widest = max(widest, image.shape[1])
    return tallest, widest, longest


def generate_optical_flow(frame_files, max_height, max_width):
    """Build an optical-flow volume for one video.

    Despite the parameter name, ``frame_files`` must hold decoded frames
    (arrays with ``.shape``), not file paths.

    Args:
        frame_files: Sequence of frames of one video; must not be empty
            because the first frame is read unconditionally.
        max_height: Height of the allocated volume.
        max_width: Width of the allocated volume.
            NOTE(review): presumably both must equal the frame size, since
            full flow maps are assigned into the volume - confirm.

    Returns:
        float32 array of shape
        ``(len(frame_files), max_height, max_width, desired_no_channels)``.
        Slot ``i`` holds the flow from frame ``i`` to frame ``i + 1``; the
        last slot is never written and stays zero.
    """
    # Create an empty array to store the optical flow map volume
    optical_flow_volume = np.zeros((len(frame_files), max_height, max_width, desired_no_channels), dtype=np.float32)
    print("Shape of a frame: {}".format(frame_files[0].shape))
    prev_gray = cv2.cvtColor(frame_files[0], cv2.COLOR_BGR2GRAY)
    for frame_index in range(1, len(frame_files)):
        frame = frame_files[frame_index]
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Dense Farneback flow between the previous and the current frame.
        # Positional arguments after the two images follow OpenCV's order:
        # flow, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
        flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        # Store the two flow components in channels 0 and 1 of the slot of
        # the previous frame (frame_index - 1)
        optical_flow_volume[frame_index - 1, :, :, 0] = flow[..., 0]
        optical_flow_volume[frame_index - 1, :, :, 1] = flow[..., 1]
        # Convert the flow vectors to magnitude and angle
        magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        # Normalize the magnitude to the range [0, 255]
        magnitude_normalized = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
        # Build an HSV image with the dtype and shape of the frame
        # (assumes 3-channel uint8 frames - TODO confirm); float values
        # assigned below are cast to that dtype.
        hsv = np.zeros_like(frame)
        hsv[..., 0] = angle * (180 / np.pi) / 2  # Hue channel: angle in degrees, halved
        hsv[..., 1] = 255  # Saturation channel
        hsv[..., 2] = magnitude_normalized  # Value channel
        # Convert HSV to BGR color representation (the result is in BGR
        # order even though the variable is called rgb)
        rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        # Fill the third volume channel with channel index 2 of that image
        optical_flow_volume[frame_index - 1, :, :, 2] = rgb[..., 2]
        prev_gray = gray
    # Report and return the flow volume of this video
    print(f"Optical flow volume shape: {optical_flow_volume.shape}")
    return optical_flow_volume


def calculate_optical_flows(data, max_frame_height, max_frame_width):
    """Compute the optical-flow volume of every video in ``data``.

    Args:
        data: Iterable of videos, each a sequence of decoded frames.
        max_frame_height: Height passed to ``generate_optical_flow``.
        max_frame_width: Width passed to ``generate_optical_flow``.

    Returns:
        NumPy array holding one flow volume per video of this call only.
    """
    # Collect into a list local to this call. Appending to the module-level
    # ``optical_flows`` list made every call also return (and keep in memory)
    # the volumes produced by all earlier calls.
    flow_volumes = [
        generate_optical_flow(video, max_frame_height, max_frame_width)
        for video in data
    ]
    flows = np.array(flow_volumes)
    print(f"dataset shape: {flows.shape}")
    return flows


def update_maximum_dimensions(max_frames, max_height, max_width, optical_flow_video):
    """Grow the running maxima so they cover ``optical_flow_video``.

    Args:
        max_frames: Largest frame count seen so far.
        max_height: Largest height seen so far.
        max_width: Largest width seen so far.
        optical_flow_video: Array whose first four axes are read as
            ``(frames, height, width, channels)``.

    Returns:
        Tuple ``(channels, max_frames, max_height, max_width)`` where
        ``channels`` is the size of axis 3 of ``optical_flow_video``.
    """
    shape = optical_flow_video.shape
    if shape[0] > max_frames:
        max_frames = shape[0]
    print("Max frames: {} ".format(max_frames))
    if shape[1] > max_height:
        max_height = shape[1]
    if shape[2] > max_width:
        max_width = shape[2]
    return shape[3], max_frames, max_height, max_width


def divide_into_windows(optical_flow_dataset, window_size):
    """Split every flow video of a dataset into fixed-size windows.

    Args:
        optical_flow_dataset: Iterable of per-video flow volumes.
        window_size: Number of frames per window.

    Returns:
        List with one windows array per video, in input order. The shape of
        each windows array is printed as it is produced.
    """
    per_video_windows = []
    for flow_video in optical_flow_dataset:
        video_windows = divide_into_windows_video(flow_video, window_size)
        print(f"Window shape: {video_windows.shape}")
        per_video_windows.append(video_windows)
    return per_video_windows


def divide_into_windows_video(video, window_size):
    """Split a video into consecutive, non-overlapping windows.

    Only complete windows are kept: trailing frames that do not fill a whole
    window are dropped. The per-frame shape is taken from ``video`` itself,
    so the function works for any frame size.

    Args:
        video: Array whose first axis indexes frames.
        window_size: Number of frames per window; must be positive.

    Returns:
        Array of shape ``(num_windows, window_size, *video.shape[1:])``.
        ``num_windows`` is 0 when the video is shorter than one window.
        The result may share memory with ``video``.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")
    video = np.asarray(video)
    num_full_windows = video.shape[0] // window_size
    # Cutting off the incomplete tail first lets one reshape produce all
    # windows without building and stacking a list of slices.
    usable_frames = video[:num_full_windows * window_size]
    return usable_frames.reshape((num_full_windows, window_size) + video.shape[1:])


def divide_data_into_validation_and_test(test_directory, validation_directory, split_ratio):
    """Move a random share of the test entries into a validation directory.

    Args:
        test_directory: Directory whose entries are split.
        validation_directory: Destination directory; created when missing.
        split_ratio: Fraction of the entries to move (the count is rounded
            down with ``int``).

    After the move, directories found while walking ``validation_directory``
    whose name contains "Test" are renamed with "Test" replaced by
    "Validation". A summary is printed at the end.
    """
    entries = os.listdir(test_directory)
    random.shuffle(entries)
    n_validation = int(split_ratio * len(entries))
    n_test = len(entries) - n_validation

    if not os.path.exists(validation_directory):
        os.makedirs(validation_directory)

    for entry in entries[:n_validation]:
        shutil.move(
            os.path.join(test_directory, entry),
            os.path.join(validation_directory, entry),
        )

    # Rename "Test" substring to "Validation" in validation directory
    for root, dirs, _files in os.walk(validation_directory):
        for dir_name in dirs:
            if "Test" not in dir_name:
                continue
            renamed = dir_name.replace("Test", "Validation")
            os.rename(os.path.join(root, dir_name), os.path.join(root, renamed))

    print(f"Moved {n_validation} subdirectories to the validation directory: {validation_directory}.")
    print(f"Remaining subdirectories in the test directory: {n_test}.")


# Define the model architecture
def build_lstm_autoencoder(shape):
    """Build and compile the convolutional / ConvLSTM autoencoder.

    The network is: two strided ``Conv3D`` layers, three ``ConvLSTM3D``
    layers, two strided ``Conv3DTranspose`` layers and a final ``Conv3D``
    with 3 filters and sigmoid activation. The tensor shape after each layer
    is printed while the graph is built.

    Args:
        shape: Shape of one input sample, without the batch axis.

    Returns:
        Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    print("Shape in the input_data {}".format(shape))
    input_data = Input(shape=shape)
    # Encoder: both convolutions use stride 2 on the first two non-batch axes
    # and stride 1 on the third.
    encoded = Conv3D(filters=128, kernel_size=(10, 10, 3), activation='relu', padding='same', strides=(2, 2, 1))(input_data)
    print("Shape in the after first conv3D layer {}".format(encoded.shape))
    encoded = Conv3D(filters=64, kernel_size=(6, 6, 3), activation='relu', padding='same', strides=(2, 2, 1))(encoded)
    print("Shape in the after second conv3D layer {}".format(encoded.shape))
    # encoded[:, None, ...] inserts a length-1 axis after the batch axis, so
    # the ConvLSTM3D stack receives a sequence consisting of a single step.
    encoded = ConvLSTM3D(filters=64, kernel_size=(3, 3, 3), padding='same', return_sequences=True)(encoded[:, None, ...])
    print("Shape in the after first ConvLSTM layer {}".format(encoded.shape))
    encoded = ConvLSTM3D(filters=32, kernel_size=(3, 3, 3), padding='same', return_sequences=True)(encoded)
    print("Shape in the after second ConvLSTM layer {}".format(encoded.shape))
    encoded = ConvLSTM3D(filters=64, kernel_size=(3, 3, 3), padding='same', return_sequences=True)(encoded)
    print("Shape in the after third ConvLSTM layer {}".format(encoded.shape))
    # Decoder
    # encoded[:, 0, ...] removes the length-1 sequence axis added above; the
    # transposed convolutions mirror the encoder strides.
    decoded = Conv3DTranspose(filters=128, kernel_size=(6, 6, 3), strides=(2, 2, 1), padding='same', activation='relu')(encoded[:, 0, ...])
    print("Shape in the after first Conv3DTranspose layer {}".format(decoded.shape))
    decoded = Conv3DTranspose(filters=1, kernel_size=(10, 10, 3), strides=(2, 2, 1), padding='same', activation='relu')(decoded)
    print("Shape in the after second Conv3DTranspose layer {}".format(decoded.shape))
    # Output layer: a plain Conv3D (not a transposed one, despite the message
    # printed below) producing 3 channels through a sigmoid.
    # NOTE(review): a sigmoid output stays within (0, 1); confirm the training
    # targets are scaled to that range.
    decoded = Conv3D(filters=3, kernel_size=(3, 3, 3), activation='sigmoid', padding='same')(decoded)
    print("Shape in the after third Conv3DTranspose layer {}".format(decoded.shape))
    model = Model(input_data, decoded)
    model.compile(optimizer="adam", loss="mse")
    # Autoencoder model
    return model


def assign_window_labels(windowed_optical_flows, frame_labels):
    """Derive one label per window from frame-level labels.

    Windows are taken to be consecutive and non-overlapping: the first window
    covers the first ``len(window)`` entries of ``frame_labels``, the second
    window the following ones, and so on.

    Args:
        windowed_optical_flows: Sequence of windows, each a sequence of
            frames.
        frame_labels: Flat sequence of frame labels (1 = abnormal) in the
            same order as the frames of the windows.

    Returns:
        List with one label per window: 1 if any frame of the window is
        labelled 1, otherwise 0. Frames without a label count as normal.
    """
    window_labels = []
    first_frame = 0
    for window_frames in windowed_optical_flows:
        frames_in_window = len(window_frames)
        # Look at the labels of the frames inside this window. The previous
        # code indexed frame_labels with the window index and therefore
        # checked a single, unrelated frame.
        labels_in_window = frame_labels[first_frame:first_frame + frames_in_window]
        is_abnormal = any(label == 1 for label in labels_in_window)
        window_labels.append(1 if is_abnormal else 0)
        first_frame += frames_in_window
    return window_labels


def get_windowed_optical_flows_data(paths, max_videos, window_size, start_video):
    """Load the next videos and turn them into optical-flow windows.

    Args:
        paths: Dataset directories handed to ``get_data``.
        max_videos: Maximum number of videos to load.
        window_size: Number of frames per window.
        start_video: Cursor of the first video to load (see ``get_data``).

    Returns:
        Always a 3-tuple ``(windows, window_labels, next_start_video)``.
        ``windows`` is a single array holding the windows of all loaded
        videos along the first axis (a list passed to Keras would be read as
        one array per model input). When no data is available, ``windows``
        and ``window_labels`` are empty lists, so callers can unpack the
        result and test ``len(windows) == 0``.
    """
    x_train, frame_labels, next_start_video = get_data(paths, max_videos, start_video)
    if len(x_train) == 0:
        return [], [], next_start_video
    print(f"Shape of training data: {x_train.shape}")
    print(f"Shape of one video: {np.array(x_train[0]).shape}")

    # Take the frame size from the data instead of hard-coding 180 x 320.
    frames_per_video, frame_height, frame_width = x_train.shape[1:4]
    flows = calculate_optical_flows(x_train, frame_height, frame_width)
    if len(flows) == 0:
        return [], [], next_start_video

    per_video_windows = divide_into_windows(flows, window_size)
    if len(per_video_windows) == 0:
        return [], [], next_start_video
    if len(per_video_windows) == 1:
        windows = per_video_windows[0]  # avoid copying a single video
    else:
        windows = np.concatenate(per_video_windows, axis=0)
    if len(windows) == 0:
        return [], [], next_start_video

    # Frames beyond the last complete window of a video have no window, so
    # their labels are dropped to keep labels and windows aligned across
    # videos. Only possible when there is exactly one label per frame.
    usable_frames = (frames_per_video // window_size) * window_size
    if usable_frames != frames_per_video and len(frame_labels) == len(x_train) * frames_per_video:
        frame_labels = [
            label
            for video_start in range(0, len(frame_labels), frames_per_video)
            for label in frame_labels[video_start:video_start + usable_frames]
        ]

    # Assign window-level labels based on frame-level labels
    window_labels = assign_window_labels(windows, frame_labels)
    return windows, window_labels, next_start_video


def train_model_incremental(model, filename, paths, max_frames, max_videos, batch_size, epochs, window_size=8):
    """Train the autoencoder chunk by chunk, saving after every chunk.

    Each iteration loads the next ``max_videos`` videos, fits the model for
    one Keras epoch to reconstruct them, and saves it to ``{filename}.h5``.

    Args:
        model: Compiled Keras model.
        filename: Path of the model file without the ``.h5`` extension;
            existing weights are loaded from it before training.
        paths: Dataset directories with the training videos.
        max_frames: Unused; kept so existing calls keep working.
        max_videos: Number of videos loaded per iteration.
        batch_size: Batch size passed to ``model.fit``.
        epochs: Maximum number of load-and-fit iterations.
        window_size: Number of frames per training window.
    """
    model_file = f"{filename}.h5"
    # Load the previously trained model if it exists
    if os.path.exists(model_file):
        model.load_weights(model_file)

    start_video = ""
    for _epoch in tqdm(range(epochs), desc="Epochs"):
        chunk = get_windowed_optical_flows_data(paths, max_videos, window_size, start_video)
        # The loader reports "no data" with an empty result; check before
        # unpacking so the loop ends cleanly instead of raising.
        if len(chunk) == 0 or len(chunk[0]) == 0:
            print("No more data available, break the iteration loop")
            break
        windowed_optical_flows, _labels, start_video = chunk
        del chunk
        if isinstance(windowed_optical_flows, (list, tuple)):
            # Keras reads a list as one array per model input, so merge the
            # per-video arrays into one batch of windows.
            windowed_optical_flows = np.concatenate(windowed_optical_flows, axis=0)
        # Train the model on the current chunk of data
        model.fit(x=windowed_optical_flows, y=windowed_optical_flows, batch_size=batch_size, epochs=1)
        # Save the updated model
        model.save(model_file)
        del windowed_optical_flows


def train_model_incremental_with_validation(model, filename, train_paths, max_frames, max_videos, batch_size, epochs, validation_paths, max_videos_eval, window_size=8):
    """Train chunk by chunk and evaluate on validation data after each chunk.

    Each iteration loads the next ``max_videos`` training videos, fits the
    model for one Keras epoch, calls ``evaluate_model`` on the next
    validation videos and saves the model to ``{filename}.h5``.

    Args:
        model: Compiled Keras model.
        filename: Path of the model file without the ``.h5`` extension;
            existing weights are loaded from it before training.
        train_paths: Dataset directories with the training videos.
        max_frames: Forwarded to ``evaluate_model``.
        max_videos: Number of training videos loaded per iteration.
        batch_size: Batch size passed to ``model.fit``.
        epochs: Maximum number of load-and-fit iterations.
        validation_paths: Dataset directories with the validation videos.
        max_videos_eval: Number of validation videos per evaluation.
        window_size: Number of frames per training window.
    """
    model_file = f"{filename}.h5"
    # Anomaly-score threshold handed to the evaluation.
    threshold = 0.5
    # Load the previously trained model if it exists
    if os.path.exists(model_file):
        model.load_weights(model_file)

    start_video = ""
    start_video_evaluation = ""
    for _epoch in tqdm(range(epochs), desc="Epochs"):
        chunk = get_windowed_optical_flows_data(train_paths, max_videos, window_size, start_video)
        # The loader reports "no data" with an empty result; check before
        # unpacking so the loop ends cleanly instead of raising.
        if len(chunk) == 0 or len(chunk[0]) == 0:
            print("No more data available, break the iteration loop")
            break
        windowed_optical_flows, _labels, start_video = chunk
        del chunk
        if isinstance(windowed_optical_flows, (list, tuple)):
            # Keras reads a list as one array per model input, so merge the
            # per-video arrays into one batch of windows.
            windowed_optical_flows = np.concatenate(windowed_optical_flows, axis=0)
        # Train the model on the current chunk of data
        model.fit(x=windowed_optical_flows, y=windowed_optical_flows, batch_size=batch_size, epochs=1)
        # Evaluate the model on the validation set
        start_video_evaluation = evaluate_model(model, filename, validation_paths, max_frames, threshold, max_videos_eval, start_video_evaluation)
        # Save the updated model
        model.save(model_file)
        del windowed_optical_flows


def calculate_ms_ssim_error(original_flow, reconstructed_flow):
    """Return the structural similarity of an original and a reconstructed flow.

    Despite the name, the value is the (single-scale) SSIM computed by
    ``skimage.metrics.structural_similarity``: a similarity, not an error.
    A higher value means the reconstruction is closer to the original.

    Args:
        original_flow: Original flow array with channels on the last axis.
        reconstructed_flow: Reconstructed flow array of the same shape.

    Returns:
        The SSIM value of the two arrays.
    """
    original_flow = np.asarray(original_flow)
    reconstructed_flow = np.asarray(reconstructed_flow)
    # ``channel_axis`` replaces the deprecated ``multichannel=True`` flag
    # (scikit-image 0.19 and later).
    ssim_kwargs = {"channel_axis": -1}
    is_float = (
        np.issubdtype(original_flow.dtype, np.floating)
        or np.issubdtype(reconstructed_flow.dtype, np.floating)
    )
    if is_float:
        # For floating-point data the value range cannot be derived from the
        # dtype, so pass the range actually spanned by the two arrays.
        lowest = min(original_flow.min(), reconstructed_flow.min())
        highest = max(original_flow.max(), reconstructed_flow.max())
        ssim_kwargs["data_range"] = float(highest - lowest) or 1.0
    return compare_ssim(original_flow, reconstructed_flow, **ssim_kwargs)


def calculate_anomaly_score(original_flow, reconstructed_flow):
    """Return one minus the value of ``calculate_ms_ssim_error`` for the pair.

    Args:
        original_flow: Original flow array.
        reconstructed_flow: Reconstruction of ``original_flow``.
    """
    return 1 - calculate_ms_ssim_error(original_flow, reconstructed_flow)


def evaluate_model(model, filename, paths, max_frames, threshold, max_videos, start_video, readFromFile=False, window_size=8):
    """Score the next evaluation videos and report detection metrics.

    The next ``max_videos`` videos are turned into optical-flow windows,
    reconstructed by the model and scored per window with
    ``calculate_anomaly_score``. Precision, recall and F1 of
    ``score > threshold`` against the window labels are printed and the
    scores are plotted.

    Args:
        model: Model used for the reconstruction (ignored when
            ``readFromFile`` is true).
        filename: Path of the model file without the ``.h5`` extension.
        paths: Dataset directories with the evaluation videos.
        max_frames: Unused; kept so existing calls keep working.
        threshold: Scores above this value count as anomalous.
        max_videos: Number of videos to evaluate.
        start_video: Cursor of the first video to load.
        readFromFile: Load the model from ``{filename}.h5`` instead of using
            ``model``.
        window_size: Number of frames per window.

    Returns:
        Cursor of the next video to evaluate. If the model file is missing
        the given ``start_video`` is returned unchanged.
    """
    if readFromFile:
        # Load the trained model
        model_file = f"{filename}.h5"
        if not os.path.exists(model_file):
            print("Trained model weights not found!")
            return start_video
        model = tf.keras.models.load_model(model_file)

    # The evaluation itself runs for both values of readFromFile; it used to
    # be nested under the readFromFile branch and was skipped by default.
    chunk = get_windowed_optical_flows_data(paths, max_videos, window_size, start_video)
    if len(chunk) == 0 or len(chunk[0]) == 0:
        print("No evaluation data available")
        return chunk[2] if len(chunk) == 3 else None
    windowed_optical_flows, window_labels, start_video = chunk
    del chunk
    if isinstance(windowed_optical_flows, (list, tuple)):
        # Keras reads a list as one array per model input, so merge the
        # per-video arrays into one batch of windows.
        windowed_optical_flows = np.concatenate(windowed_optical_flows, axis=0)

    # Predict the reconstructed optical flows
    reconstructed_optical_flows = model.predict(windowed_optical_flows)
    # Calculate the anomaly score of each reconstructed window
    total_anomaly_scores = np.array([
        calculate_anomaly_score(original_flow, reconstructed_flow)
        for original_flow, reconstructed_flow in zip(windowed_optical_flows, reconstructed_optical_flows)
    ])
    true_labels = np.array(window_labels)

    if len(true_labels) == len(total_anomaly_scores):
        # Compute precision, recall, and F1-score
        predicted_labels = total_anomaly_scores > threshold
        precision = precision_score(true_labels, predicted_labels)
        recall = recall_score(true_labels, predicted_labels)
        f1 = f1_score(true_labels, predicted_labels)
        # Print the evaluation metrics
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1-score:", f1)
    else:
        # Best effort: keep the surrounding training loop running when the
        # labels do not line up with the scored windows.
        print(f"Skipping metrics: {len(total_anomaly_scores)} scores but {len(true_labels)} labels")

    # Plot the anomaly scores
    plt.plot(total_anomaly_scores)
    plt.xlabel("Frame Index")
    plt.ylabel("Anomaly Score")
    plt.title("Anomaly Detection")
    plt.show()
    return start_video



# Script entry point: build the autoencoder and train it incrementally on the
# two training sets, evaluating on the two validation sets after each step.
if __name__ == "__main__":
    # Dataset locations under /content/drive/MyDrive.
    USCDped1_train_path = '/content/drive/MyDrive/UCSDped1/Train'
    USCDped2_train_path = '/content/drive/MyDrive/UCSDped2/Train'

    # The test paths are defined here but not used by this block.
    USCDped1_test_path = '/content/drive/MyDrive/UCSDped1/Test'
    USCDped2_test_path = '/content/drive/MyDrive/UCSDped2/Test'

    USCDped1_validation_path = '/content/drive/MyDrive/UCSDped1/Validation'
    USCDped2_validation_path = '/content/drive/MyDrive/UCSDped2/Validation'

    train_paths = [USCDped1_train_path, USCDped2_train_path]
    validation_paths = [USCDped1_validation_path, USCDped2_validation_path]

    # The model is saved to "autoencoder.h5"; the remaining arguments come
    # from the module-level parameters.
    lstm_autoencoder = build_lstm_autoencoder(input_shape)
    train_model_incremental_with_validation(lstm_autoencoder, "autoencoder", train_paths, max_frames, max_videos, batch_size, epochs, validation_paths, max_videos_eval)
