## Setup

NOTE: All dependencies are installed inside a conda environment to ensure reproducibility. To install them, activate the environment and run: `pip install -r requirements.txt`

In [None]:
import tensorflow.compat.v1 as tf
#Lets see if tensorflow finds the GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# import tensorflow as tf

In [None]:
# #Lets see if it works
tf.ones(1) + tf.ones(1)

In [None]:
import numpy as np # for working with arrays and matrices
import pandas as pd # for data manipulation and analysis
import matplotlib.pyplot as plt # for data visualization
import seaborn as sns # for data visualization
import time # for time-related functions
import random # for random number generation
import cv2 # for computer vision and image processing tasks
import datetime # for saving date and time information
import csv # for loading csv files


import h5py # for working with HDF5 (Hierarchical Data Format) files
import boto3 # for working with Amazon Web Services (AWS)
from pynwb import NWBHDF5IO # for working with Neurodata Without Border (NWB) files
import fsspec 
from fsspec.implementations.cached import CachingFileSystem # library used for working with various file systems in Python.
import requests 
import aiohttp # libraries which are used for making HTTP requests in Python.
import os # OS module provides various operating system-related functions to the code
import pickle
import re


# used for splitting data into training and testing sets in Python.
from sklearn.model_selection import train_test_split 


from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import plot_model
import imgaug.augmenters as iaa

import importlib

# # Loading functions
from load_calcium_video import load_video_data, load_one_video
from pixel_values_normalization import normalize_video
from align_behavior_to_calcium import align_files_old_labels, align_files_new_labels
from class_balance import check_class_imbalance_old, check_class_imbalance_new, check_class_imbalance_old_merged
from model_architecture import construct_model
from preprocessing_model import model_preprocessing
from run_model import model_execution
# from save_model_info import save_training_info
from plots import plot_first_frames, plot_random_frames, plot_image_pixel_values
from send_email_when_code_is_run import send_email
import run_k_fold_model


from urllib.parse import urlparse
from ann_visualizer.visualize import ann_viz
import sys
sys.path.append('/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3')
from nwb_data_generator import NWBDataGeneratorTime

# sklearn
from sklearn.model_selection import KFold
import h5py



In [None]:
# Free-text description of this run (the previous description is kept in the trailing comment)
comment =  "single video, training and testing, 5-fold, accuracy, loss, cm" #"k-fold-cross validation, one video, old (25) labels, new architecture"

# What is your experiment's ID number? (also used as the prefix of the output directory)
experiment_ID = '4.3'

# Which animal data file are you using?
data_file = 'Animal3Learnday11'

# What is the experiment's name? Built as "<data_file>_<experiment_ID>".
experiment_name = str(data_file)+"_"+str(experiment_ID)

# What is the train-test split strategy?
train_test_split_strategy = "k-fold"

# What is the name of your model?
name = 'BPNN_V3'

# What is the model's version? Built as "<name>_1".
model_version = str(name)+'_1'

# Which labels am I using? 'old' selects align_files_old_labels when the labels are loaded;
# any other value selects align_files_new_labels.
labels_type = 'old' # or 'new'



# Will you merge labels or not? Forwarded to align_files_old_labels and used to pick
# the merged class-balance check.
merge_labels = False

# Misalign the labels? Used to find chance performance: when true, the
# "Starting K-fold" cell circularly shifts the labels with np.roll before training.
shuffled_labels = True

# Which architecture am I using
architecture_type = 'standard' # or 'old'

# Am I analysing multiple videos or only one? Selects load_video_data vs load_one_video.
multiple_videos = False

# Store the initialisation variables with IPython's %store so they can be
# restored elsewhere with `%store -r`:
%store experiment_ID
%store data_file
%store experiment_name
%store train_test_split_strategy
%store name
%store model_version
%store labels_type
%store merge_labels
%store shuffled_labels
%store architecture_type
%store multiple_videos

In [None]:
# # Define the name of the output directory
# output_dir = "output"

# # Check if the output directory already exists
# if not os.path.exists(output_dir):
#     # Create the output directory
#     os.mkdir(output_dir)

#     # Create the balance, accuracy, loss, and cm directories inside the output directory
#     os.mkdir(os.path.join(output_dir, "balance"))
#     os.mkdir(os.path.join(output_dir, "accuracy"))
#     os.mkdir(os.path.join(output_dir, "loss"))
#     os.mkdir(os.path.join(output_dir, "cm"))
#     os.mkdir(os.path.join(output_dir, "architecture"))
#     os.mkdir(os.path.join(output_dir, "pickles"))
# else:
#     print(f"The directory {output_dir} already exists.")

# create output directory
output_dir = str(experiment_ID)+'_output'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    os.mkdir(os.path.join(output_dir, "balance"))
    os.mkdir(os.path.join(output_dir, "accuracy"))
    os.mkdir(os.path.join(output_dir, "loss"))
    os.mkdir(os.path.join(output_dir, "cm"))
    os.mkdir(os.path.join(output_dir, "architecture"))
    os.mkdir(os.path.join(output_dir, "pickles"))
    
%store output_dir

### Loading Calcium Videos

In [None]:
# Load the calcium imaging video(s) into `images`.
# The two lists below are handed to the loader helpers as accumulators.
video_name_list = []
video_data_list = []

if multiple_videos == True:
    # Multi-video run: three recordings of animal 3 (learning days 8, 9 and 10).
    # NOTE(review): absolute, machine-specific paths — consider a configurable data directory.
    # paths to videos
    video_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211025_184906_animal3learnday8.nwb", 
                   "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211026_142935_animal3learnday9.nwb", 
                   "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211027_165052_animal3learnday10.nwb"]
    # load the CSV file with the FOV information
    fov_info = pd.read_csv('/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/aligned_videos_animal3.csv')
    # presumably the FOV table is used to align the recordings to a common field of view — confirm in load_calcium_video
    images = load_video_data(video_paths, fov_info, video_name_list, video_data_list)

else:
    # Single-video run.
    # change the path if you're using another video other than animal3learnday11
    # video_path = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211028_174510_animal2learnday11.nwb"]    
    video_path = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211028_181307_animal3learnday11.nwb"]
    images = load_one_video(video_path, video_name_list, video_data_list)

In [None]:
images[0].shape

In [None]:
# Check if the video has been loaded correctly
images[:1]

In [None]:
# Read the dataset size straight from the array shape:
# first axis = number of frames, then frame height and frame width.
num_of_frames, img_height, img_width = images.shape[:3]
print("The number of video frames is ", num_of_frames, " and the frame dimensions (height x width) are: ", img_height, "X", img_width)

### Load Labels

In [None]:
# Pick the Bonsai tracking CSVs and the matching behaviour .h5 files for either
# the single-video or the multi-video run. The behaviour files are only consumed
# by the 'old' label scheme.
if multiple_videos:
    bonsai_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/tmaze_2021-10-25T18_48_49.csv",
                    "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/tmaze_2021-10-26T14_29_27.csv",
                    "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/tmaze_2021-10-27T16_50_53.csv"]
    behavior_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211025_184906_animal3learnday8.h5",
                      "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211026_142935_animal3learnday9.h5",
                      "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211027_165052_animal3learnday10.h5"]
else:
    # bonsai_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/tmaze_2021-10-28T17_45_15.csv"]
    bonsai_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/tmaze_2021-10-28T18_13_23.csv"]
    # behavior_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211028_174510_animal2learnday11.h5"]
    behavior_paths = ["/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/20211028_181307_animal3learnday11.h5"]

# Derive the video count from the path list instead of hard-coding it, so the
# multi-video branch can no longer run with a stale count of 1.
num_of_videos = len(bonsai_paths)

# Align the behaviour annotations with the calcium frames for the chosen label scheme.
if labels_type == 'old':
    df_new_annotations, df_new_annotations_check = align_files_old_labels(bonsai_paths, behavior_paths, num_of_videos, merge_labels)
else:
    h5_path = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/data/behavior_segmentation_arrowmaze.h5"
    df_new_annotations, df_new_annotations_check = align_files_new_labels(bonsai_paths, num_of_videos, h5_path)

In [None]:
df_new_annotations_check#df_new_annotations_check

In [None]:
df_new_annotations_check.drop_duplicates()

In [None]:
df_new_annotations

### Aligning Behavior with Calcium video

In [None]:
# Renumber the rows from 0 (the old index is discarded) and collect the
# distinct behaviour ids found in the 'state_id' column.
# NOTE(review): the next cell rebinds df_new_annotations to its 'state_id'
# column only, so re-running this cell afterwards presumably fails on the
# ['state_id'] lookup — run the notebook top to bottom.
df_new_annotations = df_new_annotations.reset_index(drop=True)
df_new_annotations_unique = df_new_annotations['state_id'].unique()

In [None]:
df_new_annotations = df_new_annotations['state_id']

In [None]:
df_new_annotations_unique

In [None]:
# labels_number = len()'all' # can be also 3 (merged) or 6 (new)

In [None]:
no_of_labels = len(df_new_annotations_unique)
%store no_of_labels

In [None]:
# class_counts = pd.value_counts(df_new_annotations)
# total_counts = class_counts[0] + class_counts[1] + class_counts[2] + class_counts[3] + class_counts[4] + class_counts[5]

In [None]:
# total_counts

In [None]:
# class_percents = pd.value_counts(df_new_annotations, normalize=True) * 100

In [None]:
# class_percents

In [None]:
save_dir = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/"+str(output_dir)+"/balance"

In [None]:
# Leading positional arguments shared by all three class-balance helpers.
_balance_args = (df_new_annotations, experiment_ID, save_dir,
                 df_new_annotations_unique, df_new_annotations_check, no_of_labels)

# Dispatch on the label scheme: new labels, merged old labels, or plain old labels.
if labels_type == 'new':
    _balance_result = check_class_imbalance_new(*_balance_args, data_file)
elif merge_labels == True:
    # The merged variant additionally takes the display names of the merged classes.
    names_of_labels = ('Main Corr', 'Left Corr', 'Right Corr')
    _balance_result = check_class_imbalance_old_merged(*_balance_args, names_of_labels, data_file)
else:
    _balance_result = check_class_imbalance_old(*_balance_args, data_file)

class_counts, total_counts = _balance_result

In [None]:
labels = df_new_annotations

#### Data Verification

In [None]:
# train_images, val_images, train_labels, val_labels, num_classes = model_preprocessing(train_images, val_images, train_labels, val_labels, df_new_annotations_unique)
vmin = images.min()
vmax = images.max()
plot_first_frames(images, labels, vmin, vmax, data_file)
plot_random_frames(images, labels, vmin, vmax, data_file)

In [None]:
labels

### Preparing for K-fold

In [None]:
# Prepare the full dataset for k-fold training.
# NOTE(review): this rebinds `images` and `labels` to the helper's outputs, so
# re-running the cell feeds already-processed data back into model_preprocessing —
# presumably not idempotent; confirm or use new variable names.
images, labels, num_classes = model_preprocessing(images, labels, df_new_annotations_unique)

In [None]:
def _dump_fold_pickles(pickle_dir, named_objects):
    """Pickle each object in `named_objects` to `<pickle_dir>/<name>.pkl`."""
    # makedirs (not mkdir) so a missing parent output directory is created too.
    os.makedirs(pickle_dir, exist_ok=True)
    for stem, obj in named_objects.items():
        with open(os.path.join(pickle_dir, stem + '.pkl'), 'wb') as f:
            pickle.dump(obj, f)


def start_model(params, is_basic_BPNN):
    """Run k-fold cross-validation and report run time and mean accuracy.

    Parameters
    ----------
    params : dict
        Run configuration, forwarded unchanged to the k-fold runner.
    is_basic_BPNN : bool
        When true, ``run_k_fold_model.run_k_fold_basic`` is used and the
        collected metrics are pickled under ``<output_dir>/pickles``.
        Otherwise ``run_k_fold_model.run_k_fold`` is used and this function
        writes nothing itself.

    Returns
    -------
    tuple
        ``(train_loss_all, val_loss_all, train_acc_all, val_acc_all,
        average_score_list, conf_matrices, f1_score_val_list)`` exactly as
        returned by the runner.

    Notes
    -----
    Reads the notebook-level globals ``experiment_ID`` and ``output_dir``.
    """
    # start clock
    start_time = time.time()

    # Both runners share one call signature: the params dict, five empty
    # accumulator lists (train/val loss, train/val accuracy, per-fold average
    # score) and the experiment ID.
    if is_basic_BPNN:
        run_folds = run_k_fold_model.run_k_fold_basic
    else:
        run_folds = run_k_fold_model.run_k_fold

    (train_loss_all, val_loss_all, train_acc_all, val_acc_all,
     average_score_list, conf_matrices, f1_score_val_list) = run_folds(
        params, [], [], [], [], [], experiment_ID)

    if is_basic_BPNN:
        # Persist the per-fold metrics so other notebooks can load them.
        _dump_fold_pickles(
            os.path.join(output_dir, 'pickles'),
            {
                'train_loss_all': train_loss_all,
                'val_loss_all': val_loss_all,
                'train_acc_all': train_acc_all,
                'val_acc_all': val_acc_all,
                'average_score_list': average_score_list,
                'conf_matrices': conf_matrices,
            },
        )

    total_accuracy_score = np.mean(average_score_list)

    # end clock
    execution_time = time.time() - start_time
    hours, remainder = divmod(execution_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f"Execution time: {int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")
    print("Total average accuracy score: {:.3f}".format(total_accuracy_score))

    return train_loss_all, val_loss_all, train_acc_all, val_acc_all, average_score_list, conf_matrices, f1_score_val_list

In [None]:
# Training Parameters

# Channel dimension of each frame; combined with the frame height/width measured
# earlier to form the model's input shape.
channel_dimension = 1 # 3 or 5 if you're including the time dimension
input_shape = (img_height, img_width, channel_dimension)

# Model type: when True, start_model() uses run_k_fold_basic and pickles the metrics.
is_basic_BPNN = True

# Number of cross-validation folds
num_folds = 2 # 10

# Forwarded to the k-fold runner as params['shuffle_data'].
# NOTE(review): this is a separate switch from `shuffled_labels` in the experiment
# config; presumably it controls shuffling when the folds are built — confirm in run_k_fold_model.
shuffle = False

# Number of training epochs
epochs = 1

# Names of the unique behaviours (distinct values of the 'state_name' column)
df_new_annotations_names = df_new_annotations_check['state_name'].unique()

# Number of unique behaviour labels
no_of_behaviors = len(df_new_annotations_names)

In [None]:
if is_basic_BPNN == True:

    # Build the network for the current frame geometry.
    input_shape = (img_height, img_width, channel_dimension)
    model = construct_model(input_shape, num_classes, name)

    # Everything the k-fold runner needs, keyed by the names it looks up.
    params = dict(
        images=images,
        labels=labels,
        number_of_folds=num_folds,
        shuffle_data=shuffle,
        input_shape=input_shape,
        number_of_classes=num_classes,
        model_name=name,
        epochs=epochs,
        behaviours=no_of_behaviors,
        df_new_annotations=df_new_annotations,
        unique_annotations=df_new_annotations_unique,
        check_annotations=df_new_annotations_check,
        label_names=df_new_annotations_names,
        output_directory=output_dir,
    )

    # Run the cross-validation and keep every per-fold metric at notebook level.
    (train_loss_all, val_loss_all, train_acc_all, val_acc_all,
     average_score_list, conf_matrices, f1_score_val_list) = start_model(params, is_basic_BPNN)



In [None]:
f1_score_mean = np.mean(f1_score_val_list)

In [None]:
f1_score_mean

In [None]:
val_acc_all

In [None]:
save_dir = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/"+str(output_dir)

In [None]:
%store train_loss_all
%store val_loss_all
%store train_acc_all
%store val_acc_all
%store f1_score_val_list
%store f1_score_mean
%store epochs
%store total_accuracy_score
%store num_folds
%store conf_matrices
%store num_folds
%store no_of_labels

In [None]:
# %store model
# %store history
%store name
%store comment
%store save_dir

### Starting K-fold

In [None]:



# K-fold run on either the real labels or a misaligned (chance-level) copy.
#
# Fixes relative to the previous version of this cell:
#   * start_model() takes (params, is_basic_BPNN) and returns seven values;
#     the call and the unpacking now match that.
#   * the boolean flag `shuffled_labels` is no longer overwritten with the
#     shifted label array, so the cell can be re-run safely.
#   * the integer `no_of_behaviors` is no longer rebound to a list of names.

# Number of frames by which the labels are circularly shifted for the control run.
LABEL_SHIFT_FRAMES = 15000

# Display names of the behaviours for the active label scheme.
if labels_type == 'old':
    behaviour_names = ['Main Corr', 'Left Corr', 'Right Corr']
else:
    behaviour_names = ['Grooming', 'Immobile', 'Still', 'Moving', 'Right Turn', 'Left Turn']

if shuffled_labels:
    # Roll the labels along the frame axis so they no longer line up with the
    # frames (alternative considered: np.random.permutation(labels)).
    kfold_labels = np.roll(labels, LABEL_SHIFT_FRAMES, axis=0)
else:
    kfold_labels = labels

# NOTE(review): the basic-BPNN cell above additionally passes 'label_names' and
# 'output_directory' and gives 'behaviours' as a count rather than a list of
# names — confirm which form the runner in run_k_fold_model expects.
params = {
    'images': images,
    'labels': kfold_labels,
    'number_of_folds': num_folds,
    'shuffle_data': shuffle,
    'input_shape': input_shape,
    'number_of_classes': num_classes,
    'model_name': name,
    'epochs': epochs,
    'behaviours': behaviour_names,
    'df_new_annotations': df_new_annotations,
    'unique_annotations': df_new_annotations_unique,
    'check_annotations': df_new_annotations_check
}

# The runner is selected by the notebook-level is_basic_BPNN flag.
(train_loss_all, val_loss_all, train_acc_all, val_acc_all,
 average_score_list, conf_matrices, f1_score_val_list) = start_model(params, is_basic_BPNN)


In [None]:
# model = construct_model(input_shape, num_classes, name)
# Plot the model's architecture to a file
# plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
save_dir = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/output/"

In [None]:
# average_score = np.mean(val_acc_all)

In [None]:
# average_score

In [None]:
# # Plot training and validation accuracy for each fold
# plt.figure(figsize=(4, 3))
# for i in range(num_folds):
#     plt.plot(train_acc_all[i], label=f'Train Acc Fold {i+1}')
#     plt.plot(val_acc_all[i], label=f'Val Acc Fold {i+1}')
# plt.title('Training and Validation Accuracy')
# plt.xlabel('Epoch')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.savefig('Training-and-Validation-Accuracy.svg', bbox_inches='tight', dpi=300)
# plt.show()

In [None]:
# # Plot training and validation loss for each fold
# for i in range(num_folds):
#     plt.plot(train_loss_all[i], label=f'Train Loss Fold {i+1}')
#     plt.plot(val_loss_all[i], label=f'Val Loss Fold {i+1}')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.savefig('Training-and-Validation-Loss.svg', bbox_inches='tight', dpi=300)
# plt.show()

In [None]:
# SAVE ALL HISTORIES IN PICKLE AND LOAD FROM THE OTHER PIPELINE

In [None]:
# # Save the history object to a pickle file
# with open('All_histories.pkl', 'wb') as f:
#     pickle.dump(all_histories.history, f)

## Method without K-fold

### Data Pre-processing

In [None]:
# Hold out the leading 20% of the frames for validation and train on the rest.
# The split is positional: no shuffling takes place here.
split_index = int(0.2 * len(images))

# images = np.concatenate([images, images, images], axis=-1)

val_images = images[:split_index]
train_images = images[split_index:]
val_labels = labels[:split_index]
train_labels = labels[split_index:]

In [None]:
# Report the label distribution of the validation and the training split.
# NOTE(review): check_distribution_among_datasets is not imported in the import
# cell of this notebook (only the check_class_imbalance_* helpers are), so this
# cell raises NameError on a fresh kernel — add the import from its module.
check_distribution_among_datasets(val_labels, experiment_ID, save_dir, dataset_type = 'Validation_set')
check_distribution_among_datasets(train_labels, experiment_ID, save_dir, dataset_type = 'Training_set')

In [None]:
# Preprocess the train/validation split.
# NOTE(review): model_preprocessing is called here with five arguments and five
# return values, but with three of each in the k-fold section — confirm which
# signature the helper currently has. The same call is also repeated a few
# cells further down, which would preprocess the data twice.
train_images, val_images, train_labels, val_labels, num_classes = model_preprocessing(train_images, val_images, train_labels, val_labels, df_new_annotations_unique)

In [None]:
# Split the data into training and validation sets
# train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

In [None]:
vmin = 0
vmax = 0.20

In [None]:
# # Plot the first 5 random images
plot_first_frames(train_images, train_labels, vmin, vmax)
plot_first_frames(val_images, val_labels, vmin, vmax)

In [None]:
plot_random_frames(train_images, train_labels, vmin, vmax)
plot_random_frames(val_images, val_labels, vmin, vmax)

In [None]:
train_images, val_images, train_labels, val_labels, num_classes = model_preprocessing(train_images, val_images, train_labels, val_labels, df_new_annotations_unique)

In [None]:
check_distribution_among_datasets(val_labels, experiment_ID, save_dir, dataset_type = 'Validation_set')
check_distribution_among_datasets(train_labels, experiment_ID, save_dir, dataset_type = 'Training_set')

In [None]:
input_shape = (img_height, img_width, channel_dimension)

In [None]:
model = construct_model(input_shape, num_classes, name)

In [None]:
validation_data=(val_images, val_labels)

In [None]:
# Bundle the inputs that model_execution() receives for the single-split
# (non k-fold) training run.
# NOTE(review): batch_size is not assigned anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel — define it with the other
# training parameters.
params = {
    'model': model,
    'tf': tf,  # the TensorFlow module itself is passed along
    'train_images': train_images,
    'train_labels': train_labels,
    'epochs': epochs,
    'batch_size': batch_size,
    'validation_data': validation_data,
    'val_images': validation_data[0],  # same arrays as in validation_data, exposed individually
    'val_labels': validation_data[1]
}

In [None]:
save_dir = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/output/pickles"


In [None]:
history = model_execution(params, save_dir, model_version)

#### Data Augmentation

In [None]:
# # Perform data augmentation
# # Define the augmentation pipeline
# augmentation_pipeline = iaa.Sequential([
#     iaa.Fliplr(0.5), # flip horizontally with a probability of 0.5
#     iaa.Crop(percent=(0, 0.1)), # crop by up to 10% of the image width/height
#     iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))), # apply Gaussian blur with a probability of 0.5
#     # iaa.Affine(rotate=(-10, 10)) # rotate by up to 10 degrees
# ])

In [None]:
# # Apply the augmentation pipeline to the training set
# augmented_train_images = []
# for image in train_images:
#     # Apply the same augmentation operation to both the image and its corresponding annotation
#     augmented_image = augmentation_pipeline(image=image)
#     augmented_train_images.append(augmented_image)


In [None]:
# # Convert the augmented training set back to numpy arrays
# train_images = np.array(augmented_train_images)

In [None]:
# plot_random_frames(train_images, labels)

### Build Model

In [None]:
# import visualkeras
# from PIL import ImageFont
# visualkeras.layered_view(model, legend=True)
# # Define the directory name and the plot name
# dir_name = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/output/architecture"
# plot_name = f"{model_version}_architecture"
# ann_viz(model, view=True, filename=plot_name, title="CNN — "+str(name)+" — Simple Architecture")
# plot_path = os.path.join(dir_name, f"{plot_name}.png")
# plot = plt.gcf()
# plot.savefig(plot_path, dpi=300, bbox_inches="tight")

In [None]:
# from ann_visualizer.visualize import ann_viz
# ann_viz(model, view=True, filename="cconstruct_model", title="CNN — Model 1 — Simple Architecture")

In [None]:
# from keras.utils.vis_utils import plot_model
# plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True, rankdir='LR')

In [None]:
# changes grayscale to rgb

In [None]:
validation_data=(val_images, val_labels)

### Train Model

We have 24186 images of dimensions 349x374; the channel dimension of 1 indicates that the images are grayscale.

In [None]:
# Training inputs for the single-split run.
# NOTE(review): this dictionary repeats the `params` cell of the
# "Data Pre-processing" section key for key, and batch_size is not assigned
# anywhere in the visible notebook (NameError on a fresh kernel).
params = {
    'model': model,
    'tf': tf,
    'train_images': train_images,
    'train_labels': train_labels,
    'epochs': epochs,
    'batch_size': batch_size,
    'validation_data': validation_data,
    'val_images': validation_data[0],
    'val_labels': validation_data[1]
}

In [None]:
save_dir = "/home/dmc/Desktop/kostas/direct-Behavior-prediction-from-miniscope-calcium-imaging-using-convolutional-neural-networks/src/V3/"+str(output_dir)

In [None]:
# # Call the send_email function after your model has finished training
# sender_email = 'guskikala@gmail.com'
# recipient_email = 'guskikala@gmail.com'
# subject = 'CNN Model Training Completed'
# message = 'Your CNN model training is complete!'

# send_email(sender_email, recipient_email, subject, message)

### Save results

In [None]:
%store model
%store history
%store name
%store comment
%store experiment_ID
%store save_dir
%store model_version

In [None]:
#save_training_info(model, history, video_name, comment, experiment_ID, save_dir, f1_score=f1_score)

High bias: If the training accuracy is low, it suggests that the model is underfitting the training data, i.e., it is not complex enough to capture the patterns in the data. In this case, you may need to increase the model's complexity by adding more layers or neurons, or by using a more complex architecture.

High variance: If the training accuracy is high but the validation accuracy is low, it suggests that the model is overfitting the training data, i.e., it is memorizing the training data instead of generalizing to new data. In this case, you may need to use regularization techniques like dropout or L2 regularization, or use early stopping to prevent the model from overfitting.

Good fit: If the training accuracy and validation accuracy are both high and close to each other, it suggests that the model is neither underfitting nor overfitting the data, i.e., it is generalizing well to new data.

Plateauing: If the validation accuracy is no longer increasing as the training set size or epochs increase, it suggests that the model has reached its capacity and adding more data or epochs is unlikely to improve its performance.

In general, a model accuracy curve can help you diagnose issues with your model and guide you in selecting appropriate strategies to improve its performance. It can also give you an idea of how much training data or how many epochs you need to achieve good performance.

### Reflect on the results

1. Insufficient data? One calcium video of 24186 frames and with 349x374 dimensions.
2. Model architecture not appropriate. Try increasing the number of layers or filters, or adding more complex layers like BatchNormalization, Dropout, or Conv2DTranspose.
3. Incorrect data preprocessing
4. Incorrect hyperparameters
5. Class Imbalance (do oversampling, or undersampling)

### [Ignore for now]

## Reusable snippets

In [None]:
# Load calcium video from local environment
# with h5py.File('path', 'r') as f:
#     video_data = np.array(f['analysis/recording_20211016_163921-PP-BP-MC/data'])

In [None]:
# Loading locally
# with h5py.File('/Users/konstantinoskalaitzidis/Developer/dmc/thesis_data/20211016_163921_animal1learnday1.h5', 'r') as f:
#     print(list(f.keys()))
#     behavior_data = np.array(f['per_frame'])

In [None]:
# # save the model architecture to a JSON file
# with open('model_architecture.json', 'w') as f:
#     f.write(model.to_json())

In [None]:
# # load the model architecture from the JSON file
# with open('model_architecture.json', 'r') as f:
#     json_string = f.read()

# model_json = model_from_json(json_string)

# # print the loaded model summary
# model.summary()

In [None]:
# mySession = readSessionServer.SessionIterator()
# sess = mySession.findSession()
# # for sess in mySession.findSessions():
# #     print(sess)
# if sess.hasBehavior() and sess.hasCalcium():
#     behavior = sess.getBehaviorSegmentation(align_with_calcium=True).reset_index()

## [Ignore for now] Define hyperparameters

In [None]:
# IMG_SIZE = 224
# BATCH_SIZE = 64
# EPOCHS = 10

# MAX_SEQ_LENGTH = 20
# NUM_FEATURES = 2048

In [None]:
# import sys
# sys.path.append("/Users/konstantinoskalaitzidis/Developer/dmc")
# from readSessionsServer import SessionIterator

#TODO: Script to retrieve videos from a list of calcium videos (of the same animal) from the db

## Dataset preparation and label annotation (feature engineering)

### [Ignore for now] Data available for processing - overview

The following is not going to be used for now, but it will give us an overview of all the videos I have available to train my CNN model. I expect to use all recording sessions of each animal as input for the CNN, which will be trained only on recordings from the corresponding animal. The data will be split into train/test sets at some point...

In [None]:
# train_df = pd.read_csv("train.csv")
# test_df = pd.read_csv("test.csv")

# print(f"Total videos for training: {len(train_df)}")
# print(f"Total videos for testing: {len(test_df)}")

# train_df.sample(10)

Extract frames from the calcium imaging video and save to directory. Each frame contains spatial information, and the sequence of those frames contains temporal information (the latter is not exploited for now). Maybe also ask for path input from the user to make it reproducible for others.

Helpful source: https://keras.io/examples/vision/video_classification/

The number of frames may differ from video to video.
The frame rate may also differ from video to video but it should be 20fps for all. 

The duration of each frame depends on the frame rate of the video. If a video has a frame rate of 25 fps, then each frame will have a duration of 1/25th of a second, i.e. 0.04 seconds. The calcium videos use 20 fps (0.05 seconds per frame), while the behavioral recordings are at 60 fps. Alignment of these videos will follow shortly.

### [Ignore for now] Fetch all calcium videos from the dmc database and align calcium videos with behavior annotations

In [None]:
# mySession = readSessionServer.SessionIterator()
# for sess in mySession.findSessions():
#     print(sess)
    # if sess.hasBehavior() and sess.hasCalcium():
        # behavior = sess.getBehaviorSegmentation(align_with_calcium=True).reset_index()

### [Ignore for now] Open calcium video locally, create dir for saving frames and count number of frames with OpenCV

In [None]:
# Open the HDF5 file
# with h5py.File('/Users/konstantinoskalaitzidis/Developer/dmc/thesis_data/20211016_163921_animal1learnday1.nwb', 'r') as f:
#     # Print the keys of the file
#     print(list(f.keys()))
#     # dataset = f['identifier'][()]
#     # print(dataset)

In [None]:
# Directory where frames from video will be stored after extraction
# frames_dir = "path"

In [None]:
# Open the video using OpenCV and count the number of frames
# cap = cv2.VideoCapture(raw_calcium_video_path)
# frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# cap.release()

# print(f"Number of frames in the video: {frame_count}")

In [None]:
# video = 'path'

# cap = cv2.VideoCapture(video)
# frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# cap.release()

# print(f"Number of frames in the video: {frame_count}")

In [None]:
# Open the video file
# cap = cv2.VideoCapture(video)

# # Get the frame rate of the video
# frame_rate = int(cap.get(cv2.CAP_PROP_FPS))

# # Release the video capture object
# cap.release()

# print(f"Frame rate of the video: {frame_rate}")

In [None]:
# save each frame as one image

In [None]:
# cap = cv2.VideoCapture(video)

# # Loop through the video frames and save each one as an image file
# frame_count = 0
# while(cap.isOpened()):
#     ret, frame = cap.read()
#     if ret == False:
#         break
#     # Save the frame as an image file
#     frame_file = os.path.join(frames_dir, "frame_" + str(frame_count) + ".jpg")
#     cv2.imwrite(frame_file, frame)
#     frame_count += 1

# # Close the video file
# cap.release()

In [None]:
# # define paths
# video_path = '/Users/konstantinoskalaitzidis/Developer/dmc/thesis_data/20211016_163921_animal1learnday1.nwb'
# train_dir = '/Users/konstantinoskalaitzidis/Developer/dmc/thesis_data/train'
# test_dir = '/Users/konstantinoskalaitzidis/Developer/dmc/thesis_data/test'

# # define train-test split ratio
# train_test_ratio = 0.8

# # open video file
# cap = cv2.VideoCapture(video_path)

# # get video frame count
# frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# # create list of frame indices
# frame_indices = list(range(frame_count))

# # shuffle frame indices
# random.shuffle(frame_indices)

# # split frame indices into train and test sets
# train_frame_indices = frame_indices[:int(frame_count * train_test_ratio)]
# test_frame_indices = frame_indices[int(frame_count * train_test_ratio):]

# # iterate over frames and save to train or test directory
# for i in range(frame_count):
#     # read frame
#     ret, frame = cap.read()
#     if not ret:
#         break
    
#     # save frame to train or test directory
#     if i in train_frame_indices:
#         cv2.imwrite(os.path.join(train_dir, f'{i}.jpg'), frame)
#     else:
#         cv2.imwrite(os.path.join(test_dir, f'{i}.jpg'), frame)

In [None]:
#==== Subtracting the background ====#
# min_frame = np.min(video_data, axis=0)
# video_data = video_data - min_frame

In [None]:
#==== Normalize pixel values ====#
# images = normalize_video(video_data)