<a href="https://colab.research.google.com/github/bafanaS/DLC-Object-Recognition-Analysis/blob/master/EXPLORE_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load Drive and Initialize Values

In [None]:
project_name = 'name'

# Root folder of the project on the mounted Drive; every other path hangs off it.
project_path = f'/content/drive/MyDrive/.../{project_name}/'
label_path = project_path + 'labeled'
training_path = project_path + 'training'
plot_path = project_path + 'plots'

# Arguments for create_training_data: same locations as label_path / training_path.
source_dir = label_path
target_dir = training_path
objects = ['ob1', 'ob2']

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; all project paths in this notebook
# are built under /content/drive/MyDrive, so this must run before any cell
# that reads or writes project files.
drive.mount('/content/drive')

Mounted at /content/drive


# Equalize number of samples in each folder
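This step is optional and destructive: it permanently deletes randomly chosen images from the larger class folders until every folder holds as many files as the smallest one. I would not recommend doing this unless the training seems to be faulty each time; in that case you can use this code to manually adjust the training data.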

In [None]:
import os
import random

def get_file_count(folder_path):
    """Return how many regular files sit directly inside ``folder_path``.

    Sub-directories (and anything else that is not a file) are not counted,
    and the folder is not searched recursively.
    """
    total = 0
    for entry in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry)):
            total += 1
    return total

def equalize_folders(folder_paths):
    """Delete random files so that every folder ends up with the same file count.

    The target count is the size of the smallest folder. Files are removed
    permanently with ``os.remove``; one line is printed per deleted file.

    Parameters
    ----------
    folder_paths : iterable of str
        Folders to equalize. Only regular files directly inside each folder
        are considered. An empty iterable is a no-op.

    Returns
    -------
    None
    """
    folder_paths = list(folder_paths)
    if not folder_paths:
        # Nothing to equalize; min() on an empty sequence would raise.
        return

    # Snapshot each folder exactly once so the count used to pick the target
    # size and the list we sample deletions from can never disagree.
    listings = [
        [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
        for folder in folder_paths
    ]
    min_count = min(len(files) for files in listings)

    for folder_path, all_files in zip(folder_paths, listings):
        surplus = len(all_files) - min_count
        if surplus <= 0:
            continue

        # Randomly pick the surplus files and delete them.
        for file_name in random.sample(all_files, surplus):
            os.remove(os.path.join(folder_path, file_name))
            print(f"Deleted {file_name} from {folder_path}")



In [None]:
# Example usage: trim the three labelled class folders to the same file count.
folder1, folder2, folder3 = (
    label_path + suffix for suffix in ('/topnovel', '/botfamiliar', '/no')
)

equalize_folders([folder1, folder2, folder3])

Deleted no_4959.jpg from /content/drive/MyDrive/Deeplabcut/slr2obj/labeled/no
Deleted no_7121.jpg from /content/drive/MyDrive/Deeplabcut/slr2obj/labeled/no


# Create training set

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------------
# ORT analysis - script training data - developed by Victor Ibañez
# 03.04.2021
# -------------------------------------------------------------------------------------


# -----------------------------------------------------
# import libraries
# -----------------------------------------------------

import os
import glob
import random
import math
import shutil

# -----------------------------------------------------
# create training data
# -----------------------------------------------------

def create_training_data(source_dir,target_dir,objects):
    """Split the labelled images into a random training / validation set.

    For every class in ``objects`` plus the implicit ``'no'`` class, all
    ``.jpg`` files below ``<source_dir>/<class>`` are shuffled and copied to
    ``<target_dir>/training/<class>`` (80 %) and
    ``<target_dir>/validation/<class>`` (20 %). Copies are renamed
    ``<class>_<n>.jpg`` with ``n`` starting at 1 in each destination folder.
    Shares are rounded down, so a few images per class may be left out.

    Parameters
    ----------
    source_dir : str
        Folder holding one sub-folder of labelled images per class.
    target_dir : str
        Folder that receives the ``training`` and ``validation`` trees;
        missing folders are created.
    objects : list of str
        Class names. The list is NOT modified; ``'no'`` is added internally
        if it is not already present.

    Notes
    -----
    Existing files in the target folders are overwritten by name but never
    removed, so files from an earlier, larger split can remain behind.
    """

    # iterate through raw data
    def collect_candidates(folder_path):
        return list(glob.iglob(folder_path + '/**/*.jpg', recursive=True))

    # redesign names and copy the images
    def create_images(candidates, imgsave_path, file_prefix):
        for cnt, filename in enumerate(candidates, start=1):
            new_filename = os.path.join(imgsave_path, '{0}_{1}.jpg'.format(file_prefix, cnt))
            shutil.copy(filename, new_filename)

    # create data randomly and split into training and validation
    def process(source_dir, target_dir, dist_training, dist_validation):

        # Work on a copy: appending to the caller's list would add another
        # 'no' entry every time this cell is re-run.
        classes = list(objects)
        if 'no' not in classes:
            classes.append('no')

        distributions = {'training': dist_training, 'validation': dist_validation}

        for cls in classes:
            candidates = collect_candidates(os.path.join(source_dir, cls))
            random.shuffle(candidates)

            offset = 0
            for key, percentage in distributions.items():

                # Integer arithmetic gives the exact floor of n * p / 100
                # without floating-point rounding surprises.
                share = len(candidates) * percentage // 100

                class_path = os.path.join(target_dir, key, cls)
                os.makedirs(class_path, exist_ok=True)

                create_images(candidates[offset:offset+share], class_path, cls)
                offset += share

    process(source_dir,target_dir,80,20)




In [None]:
# Build the 80/20 training/validation split from the labelled images,
# using the paths and object names set in the first cell.
create_training_data(source_dir=source_dir, target_dir=target_dir, objects=objects)

# Train the Convolutional Neural Network

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------------
# ORT analysis - script network multi classes - developed by Victor Ibañez
# 03.04.2021
# -------------------------------------------------------------------------------------

# -----------------------------------------------------
# import libraries
# -----------------------------------------------------
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import plot_model
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
from PIL import Image

import tensorflow as tf
# Open a TF1-compat interactive session whose GPU options have allow_growth
# enabled; the session is kept in the notebook-global `sess`.
# NOTE(review): presumably meant to stop TensorFlow from reserving all GPU
# memory up front - confirm this still has an effect on the TF version in use.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.InteractiveSession(config=config)

def network_multi(source_dir, target_dir, project_path, project_name, plot_path):
    """Train a small CNN classifier on the prepared split, save it and plot its history.

    Parameters
    ----------
    source_dir : str
        Folder with one sub-folder of labelled ``.jpg`` images per class.
        Only used to count images per class for the class weights.
    target_dir : str
        Folder containing the ``training`` and ``validation`` sub-folders,
        each with one sub-folder per class.
    project_path, project_name : str
        The trained model is written to
        ``<project_path>/<project_name>/<project_name>.h5``.
    plot_path : str
        Folder that receives ``accuracy.png`` and ``loss.png``.

    Raises
    ------
    FileNotFoundError
        If no training images are found below ``<target_dir>/training``.
    ValueError
        If a class has no labelled ``.jpg`` images in ``source_dir``.

    Notes
    -----
    With exactly two classes the network ends in a single sigmoid unit
    (binary cross-entropy); otherwise it ends in a softmax over all classes.
    """

    # -----------------------------------------------------
    # set directory
    # -----------------------------------------------------

    train_data_dir = os.path.join(target_dir, 'training')
    validation_data_dir = os.path.join(target_dir, 'validation')

    # -----------------------------------------------------
    # set parameters
    # -----------------------------------------------------

    # extract number of training / validation samples
    t = glob.glob(train_data_dir + '/**/*.jpg', recursive=True)
    v = glob.glob(validation_data_dir + '/**/*.jpg', recursive=True)
    if not t:
        raise FileNotFoundError('no training images (*.jpg) found in ' + train_data_dir)
    t_l = len(t)
    v_l = len(v)
    print('found', t_l, 'training and', v_l, 'validation images')

    # extract weights
    # The class list is taken from the training folder and sorted, because
    # flow_from_directory assigns label indices to the class sub-folders in
    # alphanumeric order; the weight at index i must belong to that class.
    classes = sorted(
        cl for cl in os.listdir(train_data_dir)
        if os.path.isdir(os.path.join(train_data_dir, cl))
    )

    cnt_list = []
    for cl in classes:
        cnt = len(glob.glob(os.path.join(source_dir, cl) + '/**/*.jpg', recursive=True))
        if cnt == 0:
            raise ValueError('no labelled images (*.jpg) found for class ' + repr(cl)
                             + ' in ' + source_dir)
        cnt_list.append(cnt)

    # Inverse-frequency weights, scaled so that balanced classes all get 1.0.
    total = sum(cnt_list)
    weights = {i: (1 / cnt) * (total) / len(cnt_list) for i, cnt in enumerate(cnt_list)}

    print('weight distribution among classes:',weights)

    # extract image size from the first training image
    with Image.open(t[0]) as im:
        img_width, img_height = im.size

    print('your images are of size: ', img_height, img_width, '3')

    epochs = 50
    batch_size = 15
    if len(classes)==2:
        nclasses = 1
        loss_type = 'binary_crossentropy'
        class_type = 'binary'
        activation_fun = 'sigmoid'
    else:
        nclasses = len(classes)
        loss_type = 'categorical_crossentropy'
        class_type = 'categorical'
        activation_fun = 'softmax'
    nb_validation_samples = v_l
    learning_rate = 0.00015
    dropout_rate = 0.5
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=3, min_lr=0.000005)
    early = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')

    # -----------------------------------------------------
    # set input shape of images
    # -----------------------------------------------------

    input_shape = (img_height, img_width, 3)

    # -----------------------------------------------------
    # design the CNN
    # -----------------------------------------------------

    model = Sequential([
        Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(256, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(500, activation='relu'),
        Dense(500, activation='relu'),
        Dropout(dropout_rate),
        Dense(nclasses, activation=activation_fun)
    ])

    opt = Adam(learning_rate=learning_rate)
    model.compile(loss=loss_type, optimizer=opt, metrics=['accuracy'])
    callbacks = [early, reduce_lr]

    # Image data generators
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    validation_datagen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir, target_size=(img_height, img_width),
        batch_size=batch_size, class_mode=class_type)
    validation_generator = validation_datagen.flow_from_directory(
        validation_data_dir, target_size=(img_height, img_width),
        batch_size=batch_size, class_mode=class_type)

    # Train and test the CNN
    # A validation set smaller than one batch would give validation_steps=0;
    # fall back to None so Keras runs through the whole validation generator.
    history = model.fit(
        train_generator,
        epochs=epochs,
        class_weight=weights,
        callbacks=callbacks,
        validation_data=validation_generator,
        validation_steps=(nb_validation_samples // batch_size) or None,
    )

    # Save the model (create the folder first; saving does not create it)
    project = os.path.join(project_path, project_name)
    os.makedirs(project, exist_ok=True)
    model.save(os.path.join(project, project_name) + '.h5')

    # -----------------------------------------------------
    # plot training and validation accuracy & loss values
    # -----------------------------------------------------

    os.makedirs(plot_path, exist_ok=True)

    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.savefig(os.path.join(plot_path, 'accuracy.png'))
    plt.close()

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.savefig(os.path.join(plot_path, 'loss.png'))
    plt.close()




In [None]:
# Disabled example of hard-coded overrides for a project called 'nortest';
# the call below uses the values set in the first cell instead.
# label_path = '/content/drive/MyDrive/Deeplabcut/nortest/labeled'
# training_path = '/content/drive/MyDrive/Deeplabcut/nortest/training'
# project_path = '/content/drive/MyDrive/Deeplabcut/nortest/'
# project_name = 'nortest'
# plot_path = '/content/drive/MyDrive/Deeplabcut/nortest/plots'



# Train the CNN: label_path is passed as source_dir, training_path as target_dir.
network_multi(label_path, training_path, project_path, project_name, plot_path)


weight distribution among classes: {0: 1.0, 1: 1.0, 2: 1.0}
['/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_1.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_2.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_3.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_4.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_5.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_6.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_7.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_8.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_9.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnovel/topnovel_10.jpg', '/content/drive/MyDrive/Deeplabcut/slr2obj/training/training/topnove

# Predict on future videos

In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import os, glob
import pandas as pd
from PIL import Image

# # Configure GPU options
# physical_devices = tf.config.list_physical_devices('GPU')
# if physical_devices:
#     tf.config.experimental.set_memory_growth(physical_devices[0], True)

# On mac you need to shut this down
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

def predict_frames_multi(project_path, model, videos, time, ref_point, names, objects, bins, o_coord):
    """Classify the frames of each video with a trained model and export the results.

    For every video the frames after the first 15 are cropped to
    ``ref_point``, resized to the model's input size and classified. For
    each complete bin of ``bins`` minutes one row is collected with, per
    class other than ``'no'``: the time in seconds covered by frames
    predicted as that class with a confidence above 0.99, and
    ``<class>_freq``, the number of frames where the prediction switched to
    that class. An annotated greyscale video is written to
    ``<project_path>/prediction_videos/<video name>.MP4`` and all rows are
    saved to ``<project_path>/<project name>.csv`` (a numeric suffix is added
    if that file already exists).

    Parameters
    ----------
    project_path : str
        Project folder; a trailing slash is allowed.
    model : str
        Path of the saved Keras model to load.
    videos : list of str
        Paths of the videos to analyse.
    time : int
        Number of minutes of each video to analyse.
    ref_point : sequence
        ``((x1, y1), (x2, y2))`` crop rectangle in frame pixel coordinates.
    names : list of str
        Class names without ``'no'``. The list is NOT modified; ``'no'`` is
        added internally and the names are sorted to match the label order
        used during training.
    objects : list of str
        Object names whose rectangles are drawn on the output video.
    bins : int
        Bin length in minutes; must be positive.
    o_coord : sequence
        One ``((x1, y1), (x2, y2))`` rectangle per entry of ``objects``, in
        coordinates of the cropped frame.

    Raises
    ------
    ValueError
        If ``bins`` is not positive.
    """
    # normpath drops a trailing slash, which would otherwise make basename
    # return '' and the results file end up being called just '.csv'.
    project_name = os.path.basename(os.path.normpath(project_path))
    loaded_model = tf.keras.models.load_model(model)

    if int(bins) <= 0:
        raise ValueError('bins must be a positive number of minutes')

    # Feed frames at the size the model was trained on; fall back to the
    # historical 150x150 if the model does not expose a fixed input size.
    try:
        in_height, in_width = (int(dim) for dim in loaded_model.input_shape[1:3])
    except (TypeError, ValueError):
        in_height = in_width = 150

    data = []
    # Work on a sorted copy so the caller's list is left untouched and
    # re-running the cell does not add 'no' a second time.
    names = sorted(set(names) | {'no'})
    index = names.index('no')
    color_list_fill = [(255,102,255),(102,255,102),(102,255,255),(102,102,255),(107,178,255),(255,102,102)]
    color_list_box = [(255,0,255),(0,255,0),(0,255,255),(0,0,255),(0,128,255),(255,0,0)]

    # import video
    def frame_prediction(video, time, vid_name, ref_point):
        image_list = []   # normalised RGB crops fed to the model
        create_list = []  # greyscale crops (as BGR) for the output video

        vidcap = cv2.VideoCapture(video)
        try:
            success, frame = vidcap.read()
            fps = vidcap.get(cv2.CAP_PROP_FPS)
            # "not fps >= 1" also catches NaN and rates that truncate to 0,
            # which would cause a division by zero further down.
            if not success or not fps >= 1:
                print('skipping video (no readable frames or unknown frame rate):', video)
                return

            count = 1
            max_time = (int(time)*60*int(fps))+15

            (x1, y1), (x2, y2) = ref_point[0], ref_point[1]

            while success:
                if count > 15 and count < max_time:
                    res1 = frame[y1:y2, x1:x2]
                    res = cv2.resize(res1, dsize=(in_width, in_height), interpolation=cv2.INTER_CUBIC)
                    rgb = cv2.cvtColor(res, cv2.COLOR_BGR2RGB)
                    image_list.append(rgb/255.)
                    res_gray = cv2.cvtColor(res1, cv2.COLOR_BGR2GRAY)
                    create_list.append(cv2.cvtColor(res_gray, cv2.COLOR_GRAY2BGR))
                elif count >= max_time:
                    break

                success, frame = vidcap.read()
                count += 1
        finally:
            vidcap.release()

        if not image_list:
            print('skipping video (too few frames to analyse):', video)
            return

        img_array = np.array(image_list)
        result_proba = loaded_model.predict(img_array)
        if result_proba.shape[1] == 1:
            # Single sigmoid output: the value is the probability of class
            # index 1, so argmax over the one column would always give 0.
            p_one = result_proba[:, 0]
            result = (p_one > 0.5).astype(int)
            confidence = np.where(result == 1, p_one, 1.0 - p_one)
        else:
            result = np.argmax(result_proba, axis=1)
            confidence = result_proba[np.arange(len(result)), result]

        cnt_list = [0] * len(names)
        freq_list = [0] * len(names)
        frames_per_bin = int(fps)*60*int(bins)

        cnt_min = 1
        for i in range(len(result)):
            predicted = int(result[i])

            # count a new visit whenever the prediction switches to an object class
            if i > 0 and predicted != index and predicted != int(result[i-1]):
                freq_list[predicted] += 1

            if predicted != index and confidence[i] > 0.99:
                cnt_list[predicted] += 1
                res_col = create_list[i]
                for j in range(len(objects)):
                    if objects[j] == names[predicted]:
                        (ox1, oy1), (ox2, oy2) = o_coord[j][0], o_coord[j][1]
                        fill = color_list_fill[j % len(color_list_fill)]
                        box = color_list_box[j % len(color_list_box)]
                        overlay = res_col.copy()
                        overlay[oy1:oy2, ox1:ox2] = fill
                        res_col = cv2.addWeighted(overlay,0.5,res_col,0.5,0)
                        cv2.rectangle(res_col, (ox1, oy1), (ox2, oy2), box, thickness=2)
                cv2.putText(res_col, names[predicted], (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3, cv2.LINE_AA, False)
                create_list[i] = res_col

            # The +2 offset makes the last analysed frame of a bin close it
            # (one frame fewer than a full bin is collected per video).
            if (i+2) % frames_per_bin == 0:
                d = {'experiment':os.path.basename(os.path.dirname(video)), 'animal':vid_name, 'minute':cnt_min*int(bins)}
                for j in range(len(names)):
                    if j == index:
                        continue
                    d[names[j]] = cnt_list[j]/int(fps)
                    d[names[j]+'_freq'] = freq_list[j]
                print('minute:', cnt_min*int(bins))
                data.append(d)
                print(d)
                cnt_min += 1
                cnt_list = [0] * len(cnt_list)
                freq_list = [0] * len(freq_list)

        height, width, layers = create_list[0].shape
        size = (width, height)
        out_dir = os.path.join(project_path, 'prediction_videos')
        os.makedirs(out_dir, exist_ok=True)
        name = os.path.join(out_dir, vid_name)
        # NOTE(review): the output is always written at 25 fps, regardless of
        # the frame rate of the source video.
        out = cv2.VideoWriter(name + '.MP4', cv2.VideoWriter_fourcc(*'MP4V'), 25, size)
        for annotated in create_list:
            out.write(annotated)
        out.release()

        print('Done!', count, 'frames predicted from video:', video)

    print('start predicting...')
    for video in videos:
        # splitext on the basename keeps dots in folder names from cutting the path short
        vid_name = os.path.splitext(os.path.basename(video))[0]
        print('processing video:', vid_name)
        frame_prediction(video, time, vid_name, ref_point)

    print('all videos processed!')
    exp_name = os.path.join(project_path, project_name)
    df = pd.DataFrame.from_dict(data)
    filename = exp_name + '.csv'
    if os.path.isfile(filename):
        # never overwrite earlier results: append the first free numeric suffix
        cnt = 1
        while True:
            new_filename = exp_name + '_' + str(cnt) + '.csv'
            cnt += 1
            if not os.path.isfile(new_filename):
                filename = new_filename
                break
    df.to_csv(filename, index=False)


## Define parameters:

In [None]:
import ast

#predict_frames_multi(project_path, model, videos, time, ref_point, names, objects, bins, o_coord)
project_name = 'name' # project name

label_path = f'/content/drive/MyDrive/.../{project_name}/labeled'
training_path = f'/content/drive/MyDrive/.../{project_name}/training'
project_path = f'/content/drive/MyDrive/.../{project_name}/'
plot_path = f'/content/drive/MyDrive/.../{project_name}/plots'

model = f'/content/drive/MyDrive/.../{project_name}/{project_name}/{project_name}.h5'

time = 10 # time of one video

bins = 10 # time per one instance of recording object exploration

# Read the project logfile; the `with` block closes the file even if one of
# the entries below fails to parse.
with open(os.path.join(project_path,"logfile"),"r") as log:
    file = log.readlines()

# Each value is a Python literal on a fixed line of the logfile.
# NOTE(review): the line numbers assume the logfile layout is unchanged -
# verify against the tool that writes the logfile.
ref_point = ast.literal_eval(file[10])
coord = ast.literal_eval(file[13])
obj_key = ast.literal_eval(file[16])
dic = ast.literal_eval(file[22])

names = list(dic.keys())
objects = list(obj_key.keys())




In [None]:
# Fill in the full paths of the video files to analyse; the list is passed
# to predict_frames_multi in the next cell.
videos = [] # The video locations that you want to predict

In [None]:
# Run the prediction on all listed videos; `coord` (read from the logfile)
# is passed as the o_coord argument.
predict_frames_multi(project_path, model, videos, time, ref_point, names, objects, bins, coord)


start predicting...
processing video: Test 17
minute: 10
{'experiment': '2022-02-02 - SLR - COHORT 1', 'animal': 'Test 17', 'minute': 10, 'botfamiliar': 5.214285714285714, 'botfamiliar_freq': 4, 'topnovel': 2.857142857142857, 'topnovel_freq': 31}
Done! 8415 frames predicted from video: /content/drive/My Drive/Deeplabcut/m-orientation-hussaini lab-2023-07-10/SLR Videos/2022-02-02 - SLR - COHORT 1/Test 17.mp4
processing video: Test 18
minute: 10
{'experiment': '2022-02-02 - SLR - COHORT 1', 'animal': 'Test 18', 'minute': 10, 'botfamiliar': 6.0, 'botfamiliar_freq': 29, 'topnovel': 13.8, 'topnovel_freq': 21}
Done! 9015 frames predicted from video: /content/drive/My Drive/Deeplabcut/m-orientation-hussaini lab-2023-07-10/SLR Videos/2022-02-02 - SLR - COHORT 1/Test 18.mp4
processing video: Test 19
minute: 10
{'experiment': '2022-02-02 - SLR - COHORT 1', 'animal': 'Test 19', 'minute': 10, 'botfamiliar': 33.333333333333336, 'botfamiliar_freq': 67, 'topnovel': 12.133333333333333, 'topnovel_freq'