#setup:
- connect to google drive in this step
- get_data() function in this step

In [18]:
import pandas as pd
import math
import numpy as np
import os
import glob
import sklearn
import pickle
import glob
import scipy.io
import cv2
from google.colab.patches import cv2_imshow
import random

In [19]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
project_path = '/content/drive/MyDrive/230702/demo-me-2021-07-14'
output_dir = os.path.join(project_path, "behaviors/")
classifiers_dir = os.path.join(project_path, "classifiers/")
if not os.path.exists(output_dir):
  os.mkdir(output_dir)

In [21]:
def get_data(individual, bodypart, h5_file):
  mouse_data = h5_file.xs(individual,level='individuals',axis=1)
  out_data = mouse_data.xs(bodypart,level='bodyparts',axis=1)
  out_data.columns = out_data.columns.droplevel("scorer")
  out_data_copy = out_data.copy()
  output = out_data.copy()
  # if missing a lot of body parts:
  # for i in range(out_data_copy) go by every 5:
  #  between i:i+4 find index value with highest "likelihood"
  #  output[i:i+4] = x and y from max likelihood
  return output

# Getting overall video features subworkflow:

## Choose some data to open:

In [30]:
video_name = '221002_PZ71_1'

h5_suffix = 'DLC_resnet50_demoJul14shuffle1_50000_el_filtered'

# area_vec = [area_startx, area_starty, area_distx, area_disty]
female_side_mat = scipy.io.loadmat(project_path + "/behaviors/" + video_name + "_female_side.mat")
female_side_vec = female_side_mat['croprect'][0]
male_side_mat = scipy.io.loadmat(project_path + "/behaviors/" + video_name + "_male_side.mat")
male_side_vec = male_side_mat['croprect'][0]

h5_file = pd.read_hdf(project_path +'/videos/'+ video_name + h5_suffix + '.h5')
[nframes, ncols] = h5_file.shape

In [31]:
# get all data ready for accessing as needed later on

# names of csv individuals
individual1 = 'ind1'
individual2 = 'ind2'
#individual3 = 'ind3'

# getting feature points
mouse1_feature_points = {}
mouse1_feature_points['snout'] = get_data('ind1', 'snout', h5_file)
mouse1_feature_points['leftear'] = get_data('ind1', 'leftear', h5_file)
mouse1_feature_points['rightear'] = get_data('ind1', 'rightear', h5_file)
mouse1_feature_points['shoulder'] = get_data('ind1', 'shoulder', h5_file)
mouse1_feature_points['spine1'] = get_data('ind1', 'spine1', h5_file)
mouse1_feature_points['spine2'] = get_data('ind1', 'spine2', h5_file)
mouse1_feature_points['spine3'] = get_data('ind1', 'spine3', h5_file)
mouse1_feature_points['spine4'] = get_data('ind1', 'spine4', h5_file)
mouse1_feature_points['tailbase'] = get_data('ind1', 'tailbase', h5_file)

mouse2_feature_points = {}
mouse2_feature_points['snout'] = get_data('ind2', 'snout', h5_file)
mouse2_feature_points['leftear'] = get_data('ind1', 'leftear', h5_file)
mouse2_feature_points['rightear'] = get_data('ind1', 'rightear', h5_file)
mouse2_feature_points['shoulder'] = get_data('ind2', 'shoulder', h5_file)
mouse2_feature_points['spine1'] = get_data('ind2', 'spine1', h5_file)
mouse2_feature_points['spine2'] = get_data('ind2', 'spine2', h5_file)
mouse2_feature_points['spine3'] = get_data('ind2', 'spine3', h5_file)
mouse2_feature_points['spine4'] = get_data('ind2', 'spine4', h5_file)
mouse2_feature_points['tailbase'] = get_data('ind2', 'tailbase', h5_file)

# commented out because ran with n_tracks = 2
#mouse3_feature_points = {}
#mouse3_feature_points['snout'] = get_data('mus3', 'snout', h5_file)
#mouse3_feature_points['shoulder'] = get_data('mus3', 'shoulder', h5_file)
#mouse3_feature_points['spine1'] = get_data('mus3', 'spine1', h5_file)
#mouse3_feature_points['spine2'] = get_data('mus3', 'spine2', h5_file)
#mouse3_feature_points['spine3'] = get_data('mus3', 'spine3', h5_file)
#mouse3_feature_points['spine4'] = get_data('mus3', 'spine4', h5_file)
#mouse3_feature_points['tailbase'] = get_data('mus3', 'tailbase', h5_file)


In [32]:
# male_side_vec = [x, y, width, height]; female_side_vec = [x, y, width, height]
relevant_area = female_side_vec.copy()
# relevant area is female_side_vec + male_side_vec size exactly right next to each other with 50 buffer
relevant_area[2] = female_side_vec[2] + male_side_vec[2] + 50
print(relevant_area)

[  0.56150794   0.99404762 335.55952381 165.34126984]


## feature points setup and helper functions
- ex. `euclid_dist(first_mouse_feature_points['snout'].loc[[100]], second_mouse_feature_points['shoulder'].loc[[100]]])`

In [33]:
def within_area(area_vector, input_coor):
  area_startx = area_vector[0]
  area_starty = area_vector[1]
  area_distx = area_vector[2]
  area_disty = area_vector[3]
  x = input_coor["x"].iloc[0]
  y = input_coor["y"].iloc[0]
  if (area_startx <= x <= (area_startx+area_distx)) and (area_starty <= y <= (area_starty+area_disty)):
    result = 1
  else:
    result = 0
  return result

In [77]:
def euclid_dist(point1_coor, point2_coor):
  # euclid_dist(first_mouse_feature_points['snout'].loc[[100]], second_mouse_feature_points['shoulder'].loc[[100]]])
  point1 = np.array((point1_coor["x"].iloc[0], point1_coor["y"].iloc[0]))
  point2 = np.array((point2_coor["x"].iloc[0], point2_coor["y"].iloc[0]))
  output_dist = np.linalg.norm(point1 - point2)

  # but if any coordinate is NaN, then output_dist is 0
  if np.isnan(point1).any() or np.isnan(point2).any():
    output_dist = 0

  return output_dist

In [81]:
def euclid_angle(pointa_coor, pointb_coor, pointc_coor):
  #angle_{pointa, pointb, pointc}
  # euclid_angle(first_mouse_feature_points['snout'].loc[[100]], first_mouse_feature_points['shoulder'].loc[[100]]], second_mouse_feature_points['snout'].loc[[100]]])
  a = np.array((pointa_coor["x"].iloc[0], pointa_coor["y"].iloc[0]))
  b = np.array((pointb_coor["x"].iloc[0], pointb_coor["y"].iloc[0]))
  c = np.array((pointc_coor["x"].iloc[0], pointc_coor["y"].iloc[0]))

  ba = a - b
  bc = c - b

  cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
  angle = np.arccos(cosine_angle)

  output_ang = np.degrees(angle)

  # but if any coordinate is NaN, then output_ang is 0
  if np.isnan(a).any() or np.isnan(b).any() or np.isnan(c).any():
    output_ang = 0

  return output_ang

In [36]:
def torso_in_area(area_vector, mouse_num, i):
  # finds if any part of torso at all is in area
  if mouse_num == 1:
    feature_points = mouse1_feature_points
  elif mouse_num == 2:
    feature_points = mouse2_feature_points
  #elif mouse_num == 3:
  #  feature_points = mouse3_feature_points

  if within_area(area_vector, feature_points['snout'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['shoulder'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine1'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine2'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine3'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine4'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['tailbase'].loc[[i]]):
    return 1
  else:
    return 0

In [37]:
def ear_or_torso_in_area(area_vector, mouse_num, i):
  # finds if any part of torso at all is in area
  if mouse_num == 1:
    feature_points = mouse1_feature_points
  elif mouse_num == 2:
    feature_points = mouse2_feature_points
  #elif mouse_num == 3:
  #  feature_points = mouse3_feature_points

  if within_area(area_vector, feature_points['snout'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['shoulder'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine1'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine2'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine3'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['spine4'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['tailbase'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['leftear'].loc[[i]]):
    return 1
  elif within_area(area_vector, feature_points['rightear'].loc[[i]]):
    return 1
  else:
    return 0

In [38]:
def snout_in_area(area_vector, mouse_num, i):
  # finds if any part of snout at all is in area
  if mouse_num == 1:
    feature_points = mouse1_feature_points
  elif mouse_num == 2:
    feature_points = mouse2_feature_points
  #elif mouse_num == 3:
  #  feature_points = mouse3_feature_points

  if within_area(area_vector, feature_points['snout'].loc[[i]]):
    result = 1
  else:
    result = 0
  return result

In [39]:
def shoulder_in_area(area_vector, mouse_num, i):
  # finds if any part of shoulder at all is in area
  if mouse_num == 1:
    feature_points = mouse1_feature_points
  elif mouse_num == 2:
    feature_points = mouse2_feature_points
  #elif mouse_num == 3:
  #  feature_points = mouse3_feature_points

  if within_area(area_vector, feature_points['shoulder'].loc[[i]]):
    result = 1
  else:
    result = 0
  return result

## code for calculating features for any frame i:
- ex. `get_i_features(i)`

In [40]:
def check_mice_exist(i):
  # 2 mice detected
  if ear_or_torso_in_area(relevant_area, 1, i) and ear_or_torso_in_area(relevant_area, 2, i):
    first_mouse_feature_points = mouse1_feature_points
    second_mouse_feature_points = mouse2_feature_points

  #elif torso_in_area(relevant_area, 1, i) and torso_in_area(relevant_area, 3, i):
    # first_mouse_feature_points = mouse1_feature_points
    # second_mouse_feature_points = mouse3_feature_points

  #elif torso_in_area(relevant_area, 2, i) and torso_in_area(relevant_area, 3, i):
  #  first_mouse_feature_points = mouse2_feature_points
  #  second_mouse_feature_points = mouse3_feature_points


  # 1 mouse detected
  elif ear_or_torso_in_area(relevant_area, 1, i):
    first_mouse_feature_points = mouse1_feature_points
    second_mouse_feature_points = 0

  elif ear_or_torso_in_area(relevant_area, 2, i):
    first_mouse_feature_points = mouse2_feature_points
    second_mouse_feature_points = 0

  #elif torso_in_area(relevant_area, 3, i):
  #  first_mouse_feature_points = mouse3_feature_points
  #  second_mouse_feature_points = 0

  # no mouse detected
  else:
    first_mouse_feature_points = 0
    second_mouse_feature_points = 0

  return first_mouse_feature_points, second_mouse_feature_points

In [41]:
def check_female_side_mice(i):
  if ear_or_torso_in_area(female_side_vec, 1, i) and ear_or_torso_in_area(female_side_vec, 2, i):
    return 1
  else:
    return 0

In [83]:
def get_torso_dists(i, first_mouse_feature_points, second_mouse_feature_points):
  dists = np.zeros((7,7))

  torso_bodyparts = ['snout', 'shoulder', 'spine1', 'spine2', 'spine3', 'spine4', 'tailbase']

  for bodypart_i in range(0, len(torso_bodyparts)):
    for bodypart_j in range(0, len(torso_bodyparts)):
      dists[bodypart_i, bodypart_j] = euclid_dist(first_mouse_feature_points[torso_bodyparts[bodypart_i]].loc[[i]], second_mouse_feature_points[torso_bodyparts[bodypart_j]].loc[[i]])
  return dists

In [43]:
def long_short(i, first_mouse_feature_points, second_mouse_feature_points):
  torso_bodyparts = ['snout', 'shoulder', 'spine1', 'spine2', 'spine3', 'spine4', 'tailbase']

  first_mouse_dists = np.zeros((7,7))
  second_mouse_dists = np.zeros((7,7))
  for bodypart_i in range(0, len(torso_bodyparts)):
    for bodypart_j in range(0, len(torso_bodyparts)):
      # euclid_dist should be nan if any one of the input bodyparts is not present
      first_mouse_dists[bodypart_i, bodypart_j] = euclid_dist(first_mouse_feature_points[torso_bodyparts[bodypart_i]].loc[[i]], first_mouse_feature_points[torso_bodyparts[bodypart_j]].loc[[i]])
      second_mouse_dists[bodypart_i, bodypart_j] = euclid_dist(second_mouse_feature_points[torso_bodyparts[bodypart_i]].loc[[i]], second_mouse_feature_points[torso_bodyparts[bodypart_j]].loc[[i]])

  # nanmax should be 0 if there was only 1 bodypart
  if np.nanmax(first_mouse_dists) > np.nanmax(second_mouse_dists):
    long_mouse_feature_points = first_mouse_feature_points
    short_mouse_feature_points = second_mouse_feature_points
    long_mouse_dists = first_mouse_dists
    short_mouse_dists = second_mouse_dists
  else:
    long_mouse_feature_points = second_mouse_feature_points
    short_mouse_feature_points = first_mouse_feature_points
    long_mouse_dists = second_mouse_dists
    short_mouse_dists = first_mouse_dists

  return long_mouse_feature_points, short_mouse_feature_points, long_mouse_dists, short_mouse_dists

In [85]:
def get_i_features(i):
  first_mouse_feature_points, second_mouse_feature_points = check_mice_exist(i)
  if second_mouse_feature_points == 0:
    return np.array([0, 0, 0,0,0,0, 0,0])

  else:
    # check female side
    female_side_mice = check_female_side_mice(i)

    # torso dist stuff
    torso_dists = get_torso_dists(i, first_mouse_feature_points, second_mouse_feature_points)
    first_min_id,second_min_id = np.unravel_index(np.nanargmin(torso_dists), torso_dists.shape)
    min_dist = torso_dists[first_min_id,second_min_id]
    min_posteriority_diff = abs(first_min_id - second_min_id)
    first_max_id,second_max_id = np.unravel_index(np.nanargmax(torso_dists), torso_dists.shape)
    max_dist = torso_dists[first_max_id,second_max_id]
    max_posteriority_diff = abs(first_max_id - second_max_id)

    # angle stuff
    long_mouse_feature_points, short_mouse_feature_points, long_mouse_dists, short_mouse_dists = long_short(i, first_mouse_feature_points, second_mouse_feature_points)
    long_ids = np.array([np.unravel_index(np.nanargmax(long_mouse_dists), long_mouse_dists.shape)])
    short_ids = np.array([np.unravel_index(np.nanargmax(short_mouse_dists), short_mouse_dists.shape)])
    torso_bodyparts = ['snout', 'shoulder', 'spine1', 'spine2', 'spine3', 'spine4', 'tailbase']
    long_posterior = torso_bodyparts[np.max(long_ids)]
    long_anterior = torso_bodyparts[np.min(long_ids)]
    short_posterior = torso_bodyparts[np.max(short_ids)]
    short_anterior = torso_bodyparts[np.min(short_ids)]
    # more angle stuff
    posterior_ang = euclid_angle(long_mouse_feature_points[long_anterior].loc[[i]], long_mouse_feature_points[long_posterior].loc[[i]], short_mouse_feature_points[short_posterior].loc[[i]])
    anterior_ang = euclid_angle(long_mouse_feature_points[long_posterior].loc[[i]], long_mouse_feature_points[long_anterior].loc[[i]], short_mouse_feature_points[short_anterior].loc[[i]])

    return np.array([1, female_side_mice, min_dist,min_posteriority_diff,max_dist,max_posteriority_diff, posterior_ang, anterior_ang])

## calculate all_i_features for all frames:
- commented out rn

In [45]:
num_features = 8
total_frames = 36000

In [89]:
# this function takes a while
def get_all_i_features(total_frames):
  all_i_features = np.empty([total_frames, num_features])
  for frame in range(total_frames):
    all_i_features[frame, 0:num_features] = get_i_features(frame)
  return all_i_features

In [None]:
# takes a while
all_i_features = get_all_i_features(total_frames)

# pickle dumping and loading all_i_features
in behaviors/video_name + '_features.pickle'
- dump pickle is not commented out right now
- load pickle is commented out right now

In [None]:
# save all_i_features as pickle
import pickle
with open(project_path + "/behaviors/" + video_name + "_features.pickle", 'wb') as file:
    pickle.dump(all_i_features, file)

In [None]:
# load an all_i_features pickle
#import pickle
#with open(project_path + "/behaviors/" + video_name + "_features.pickle", 'rb') as file:
#    all_i_features = pickle.load(file)
#    print('loaded.')

# Getting training frames and features

In [15]:
import pandas as pd

frames_df = pd.DataFrame({'video_name':  ['221009_PZ70_1', '221009_PZ70_1', '221016_PZ70_1', '221016_PZ70_1'],
                                      'i': [10195, 20573, 1297, 6673],
                                  'label': [0, 1, 0, 0]})

print(frames_df)

      video_name      i  label
0  221009_PZ70_1  10195      0
1  221009_PZ70_1  20573      1
2  221016_PZ70_1   1297      0
3  221016_PZ70_1   6673      0


In [17]:
import numpy as np
num_features = 8
num_frames = frames_df.shape[0]
total_features = np.empty([num_frames, num_features])
video_name = ''
for index, row in frames_df.iterrows():
  print(row['video_name'], row['i'], row['label'])
  if not (row['video_name'] == video_name):
    # all_i_features = load the pickle for frame[0]
    video_name = row['video_name']

  i = row['i']

  temp_features = all_i_features[i, :]
  total_features[index, 0:num_features] = temp_features


221009_PZ70_1 10195 0


IndexError: ignored

# Forest training:

## load evaluation pickle data

In [None]:
!pip install deeplabcut

[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting deeplabcut
  Using cached deeplabcut-2.3.5-py3-none-any.whl (1.4 MB)
Collecting dlclibrary (from deeplabcut)
  Using cached dlclibrary-0.0.3-py3-none-any.whl (14 kB)
Collecting filterpy>=1.4.4 (from deeplabcut)
  Using cached filterpy-1.4.5-py3-none-any.whl
Collecting ruamel.yaml>=0.15.0 (from deeplabcut)
  Using cached ruamel.yaml-0.17.31-py3-none-any.whl (112 kB)
Collecting torch<=1.12 (from deeplabcut)
  Using cached torch-1.12.0-cp310-cp310-manylinux1_x86_64.whl (776.3 MB)
Collecting tensorpack>=0.11 (from deeplabcut)
  Using cached tensorpack-0.11-py2.py3-none-any.whl (296 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml>=0.15.0->deeplabcut)
  Using cached ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (485 kB)
Collecting msgpack-numpy>=0.4.4.2 (from tensorpack>=0.11->deeplabcut)
  Using cached msgpack_nu

In [None]:
import math
import os
import pickle
import random
import shelve
import warnings
from itertools import combinations
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd

import deeplabcut
from deeplabcut.utils import auxiliaryfunctions, conversioncode
from deeplabcut.generate_training_dataset import trainingsetmanipulation
from deeplabcut.pose_estimation_tensorflow.lib.trackingutils import TRACK_METHODS


Loading DLC 2.3.5...
DLC loaded in light mode; you cannot use any GUI (labeling, relabeling and standalone GUI)


In [None]:
def LoadFullMultiAnimalData(dataname):
    """Save predicted data as h5 file and metadata as pickle file; created by predict_videos.py"""
    data_file = dataname.split(".h5")[0] + "_full.pickle"
    try:
        with open(data_file, "rb") as handle:
            data = pickle.load(handle)
    except (pickle.UnpicklingError, FileNotFoundError):
        data = shelve.open(data_file, flag="r")
    with open(data_file.replace("_full.", "_meta."), "rb") as handle:
        metadata = pickle.load(handle)
    return data, metadata

dataname = "/content/drive/MyDrive/training/demo-me-2021-07-14/evaluation-results/iteration-2/demoJul14-trainset95shuffle1/DLC_resnet50_demoJul14shuffle1_50000-snapshot-50000.h5"
data, metadata = LoadFullMultiAnimalData(dataname)

data_copy = data.copy()

In [None]:
from tqdm import tqdm
import pickle, re
import numpy as np
import pandas as pd

frame_names = list(data.keys())

frames = [data_copy[frame_names[f]]['prediction']['coordinates'][0] for f in range(len(frame_names)-1)]
frames_confidences = [data_copy[frame_names[f]]['prediction']['confidence'][0] for f in range(len(frame_names)-1)]


meta_columns = ['snout', 'leftear', 'rightear', 'shoulder', 'spine1', 'spine2', 'spine3', 'spine4', 'tailbase', 'tail1', 'tail2', 'tailend']

number_indiv = 2

col = []
for id in range(number_indiv):
    for i in meta_columns:
        col.append('x_' + i + '_' + str(id))
        col.append('y_' + i + '_' + str(id))
        col.append('conf_' + i + '_' + str(id))
df = pd.DataFrame(index = frame_names, columns=col)


In [None]:
for n in range(len(frames)):
#for n in range(5):
    dets = frames[n]
    confs = frames_confidences[n]
    for id in range(number_indiv):
      for bdypt in range(len(meta_columns)):
        try:
          confidence = confs[id][0]

          if confidence < 0.01:
            hi = 'hi'

          else:
            for p in ['conf_', 'x_', 'y_']:
              if p == 'conf_':
                df[p+meta_columns[bdypt]+ '_' + str(id)][frame_names[n]] = confs[id][0]
              elif p == 'x_':
                df[p+meta_columns[bdypt]+ '_' + str(id)][frame_names[n]] = dets[bdypt][id][0]
              elif p == 'y_':
                df[p+meta_columns[bdypt]+ '_' + str(id)][frame_names[n]] = dets[bdypt][id][0]

        except:
            pass

In [None]:
df.to_csv("/content/drive/MyDrive/training/demo-me-2021-07-14/evaluation-results/iteration-2/demoJul14-trainset95shuffle1/df.csv", index=False)

In [None]:
df

Unnamed: 0,x_snout_0,y_snout_0,conf_snout_0,x_leftear_0,y_leftear_0,conf_leftear_0,x_rightear_0,y_rightear_0,conf_rightear_0,x_shoulder_0,...,conf_tailbase_1,x_tail1_1,y_tail1_1,conf_tail1_1,x_tail2_1,y_tail2_1,conf_tail2_1,x_tailend_1,y_tailend_1,conf_tailend_1
"(labeled-data, 221002_PZ71_1, img00962.png)",109.434,109.434,0.949,90.357,90.357,0.949,118.76,118.76,0.949,120.436,...,,,,,,,,,,
"(labeled-data, 221002_PZ71_1, img03268.png)",267.081,267.081,1.0,260.304,260.304,1.0,3.669,3.669,1.0,254.273,...,0.171,196.18,196.18,0.171,188.638,188.638,0.171,51.587,51.587,0.171
"(labeled-data, 221002_PZ71_1, img04194.png)",127.687,127.687,1.0,107.823,107.823,1.0,116.298,116.298,1.0,110.25,...,,,,,,,,,,
"(labeled-data, 221002_PZ71_1, img07363.png)",27.032,27.032,0.995,27.603,27.603,0.995,10.844,10.844,0.995,18.348,...,0.974,251.925,251.925,0.974,250.133,250.133,0.974,60.791,60.791,0.974
"(labeled-data, 221002_PZ71_1, img07483.png)",53.822,53.822,0.999,70.912,70.912,0.999,84.599,84.599,0.999,79.293,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(labeled-data, 221127_PZ90_1, img27974.png)",194.806,194.806,0.993,177.137,177.137,0.993,186.507,186.507,0.993,204.282,...,,,,,,,,,,
"(labeled-data, 221127_PZ90_1, img28625.png)",19.098,19.098,0.999,24.915,24.915,0.999,14.318,14.318,0.999,26.057,...,1.0,,,1.0,126.121,126.121,1.0,,,1.0
"(labeled-data, 221127_PZ90_1, img31226.png)",105.401,105.401,1.0,90.525,90.525,1.0,82.71,82.71,1.0,84.923,...,1.0,212.513,212.513,1.0,197.335,197.335,1.0,183.443,183.443,1.0
"(labeled-data, 221127_PZ90_1, img32907.png)",101.69,101.69,1.0,119.888,119.888,1.0,123.562,123.562,1.0,117.345,...,1.0,195.585,195.585,1.0,212.598,212.598,1.0,228.722,228.722,1.0


## set up features from pickle to csv

In [None]:
# area_vec = [area_startx, area_starty, area_distx, area_disty]
# these are placeholders fine for now
female_side_mat = scipy.io.loadmat(project_path + "/behaviors/221002_PZ71_1_male_side.mat")
female_side_vec = female_side_mat['croprect'][0]
male_side_mat = scipy.io.loadmat(project_path + "/behaviors/221002_PZ71_1_female_side.mat")
male_side_vec = male_side_mat['croprect'][0]

[nframes, ncols] = [len(frames), len(df.columns)]

- `mouse1_feature_points['snout'].loc[[100]]`

In [None]:
# get all data ready for accessing as needed later on

# names of csv individuals
individual1 = 'ind1'
individual2 = 'ind2'
#individual3 = 'ind3'

# getting feature points
mouse1_feature_points = {}
mouse1_feature_points['snout'] = pd.DataFrame([df['x_snout_0'],df['y_snout_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['snout'].columns = ["x", "y"]
mouse1_feature_points['leftear'] = pd.DataFrame([df['x_leftear_0'],df['y_leftear_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['leftear'].columns = ["x", "y"]
mouse1_feature_points['rightear'] = pd.DataFrame([df['x_rightear_0'],df['y_rightear_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['rightear'].columns = ["x", "y"]
mouse1_feature_points['shoulder'] = pd.DataFrame([df['x_shoulder_0'],df['y_shoulder_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['shoulder'].columns = ["x", "y"]
mouse1_feature_points['spine1'] = pd.DataFrame([df['x_spine1_0'],df['y_spine1_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['spine1'].columns = ["x", "y"]
mouse1_feature_points['spine2'] = pd.DataFrame([df['x_spine2_0'],df['y_spine2_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['spine2'].columns = ["x", "y"]
mouse1_feature_points['spine3'] = pd.DataFrame([df['x_spine3_0'],df['y_spine3_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['spine3'].columns = ["x", "y"]
mouse1_feature_points['spine4'] = pd.DataFrame([df['x_spine4_0'],df['y_spine4_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['spine4'].columns = ["x", "y"]
mouse1_feature_points['tailbase'] = pd.DataFrame([df['x_tailbase_0'],df['y_tailbase_0']]).transpose().reset_index(drop=True)
mouse1_feature_points['tailbase'].columns = ["x", "y"]

mouse2_feature_points = {}
mouse2_feature_points['snout'] = pd.DataFrame([df['x_snout_1'],df['y_snout_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['snout'].columns = ["x", "y"]
mouse2_feature_points['leftear'] = pd.DataFrame([df['x_leftear_1'],df['y_leftear_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['leftear'].columns = ["x", "y"]
mouse2_feature_points['rightear'] = pd.DataFrame([df['x_rightear_1'],df['y_rightear_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['rightear'].columns = ["x", "y"]
mouse2_feature_points['shoulder'] = pd.DataFrame([df['x_shoulder_1'],df['y_shoulder_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['shoulder'].columns = ["x", "y"]
mouse2_feature_points['spine1'] = pd.DataFrame([df['x_spine1_1'],df['y_spine1_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['spine1'].columns = ["x", "y"]
mouse2_feature_points['spine2'] = pd.DataFrame([df['x_spine2_1'],df['y_spine2_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['spine2'].columns = ["x", "y"]
mouse2_feature_points['spine3'] = pd.DataFrame([df['x_spine3_1'],df['y_spine3_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['spine3'].columns = ["x", "y"]
mouse2_feature_points['spine4'] = pd.DataFrame([df['x_spine4_1'],df['y_spine4_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['spine4'].columns = ["x", "y"]
mouse2_feature_points['tailbase'] = pd.DataFrame([df['x_tailbase_1'],df['y_tailbase_1']]).transpose().reset_index(drop=True)
mouse2_feature_points['tailbase'].columns = ["x", "y"]

# commented out because ran with n_tracks = 2
#mouse3_feature_points = {}
#mouse3_feature_points['snout'] = get_data('mus3', 'snout', h5_file)
#mouse3_feature_points['shoulder'] = get_data('mus3', 'shoulder', h5_file)
#mouse3_feature_points['spine1'] = get_data('mus3', 'spine1', h5_file)
#mouse3_feature_points['spine2'] = get_data('mus3', 'spine2', h5_file)
#mouse3_feature_points['spine3'] = get_data('mus3', 'spine3', h5_file)
#mouse3_feature_points['spine4'] = get_data('mus3', 'spine4', h5_file)
#mouse3_feature_points['tailbase'] = get_data('mus3', 'tailbase', h5_file)

In [None]:
# male_side_vec = [x, y, width, height]; female_side_vec = [x, y, width, height]
relevant_area = [0, 0, 400, 400]

## calculate all i features first:

In [None]:
#pd.read_csv('file_name.csv', usecols= ['column_name1','column_name2'])

In [None]:
# takes a while
num_features = 8
total_frames = nframes
all_i_features = get_all_i_features(total_frames)

In [None]:
all_i_features[1]

array([  1.        ,   0.        ,   8.79357993,   1.        ,
       371.82502982,   0.        ,   0.        ,   0.        ])

## load in mount or nonmount labels

In [None]:
# write in a csv and then load in

## Get specific input frame features function
- ex. `get_input_features(frame_examples, i_features)`


In [None]:
def get_input_features(frame_examples, i_features):
  input_features = np.array([all_i_features[i, 0:num_features-1].flatten() for i in frame_examples])

  return np.nan_to_num(input_features, nan=-1)

# uh

In [None]:
with open(project_path + "/classifiers/" +video_name+ '_i_features_temporal2.pickle', 'wb') as f:
    pickle.dump(all_i_features, f)

In [None]:
#i_features_file = open(project_path + "/classifiers/" + video_name + '_i_features_temporal2.pickle', 'rb')
#i_features = pickle.load(i_features_file)
#i_features_file.close()

#mount example features

In [None]:
training_mount_features = get_input_features(training_mount_examples, i_features)

#nonmount example features
- requires previous section (mount example features) to have already run

In [None]:
forest_example_behavior = corrected_behavior.copy()
for i in range(0, 36001):
  beh = corrected_behavior["behavior"][i]
  if beh == 'mount':
    # this is probably redundant but oh well it takes 3s
    # 30 frames = 1 second surround
    forest_example_behavior["behavior"][i-30:i] = 'ignore'
    forest_example_behavior["behavior"][i:i+30] = 'ignore'

In [None]:
# find all frames with at least 1 individual
training_nonmount_examples = [None] * 36000
n_examples = 0
for i in range(30, 35970):
  if forest_example_behavior["behavior"][i] != 'ignore':
    training_nonmount_examples[n_examples] = [i]
    n_examples = n_examples + 1
training_nonmount_examples[n_examples:] = []

num_nonmount_examples = len(training_nonmount_examples)
print(num_nonmount_examples)

28186


In [None]:
training_nonmount_features = get_input_features(training_nonmount_examples, i_features)

#all features together

In [None]:
random_indices = np.random.choice(training_nonmount_features.shape[0], num_nonmount_examples, replace=False)
final_training_nonmount_features = training_nonmount_features[random_indices]

random_indices = np.random.choice(training_nonmount_features.shape[0], num_nonmount_examples, replace=False)
final_testing_nonmount_features = training_nonmount_features[random_indices]

In [None]:
print(num_mount_examples)
print(num_nonmount_examples)

5744
28186


In [None]:
print(training_mount_features.mean(axis=0)[0:122*2]-training_nonmount_features.mean(axis=0)[0:122*2])
#print(training_nonmount_features.mean(axis=0)[0:122*2])

[ 4.90751276e-02  1.95118007e+00 -1.40812761e-02 -1.16902966e+00
  4.89686918e-02  1.86201120e+00 -1.38040438e-02 -1.16388206e+00
  4.87236398e-02  1.78432177e+00 -1.33527169e-02 -1.13413823e+00
  4.85140665e-02  1.71176169e+00 -1.32850581e-02 -1.13468010e+00
  4.85495451e-02  1.67134662e+00 -1.32528778e-02 -1.12334028e+00
  4.82723129e-02  1.61246244e+00 -1.25919775e-02 -1.06756709e+00
  4.83787487e-02  1.59023238e+00 -1.17569826e-02 -9.84837832e-01
  4.82756113e-02  1.54408459e+00 -1.16860254e-02 -1.00506562e+00
  4.82046540e-02  1.50046721e+00 -1.15474093e-02 -1.03154962e+00
  4.81336968e-02  1.46179564e+00 -1.12314002e-02 -1.04343413e+00
  4.77500288e-02  1.37429021e+00 -1.06381588e-02 -1.02494962e+00
  4.77500288e-02  1.33806103e+00 -1.04640641e-02 -9.69485769e-01
  4.75759341e-02  1.27037703e+00 -1.03221497e-02 -8.99642853e-01
  4.78209860e-02  1.23026655e+00 -1.02866711e-02 -8.48846699e-01
  4.82046540e-02  1.19358450e+00 -9.44837784e-03 -7.19207246e-01
  4.80305593e-02  1.10734

In [None]:
training_features = np.vstack((training_mount_features, final_training_nonmount_features))
testing_features = np.vstack((training_mount_features, final_testing_nonmount_features))

In [None]:
training_labels = np.zeros(training_features.shape[0], dtype=int)
training_labels[0:num_mount_examples] = 1

testing_labels = np.zeros(testing_features.shape[0], dtype=int)
testing_labels[0:num_mount_examples] = 1

In [None]:
print(num_mount_examples)
print(num_nonmount_examples)

5744
28186


In [None]:
# save features and labels
with open(project_path + '/classifiers/' + video_name + '_training_features.pickle', 'wb') as f:
    pickle.dump(training_features, f)
with open(project_path + '/classifiers/' + video_name + '_training_labels.pickle', 'wb') as f:
    pickle.dump(training_labels, f)

#features from multiple videos

In [None]:
training_features = np.array([])
training_labels = np.array([])

In [None]:
video_name = 'PZ71_1'
video_training_features_file = open(output_dir + video_name + '_training_features.pickle', 'rb')
video_training_features = pickle.load(video_training_features_file)
video_training_features_file.close()

video_training_labels_file = open(output_dir + video_name + '_training_labels.pickle', 'rb')
video_training_labels = pickle.load(video_training_labels_file)
video_training_labels_file.close()

In [None]:
training_features = np.vstack([training_features, video_training_features]) if training_features.size else video_training_features
training_labels = np.vstack([training_labels, video_training_labels]) if training_labels.size else video_training_labels

#smote

In [None]:
#Importing essential libraries
import matplotlib.pyplot as plt
from statistics import mean
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import plot_confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE

#Use SMOTE to oversample the minority class
oversample = SMOTE()
over_X, over_y = oversample.fit_resample(training_features, training_labels)
over_X_train, over_X_test, over_y_train, over_y_test = train_test_split(over_X, over_y, test_size=0.1, stratify=over_y)
#Build SMOTE SRF model
SMOTE_SRF = RandomForestClassifier(n_estimators=150, random_state=0)
#Create Stratified K-fold cross validation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
scoring = ('f1', 'recall', 'precision')
#Evaluate SMOTE SRF model
scores = cross_validate(SMOTE_SRF, over_X, over_y, scoring=scoring, cv=cv)
#Get average evaluation metrics
print('Mean f1: %.3f' % mean(scores['test_f1']))
print('Mean recall: %.3f' % mean(scores['test_recall']))
print('Mean precision: %.3f' % mean(scores['test_precision']))

#Randomly spilt dataset to test and train set
X_train, X_test, y_train, y_test = train_test_split(training_features, training_labels, test_size=0.1, stratify=training_labels)
#Train SMOTE SRF
SMOTE_SRF.fit(over_X_train, over_y_train)
#SMOTE SRF prediction result
y_pred = SMOTE_SRF.predict(X_test)
#Create confusion matrix
fig = plot_confusion_matrix(SMOTE_SRF, X_test, y_test, display_labels=['Not Mount', 'Mount'], cmap='Greens')
plt.title('smote, standrfc confusion mat')
plt.show()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(testing_features, training_labels, test_size=0.95, stratify=training_labels)
y_pred = SMOTE_SRF.predict(testing_features)
# Making the Confusion Matrix
print(pd.crosstab(testing_labels, y_pred, rownames=['Actual'], colnames=['Predicted']))

Predicted      0     1
Actual                
0          28159    27
1            352  5392


# save classifer

In [None]:
# save the model
with open(classifiers_dir + 'classifer_temporal2.pickle', 'wb') as f:
  pickle.dump(SMOTE_SRF, f)

#pickle to csv

In [None]:
# load a pickle
video_name ='PZ70_1'
pickle_file = open(output_dir + video_name +'_intro_events.pickle', 'rb')
pickle_data = pickle.load(pickle_file)
pickle_file.close()

In [None]:
import csv

with open(output_dir + video_name +'_intro_events.csv', 'w', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(pickle_data.columns)
    for i in range(0, pickle_data.shape[0]):
      # write the data
      writer.writerow(pickle_data.values[i,:])