## DLC Component

In [None]:
from behavysis_pipeline import *

import traceback


In [None]:
# Project root folder; "." is a relative path, i.e. the notebook's working directory.
proj_dir = r"."

# Create the project object for that folder, then import its experiments.
proj = BehavysisProject(proj_dir)
proj.importExperiments()

# Keep a handle on the experiment at index 1 (the second item returned).
# NOTE(review): presumably fails if fewer than two experiments are returned, and
# `exp` is not used by any cell visible here - confirm it is still needed.
exp = proj.getExperiments()[1]


In [None]:
# Location of the default configs JSON, directly under the project root.
default_configs_fp = os.path.join(proj_dir, "default.json")

# Update the project's config file(s) from it, using the "set" overwrite mode.
proj.updateConfigFile(default_configs_fp=default_configs_fp, overwrite="set")


In [None]:
# Functions handed to the video-formatting step. `formatVid` is left commented
# out (kept here so it can be re-enabled), so only `getVidMetadata` is passed.
vid_funcs = (
    # formatVid,
    getVidMetadata,
)
proj.formatVid(funcs=vid_funcs, overwrite=True)


In [None]:
# Run the DLC step for the whole project, overwriting existing outputs.
# NOTE(review): `gputouse=None` presumably leaves GPU selection to the
# library's default - confirm against the `runDLC` documentation.
proj.runDLC(gputouse=None, overwrite=True)


In [None]:
# Parameter calculators passed to the project, in this order:
# start frame, end frame, then the pixels-per-mm calculator.
param_funcs = (calcStartFrame, calcEndFrame, calcPXPerMM)
proj.calculateParams(param_funcs)


In [None]:
# Preprocessing functions, passed in the order listed.
preprocess_funcs = (
    trimToStartEnd,
    interpolatePoints,
    calcBodyCentre,
    refineIdentities,
)
proj.preprocess(preprocess_funcs, overwrite=True)


## SimBA Component


In [None]:
# Run feature extraction for the project.
# NOTE(review): both positional flags are True, but their meaning is not visible
# here - pass them by keyword once the parameter names are confirmed.
proj.extractFeatures(True, True)


## Train Classifier from extracted features


All outputs are saved in the `model` folder.
Steps:
* Data prep
    * Make a configs_json, which holds train/test experiment split
    * Prepare attributes df in single large DF (extra index midx level "experiment")
    * Prepare behaviour labels df (each column has a separate classifier though) in single large DF (extra index midx level "experiment", and extra column midx level "outcome" ("actual"))
    * Ensure that both X_all and y_all have the same rows (midx is ("experiment", "Frame"))
    * Do the same for the X_train and X_test sets: split the videos into a subset for each.
* Make individual folders for each behaviour we want to train a classifier for (in `behavs_ls`):
    * Copy X dfs to each folder
    * Save relevant columns of y dfs to each folder
    * Copy configs json
* For each behav folder, X_all preprocessing
    * Random undersampler (select subset of majority class): this seems to work best. Alternatives are random oversampling (repeat minority class instances).
        * Undersample X_all and X_train. DO NOT do for X_test.
* Define Classifier
    * GradientBoost(): This seems to work best. Alternatives are RF, XGBoost, and Keras MLP.
* Run Classifier
    * From saved classifier hyper-params.
* Evaluate
    * Use novel videos (source still to be decided - maybe hold out some videos for an X_train and X_test dataset) to create:
        * Sorted probability results logistic graph (line for probabilities, points for actuals, vline for threshold).
        * Accuracy, Precision, Recall, F1 graph for range of threshold (from 0 to 1).
        * Timeseries probabilities against actuals lineplot for each video.
        * Annotated video with predicted vs actual behavs.


In [None]:
# MAKING BEHAVS FEATHER DF - SHOULD ONLY BE DONE IN THIS INSTANCE
# BECAUSE "fight" IS DERIVED FROM "green_fights" AND "white_fights".
# READING BORIS DATA SHOULD ALSO ONLY BE A SPECIAL CASE.
#
# For every scored CSV in `in_dir`, build a per-frame dataframe with three
# behaviour columns ("fight", "marked_fight", "unmarked_fight") under the
# BEHAV_ACTUAL_COL outcome level, and write it to `out_dir` as a feather file
# with the same base name.

scored_root = r"Z:\PRJ-BowenLab\TimLee\resources\project_ma_new"
in_dir = os.path.join(scored_root, "scored_old")
out_dir = os.path.join(scored_root, "7_scored_behavs")

# List the inputs first so a missing `in_dir` fails before anything is created.
scored_files = sorted(os.listdir(in_dir))
os.makedirs(out_dir, exist_ok=True)

for i in scored_files:
    # Only scored CSVs are inputs. Sub-folders and stray files would otherwise
    # lead to reading a "<name>.csv" that does not exist.
    if not i.lower().endswith(".csv"):
        continue
    name = getName(i)
    df = pd.read_csv(os.path.join(in_dir, f"{name}.csv"))
    # A file without one of the fight columns is treated as all zeros for it.
    for j in ["white_fights", "green_fights"]:
        if j not in df.columns:
            df[j] = 0
    new_df = pd.DataFrame(index=df["Frame"].astype(np.int64))
    # Making new columns.
    # Values are assigned positionally via `to_numpy()`: `new_df` is indexed by
    # frame number while `df` keeps the default row index from `read_csv`, so
    # assigning a Series would align on labels and shift or blank out rows
    # whenever the frame numbers differ from the row positions.
    green = df["green_fights"].to_numpy()
    white = df["white_fights"].to_numpy()
    new_df[("fight", BEHAV_ACTUAL_COL)] = ((green == 1) | (white == 1)).astype(np.uint8)
    new_df[("marked_fight", BEHAV_ACTUAL_COL)] = green
    new_df[("unmarked_fight", BEHAV_ACTUAL_COL)] = white
    # Formatting column names
    new_df.columns = pd.MultiIndex.from_tuples(new_df.columns, names=BEHAV_COLUMN_NAMES)
    writeFeather(new_df, os.path.join(out_dir, f"{name}.feather"))



In [None]:
# Create the root classifier object from the project via the
# `from_BehavysisProject` alternate constructor.
root_clf = BehavClassifier.from_BehavysisProject(proj)


In [None]:
# Source folders inside the project: extracted features and scored behaviours.
features_dir = os.path.join(proj.dir, "5_features_extracted")
scored_dir = os.path.join(proj.dir, "7_scored_behavs")

# Combine the dataframes from both folders on the root classifier.
root_clf.combine_dfs(features_dir, scored_dir)


In [None]:
# Create the train/test split on the root classifier (called with its defaults).
root_clf.make_train_test_split()


In [None]:
# Behaviours to model; one model sub-directory / classifier object is made for
# each, in this order.
behavs_ls = ["fight", "marked_fight", "unmarked_fight"]
behav_clf_ls = [root_clf.make_behav_model_subdir(behav) for behav in behavs_ls]

# Alternative (kept for reference): build the classifiers from the
# `model_configs.json` files already under `behav_models/<behav>/` instead of
# creating the sub-directories again.
# behav_clf_ls = []
# behavs_ls = ["fight", "marked_fight", "unmarked_fight"]
# for behav in behavs_ls:
#     behav_clf_ls.append(BehavClassifier(os.path.join(proj.dir, "behav_models", behav, "model_configs.json")))


In [None]:
# Run the random-undersampling step on every behaviour classifier in `behav_clf_ls`.
for behav_clf in behav_clf_ls:
    behav_clf.make_random_undersample()


In [None]:
# Initialise the classifier for every behaviour in `behav_clf_ls`.
for behav_clf in behav_clf_ls:
    behav_clf.init_behav_classifier()


In [None]:
# Train the classifier for every behaviour in `behav_clf_ls`.
for behav_clf in behav_clf_ls:
    behav_clf.train_behav_classifier()


In [None]:
# Run the model evaluation for every behaviour classifier in `behav_clf_ls`.
for behav_clf in behav_clf_ls:
    behav_clf.model_eval()


In [None]:
# proj.classifyBehaviour(True)


In [None]:
# TODO: postprocess behav inference (e.g. min-bout duration)


In [None]:
# Evaluation functions to run. `keypointsPlot` is left commented out (kept here
# so it can be re-enabled), so only `evalVid` is passed.
eval_funcs = (
    evalVid,
    # keypointsPlot
)
proj.evaluate(eval_funcs, overwrite=True)


### Cleaning up

In [None]:
# Delete the generated output folders listed below from the project root.
# Folders that are not listed are left untouched.
#
# Imported here so this cell does not depend on `from behavysis_pipeline import *`
# happening to expose `os` and `shutil`.
import os
import shutil

for i in [
    "0_configs",
    "2_formatted_vid",
    "3_dlc_csv",
    "4_preprocessed_csv",
    "5_features_extracted",
    "6_predicted_behavs",
    "analysis",
    "diagnostics",
    "evaluate",
]:
    path = os.path.join(proj_dir, i)
    # `rmtree` only works on directories, so skip missing entries and plain files.
    if os.path.isdir(path):
        shutil.rmtree(path)
