In [27]:
import os
import pandas as pd
from glob import glob

In [2]:
# F = staring at the camera/inspecting the camera
# P = No reaction
# A = coming closer to the camera (because the camera was spotted. We can see that the chimpanzee noticed the camera, and they are now
#     getting closer to inspect it)
# R = Moving away from the camera (because the camera was spotted. We can see that the chipmanzee noticed the camera, and they are now
#     going away from it, often quicky as affraid of it)

#### **North East**

In [3]:
df_ne = pd.read_csv("DS_North_East/20220406_DSNE_Chimp.txt", delimiter="\t")
print(f"Number of videos: {df_ne.VideoName.nunique()}")

Number of videos: 873


In [4]:
# Replace lower case p with upper case P
df_ne.Reaction.replace({"p": "P"}, inplace=True)

In [5]:
def extract_videos_reactions(df, keep_p=False):
    r, no_r, react, no_react = [], [], [], []
    for video in df.VideoName.unique():
        reactions = set(df[df.VideoName == video]["Reaction"])
        # check if A or P or N is in reactions
        if ("A" in reactions) or ("F" in reactions) or ("R" in reactions):
            r.append(video)
            if not keep_p:
                if "P" in reactions:
                    reactions.remove("P")
            react.append(list(reactions))
        else:
            no_r.append(video)
            no_react.append(list(reactions))
    return r, no_r, react, no_react

In [6]:
# TODO: consider whether excluding P from multilabel is correct...


def create_multilabel_df(r, no_r, react, no_react):
    df = pd.DataFrame({"video": r, "react": react})
    df["inspection"] = df["react"].apply(lambda x: True if "F" in x else False)
    df["attraction"] = df["react"].apply(lambda x: True if "A" in x else False)
    df["avoidance"] = df["react"].apply(lambda x: True if "R" in x else False)
    df["no_reaction"] = False

    tmp = pd.DataFrame(
        {
            "video": no_r,
            "react": no_react,
            "inspection": False,
            "avoidance": False,
            "attraction": False,
            "no_reaction": True,
        }
    ).dropna()

    df = pd.concat([df, tmp])
    to_label = lambda x: ",".join([str(i) for i in x])
    df["label"] = (
        df[df.columns[2:]].astype(int).apply(to_label, axis=1).astype("category")
    )
    df["label"] = df.label.str.split(",")
    df["label"] = df.label.apply(lambda x: [int(i) for i in x])

    return df

In [7]:
r, no_r, react, no_react = extract_videos_reactions(df=df_ne, keep_p=False)
ne_df = create_multilabel_df(r, no_r, react, no_react)

In [8]:
ne_df

Unnamed: 0,video,react,inspection,attraction,avoidance,no_reaction,label
0,20200415_101022_Pan troglodytes verus_273137_1...,"[F, A]",True,True,False,False,"[1, 1, 0, 0]"
1,20200402_071827_Pan troglodytes verus_271696_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
2,20200402_084859_Pan troglodytes verus_271696_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
3,20200325_091743_Pan troglodytes verus_270934_1...,"[F, R]",True,False,True,False,"[1, 0, 1, 0]"
4,20200330_151611_Pan troglodytes verus_270934_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
...,...,...,...,...,...,...,...
666,20200608_140943_Pan troglodytes verus_268743_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
667,20200522_074814_Pan troglodytes verus_270004_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
668,20200422_155433_Pan troglodytes verus_288938_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
669,20200704_192106_Pan troglodytes verus_285951_1...,[P],False,False,False,True,"[0, 0, 0, 1]"


In [9]:
ne_df.video = ne_df.video.str.rstrip(" ").str.replace(" ", "_").str.lower()

#### **North West**

In [10]:
df_nw = pd.read_csv("DS_North_West/20220331_DSNW_Chimp.txt", delimiter="\t")
print(f"Number of videos: {df_nw.VideoName.nunique()}")

Number of videos: 382


In [11]:
r, no_r, react, no_react = extract_videos_reactions(df=df_nw, keep_p=False)
nw_df = create_multilabel_df(r, no_r, react, no_react)

In [12]:
nw_df

Unnamed: 0,video,react,inspection,attraction,avoidance,no_reaction,label
0,20201025_113755_Pan troglodytes verus_250256_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
1,20201027_142211_Pan troglodytes verus_258806_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
2,20201121_111607_Pan troglodytes verus_250256_1...,"[F, R]",True,False,True,False,"[1, 0, 1, 0]"
3,20201223_110755_Pan troglodytes verus_255956_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
4,20210322_063617_Pan troglodytes verus_246603_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
...,...,...,...,...,...,...,...
316,20210305_114417_Pan troglodytes verus_264506_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
317,20210305_114620_Pan troglodytes verus_264506_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
318,20210305_115136_Pan troglodytes verus_258313_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
319,20210323_104748_Pan troglodytes verus_258313_1...,[P],False,False,False,True,"[0, 0, 0, 1]"


In [14]:
nw_df.video = (
    nw_df.video.str.rstrip(" ").str.lstrip(" ").str.replace(" ", "_").str.lower()
)

In [15]:
nw_df

Unnamed: 0,video,react,inspection,attraction,avoidance,no_reaction,label
0,20201025_113755_pan_troglodytes_verus_250256_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
1,20201027_142211_pan_troglodytes_verus_258806_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
2,20201121_111607_pan_troglodytes_verus_250256_1...,"[F, R]",True,False,True,False,"[1, 0, 1, 0]"
3,20201223_110755_pan_troglodytes_verus_255956_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
4,20210322_063617_pan_troglodytes_verus_246603_1...,[F],True,False,False,False,"[1, 0, 0, 0]"
...,...,...,...,...,...,...,...
316,20210305_114417_pan_troglodytes_verus_264506_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
317,20210305_114620_pan_troglodytes_verus_264506_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
318,20210305_115136_pan_troglodytes_verus_258313_1...,[P],False,False,False,True,"[0, 0, 0, 1]"
319,20210323_104748_pan_troglodytes_verus_258313_1...,[P],False,False,False,True,"[0, 0, 0, 1]"


**Write to CSV**

In [16]:
nw_df.to_csv("guinea_nw.csv", index=False)
ne_df.to_csv("guinea_ne.csv", index=False)