In [1]:
import os
import sys
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import swifter
from scipy import stats
# Add '../' (resolved against the working directory) to the module search path.
sys.path.append('../')
# Turn off pandas' chained-assignment check for the rest of the session.
pd.set_option('mode.chained_assignment', None)



In [2]:
# Combine all CSVs listed in the manifest into a single DataFrame.
# The manifest holds one file name per line; each name is prefixed with
# "bees/". Reading stops at the first blank line or at end of file.
filenames = []
# A context manager closes the manifest as soon as it has been read; the
# previous bare open() left the handle open for the life of the kernel.
with open("bees/files-short.txt") as files:
    for line in files:
        line = line.strip()
        if not line:
            break
        filenames.append("bees/" + line)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
vdf = pd.concat(map(pd.read_csv, filenames), ignore_index=True)

In [4]:
# Fill missing tag ids from other detections of the same track.
# This has to happen BEFORE the cast to str: astype(str) turns NaN into the
# literal string 'nan', after which nothing counts as missing. The fill is
# grouped by (video_name, track_id); grouping by track_tagid itself, as was
# done before, can never fill anything because every group then holds a single
# tag value.
# NOTE(review): assumes a track is identified by (video_name, track_id), as in
# the track-aggregation cell further down - confirm.
# .fillna(vdf['track_tagid']) restores the original value on any row the
# grouped transform left empty (e.g. rows whose group key is itself missing).
vdf['track_tagid'] = (
    vdf.groupby(['video_name', 'track_id'])['track_tagid']
       .transform(lambda s: s.ffill().bfill())
       .fillna(vdf['track_tagid'])
       .astype(str)
)
vdf['track_starttime'] = pd.to_datetime(vdf['track_starttime'])
vdf['track_endtime'] = pd.to_datetime(vdf['track_endtime'])

# Forward-fill the remaining gaps in every column. DataFrame.ffill() replaces
# the deprecated fillna(method='ffill'); rebinding instead of inplace=True
# keeps the step chainable.
vdf = vdf.ffill()

# Sort by tag id. The key used to be listed twice, which added nothing; a
# stable sort keeps rows with equal tag ids in their existing order.
vdf = vdf.sort_values(by='track_tagid', kind='stable')




In [5]:
# Collapse the per-detection rows into one row per (video, track), keeping the
# first tag id, start time and end time seen for that track.
tracks = vdf.groupby(['video_name', 'track_id'], as_index=False).agg(
    track_tagid=('track_tagid', 'first'),
    start=('track_starttime', 'first'),
    end=('track_endtime', 'first'),
)

# Order by video, tag id and start time so that, within each (video, tag),
# shift() below pairs every track with the one that started just before it.
tracks = tracks.sort_values(by=['video_name', 'track_tagid', 'start'])

# Seconds from the previous track's end to this track's start, per
# (video, tag). The first track of each (video, tag) has no predecessor -> NaN.
tracks['gap_s'] = (
    tracks['start']
    - tracks.groupby(['video_name', 'track_tagid'])['end'].shift()
).dt.total_seconds()

# A track opens a new segment when it has no predecessor or when it starts
# more than 5 seconds after the previous track ended.
tracks['split'] = tracks['gap_s'].gt(5) | tracks['gap_s'].isna()

# Running count of breakpoints within each (video, tag) = segment number.
tracks['segment'] = (
    tracks.groupby(['video_name', 'track_tagid'])['split'].cumsum()
)

# One integer id per distinct (video, tag, segment) combination.
tracks['group_id'] = (
    tracks.groupby(['video_name', 'track_tagid', 'segment']).ngroup()
)


In [6]:
# Attach each track's group_id to every detection row of that track.
# Dropping any existing group_id first makes the cell safe to re-run: without
# it, a second run keeps the stale group_id and adds a 'group_id_new' column
# alongside it.
vdf = vdf.drop(columns='group_id', errors='ignore').merge(
    tracks[['video_name', 'track_id', 'group_id']],
    on=['video_name', 'track_id'],
    how='left',
    suffixes=('', '_new'),
    # tracks has one row per (video_name, track_id); fail loudly if that ever
    # stops being true instead of silently duplicating detection rows.
    validate='many_to_one',
)

### GRAPH GENERATION

In [7]:
# Save one line plot of cy against frame per group to graphs/<group_id>.png.
# Create the output directory up front so savefig works on a fresh checkout.
os.makedirs("graphs", exist_ok=True)

# groupby hands over each group's rows directly instead of re-scanning the
# whole frame once per id. Rows whose group_id is missing are skipped.
for i, detections in vdf.groupby('group_id'):
    # Explicit figure/axes; the loop no longer rebinds the name `plt`, which
    # previously replaced the pyplot module with a Figure object and broke any
    # later plt.* call.
    fig, ax = plt.subplots()
    sns.lineplot(x=detections['frame'], y=detections['cy'], ax=ax)
    # Same fixed y-range for every plot so the images are comparable.
    ax.set_ylim(-200, 1500)
    fig.savefig(f"graphs/{i}.png")
    # Close the figure so hundreds of plots do not pile up in memory and no
    # empty figure is left behind as cell output.
    plt.close(fig)


<Figure size 640x480 with 0 Axes>

In [15]:
# Number of distinct track ids in each group, shown only for groups that
# contain more than one track.
vdf.groupby('group_id')['track_id'].nunique().pipe(lambda counts: counts[counts > 1])

group_id
1      7
4      2
5      5
6      2
7      2
      ..
409    2
415    2
424    2
437    3
450    2
Name: track_id, Length: 106, dtype: int64

In [18]:
# Write the full detections table, row index included, to beeActivity.csv.
vdf.to_csv(path_or_buf="beeActivity.csv", index=True)