In [1]:
import pandas as pd
import vent_utils as vu

### Read the results CSV file

In [2]:
# Gzip-compressed CSV with the results for video 247; pandas infers the
# compression from the ".gz" suffix by default.
filename = '247.csv.gz'
df = pd.read_csv(filename)
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, which looks
# like a row index that was written to the CSV — confirm whether downstream code
# relies on it before switching to index_col=0.
# Bare last expression: renders the (truncated) table as the cell output.
df

Unnamed: 0,video_id,fish_id,frame_id,x1,y1,width,height,area,label,score,filename
0,247,1,0,592.869080,276.706116,55.851013,54.952942,3069.177482,open,0.998894,247.frame_000000.jpg
1,247,1,1,609.196472,276.714813,57.012772,55.993950,3192.370276,open,0.989953,247.frame_000001.jpg
2,247,1,2,628.898743,278.862549,52.703541,54.364429,2865.197927,open,0.998550,247.frame_000002.jpg
3,247,1,3,643.355469,279.635864,53.611305,54.800533,2937.928118,open,0.998755,247.frame_000003.jpg
4,247,1,4,658.778625,281.765778,53.618073,54.172813,2904.641838,open,0.991226,247.frame_000004.jpg
...,...,...,...,...,...,...,...,...,...,...,...
81394,247,4514,26992,46.918255,664.107056,76.438179,77.330719,5911.019342,open,0.997908,247.frame_026992.jpg
81395,247,4515,26983,1212.001465,417.816986,49.192627,53.977814,2655.310454,open,0.986124,247.frame_026983.jpg
81396,247,4515,26984,1199.562378,414.041138,48.106342,55.981174,2693.049542,open,0.983715,247.frame_026984.jpg
81397,247,4515,26985,1187.201538,414.104034,47.134453,52.103954,2455.891376,open,0.994734,247.frame_026985.jpg


### Count the number of consecutive frames in every open/closed sequence
#### Note: this step is a bit slow

In [3]:
# Options forwarded to vu.process_tracks. The notes on the three active options
# are taken from the messages the helper prints when this cell runs.
process_tracks_options = dict(
    drop_DJ_sequence=None,
    drop_DJ_fraction=0.5,        # log: "Dropping fish if DJ fraction is higher than 0.5"
    n_impute_randomly=1,         # log: "Imputing nulls of length 1 with random choice"
    fix_open_within_closed=1,    # log: "Changing closed-open-closed to closed-closed-closed for open of 1"
    fix_single_frames_with_medians=False,
    impute_with_medians=False,
    extend_flanks_with_medians=False,
)

# NOTE(review): the imputation is described as a "random choice" and no seed is
# set anywhere in this notebook, so results may differ slightly between runs —
# confirm whether vent_utils seeds its own RNG.
vent_df, vent_df_no_nulls, fish_sizes_df = vu.process_tracks(tracks_df=df, **process_tracks_options)

# One row per (video_id, fish_id, change_id) with the sequence label and its size.
vent_df

Dropping fish if DJ fraction is higher than 0.5
Imputing nulls of length 1 with random choice
Changing closed-open-closed to closed-closed-closed for open of 1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,label,size
video_id,fish_id,change_id,Unnamed: 3_level_1,Unnamed: 4_level_1
247,1,1,open,5
247,1,2,closed,15
247,1,3,open,8
247,1,4,closed,15
247,1,5,open,5
247,...,...,...,...
247,4513,11,open,2
247,4514,1,open,3
247,4514,2,closed,4
247,4514,3,open,10


### Keep only fish whose head is larger than 75 x 75 pixels (i.e. the fish is close enough)

In [4]:
# Size threshold handed to the helper: 75 * 75 = 5625.
# NOTE(review): presumably compared against the head area in pixels (see the
# heading above) — confirm against vent_utils.
ONLY_LARGER_THAN = 75 * 75

# Rebinds `vent_df`: from this cell on it holds whatever subset the helper returns,
# so the table displayed by the previous cell no longer reflects its contents.
vent_df = vu.subset_vent_df_based_on_size(
    vent_df=vent_df,
    fish_sizes_df=fish_sizes_df,
    only_larger_than=ONLY_LARGER_THAN,
)

### Calculate the average open+closed duration for every fish

In [5]:
# False here: the result below has one value per (video_id, fish_id),
# with no separate open/closed entries.
PER_STATUS = False

vent_lengths1 = vu.get_average_vent_length(
    vent_df=vent_df,
    estimator='mean',
    per_status=PER_STATUS,
    per_fish=True,
    remove_flanking=False,
)

# Series named "size", indexed by (video_id, fish_id).
vent_lengths1

video_id  fish_id
247       2          36.000000
          7          54.000000
          14          6.000000
          32         25.000000
          49         18.571429
                       ...    
          4431       18.000000
          4442        1.000000
          4465        9.500000
          4483        1.000000
          4514       10.500000
Name: size, Length: 353, dtype: float64

### Calculate the mean across all fish in the video

In [6]:
# Group the per-fish averages by video once and reuse the grouping for both summaries.
lengths_by_video = vent_lengths1.groupby('video_id')

print('Number of samples')
display(lengths_by_video.count())  # non-null per-fish values in each video

print('\nMEAN open+closed duration (frames)')
display(lengths_by_video.mean())  # unweighted mean of the per-fish averages

Number of samples


video_id
247    353
Name: size, dtype: int64


MEAN open+closed duration (frames)


video_id
247    17.349686
Name: size, dtype: float64

### Calculate the average open duration and the average closed duration for every fish

In [7]:
# True here: the result below carries an extra "label" index level,
# giving separate open and closed values per fish.
PER_STATUS = True

vent_lengths2 = vu.get_average_vent_length(
    vent_df=vent_df,
    estimator='mean',
    per_status=PER_STATUS,
    per_fish=True,
    remove_flanking=False,
)

# Series named "size", indexed by (video_id, fish_id, label).
vent_lengths2

video_id  fish_id  label 
247       2        open      36.0
          7        open      54.0
          14       open       6.0
          32       closed     7.6
                   open      17.4
                             ... 
          4465     closed     3.0
                   open       6.5
          4483     open       1.0
          4514     closed     4.0
                   open       6.5
Name: size, Length: 531, dtype: float64

### Calculate the mean across all fish in the video

In [8]:
# Group the per-fish, per-label averages by video and label once; reuse for both summaries.
lengths_by_video_and_label = vent_lengths2.groupby(['video_id', 'label'])

print('Number of samples')
display(lengths_by_video_and_label.count())  # non-null per-fish values per video and label

print('\nMEAN duration (frames)')
display(lengths_by_video_and_label.mean())  # unweighted mean of the per-fish averages

Number of samples


video_id  label 
247       closed    225
          open      306
Name: size, dtype: int64


MEAN duration (frames)


video_id  label 
247       closed     4.296493
          open      16.855321
Name: size, dtype: float64