In [1]:
import numpy as np
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy.types import VARCHAR, Integer, Float

In [245]:
# SQLAlchemy URL that names only the database ('measureyes'); no user, host
# or port is given in the string.
connection_str = 'postgresql:///measureyes'
# echo=False: do not log the SQL statements the engine emits.
engine = create_engine(connection_str, echo=False)

In [246]:
# Name of the table to read; it is substituted into the query text below.
target_vid = 'Measureyes_0924_01'

# Select every column of that table, ordered by person and then by time.
Query_raw = (
    "\n"
    "SELECT * \n"
    "FROM {}\n"
    "ORDER BY person_index, timestamp\n"
    ";"
).format(target_vid)

In [247]:
# Run the query and load every returned record into a DataFrame
all_recs = pd.read_sql_query(sql=Query_raw, con=engine)

In [249]:
# Sample rate in milliseconds, read off as the timestamp of the record labelled 1.
# NOTE(review): this presumes record 0 sits at time 0 and record 1 is the very
# next sample after the ORDER BY in the query — confirm against the data.
sample_rate = all_recs.at[1, 'timestamp']
print("sample rate in milliseconds:", sample_rate)

sample rate in milliseconds: 41


In [250]:
# Keep only the rows where a yaw value is present, restricted to the
# columns needed for the pose analysis
cols = ['timestamp', 'person_index', 'face_yaw', 'face_pitch', 'face_box_top', 'face_box_left']
pose_recs = all_recs.loc[all_recs['face_yaw'].notnull(), cols]

In [251]:
# Preview the first 15 pose records
pose_recs.iloc[:15]

Unnamed: 0,timestamp,person_index,face_yaw,face_pitch,face_box_top,face_box_left
893,11386,19,-32.426006,-4.658849,0.340278,0.096094
2046,6172,31,-31.617493,-2.338707,0.372222,0.798437
2058,5797,32,-2.420268,0.07677,0.370833,0.908594
2059,6798,32,-14.524308,-2.389813,0.336111,0.225
2061,8383,32,-60.660793,-12.552453,0.323611,0.892187
2066,9175,32,-44.529732,-2.461496,0.3375,0.667188
2072,9968,32,-43.117874,5.770154,0.298611,0.799219
2081,10760,32,45.096027,5.798729,0.225,0.867969
2087,11177,32,-43.739269,-16.429325,0.334722,0.288281
2092,11594,32,28.041138,3.179321,0.306944,0.83125


In [252]:
# Distinct person indices that have at least one yaw record, and how many there are
unique_persons = pose_recs['person_index'].unique()
print(unique_persons)
print(len(unique_persons))

[ 19  31  32  33  34  36  37  38  43  46  47  53  57  58  61  62  63  67
  74 112 146 149 153 155 157 165]
26


In [253]:
# Eight most frequently seen persons among the records with a yaw value.
# Series.value_counts() sorts by count (descending); the top-level
# pd.value_counts() function is deprecated in recent pandas releases, and
# .head(8) is explicitly positional regardless of the integer index labels.
pose_recs['person_index'].value_counts().head(8)

32     153
34     132
33      61
46      57
47      46
67      19
112      4
149      4
Name: person_index, dtype: int64

In [254]:
# Grab the records of a single person_index and renumber them 0..n-1.
# reset_index(drop=True) returns a fresh DataFrame instead of assigning a new
# index onto the boolean-filtered slice, so later column assignments on
# df_indiv do not operate on an object pandas still tracks as a copy of
# pose_recs.
df_indiv = pose_recs[pose_recs['person_index'] == 46].reset_index(drop=True)

In [255]:
# Preview the last 15 records for this person
df_indiv.iloc[-15:]

Unnamed: 0,timestamp,person_index,face_yaw,face_pitch,face_box_top,face_box_left
42,73239,46,14.556217,-5.471243,0.227778,0.828125
43,73448,46,17.370554,2.666018,0.219444,0.730469
44,73656,46,-6.165985,2.091419,0.25,0.628125
45,73823,46,-27.48884,-2.543867,0.25,0.551562
46,74240,46,-32.041859,-2.608912,0.226389,0.4125
47,74449,46,-29.948095,-0.328268,0.230556,0.367969
48,74616,46,-29.207191,-2.716384,0.229167,0.334375
49,74824,46,-22.801897,-6.043785,0.225,0.292188
50,75033,46,-13.164354,-7.152011,0.2125,0.250781
51,75241,46,-7.517159,-5.063367,0.226389,0.224219


In [256]:
# Silence pandas' SettingWithCopyWarning (a warning, not an error) for the whole session.
# Background: https://stackoverflow.com/questions/42105859/pandas-map-to-a-new-column-settingwithcopywarning
pd.set_option('mode.chained_assignment', None)

In [257]:
# Add a column holding the time elapsed since the previous row.
# The first row has no predecessor, so its NaN difference is replaced by 0.
# .assign returns a new frame rather than writing a column into a frame that
# was obtained by filtering, so this step does not rely on the
# SettingWithCopyWarning being silenced.
df_indiv = df_indiv.assign(timediff=df_indiv['timestamp'].diff().fillna(0))

In [258]:
# Narrow the frame to the columns used from here on (in this order),
# then preview the last 12 rows
cols2 = ['person_index', 'timestamp', 'timediff', 'face_yaw']
df_indiv = df_indiv.loc[:, cols2]
df_indiv.iloc[-12:]

Unnamed: 0,person_index,timestamp,timediff,face_yaw
45,46,73823,167.0,-27.48884
46,46,74240,417.0,-32.041859
47,46,74449,209.0,-29.948095
48,46,74616,167.0,-29.207191
49,46,74824,208.0,-22.801897
50,46,75033,209.0,-13.164354
51,46,75241,208.0,-7.517159
52,46,75450,209.0,-16.137062
53,46,75617,167.0,-27.744755
54,46,75825,208.0,-24.237137


In [259]:
# Grab indices where time deltas indicate a gap between HTs.
# An HT starts at the very first record and at every record that follows a
# gap of at least HT_GAP_MS since the previous record.
HT_GAP_MS = 1500

# Flag gap rows, then force the first row to be a start. Testing
# `timediff == 0` for "first row" would also match any later record that
# shares a timestamp with its predecessor and create a spurious HT start.
is_ht_start = np.array(df_indiv['timediff'] >= HT_GAP_MS, dtype=bool)
if is_ht_start.size:
    is_ht_start[0] = True

# Index labels of the start rows, always in ascending order and always an
# integer array (even when empty).
ht_starts = np.asarray(df_indiv.index[is_ht_start], dtype=np.int64)
ht_starts

array([ 0,  7, 23, 41, 56])

In [260]:
# Show the rows at which each HT begins
df_indiv.loc[ht_starts, :]

Unnamed: 0,person_index,timestamp,timediff,face_yaw
0,46,15765,0.0,2.307748
7,46,31990,14598.0,54.677425
23,46,54429,19228.0,21.048765
41,46,73031,13806.0,52.025948
56,46,123665,47423.0,38.229702


In [261]:
# Positions at which each HT ends: the row just before the next HT's start,
# with -1 (i.e. the last row when used with .iloc) closing the final HT
ht_ends = np.array(list(ht_starts[1:] - 1) + [-1])
ht_ends

array([ 6, 22, 40, 55, -1])

In [265]:
# Show the rows at which each HT ends (positional lookup, so -1 is the last row)
df_indiv.iloc[ht_ends]

Unnamed: 0,person_index,timestamp,timediff,face_yaw
6,46,17392,417.0,68.354904
22,46,35201,417.0,-63.021053
40,46,59225,208.0,24.894844
55,46,76242,417.0,-53.926182
56,46,123665,47423.0,38.229702


In [266]:
# Interleave start and end positions: start0, end0, start1, end1, ...
# Pairing them directly does not depend on how the end of the last HT is
# encoded. Sorting the merged list and then moving its first element to the
# back only works while the last end is the -1 sentinel; with a real last
# position it drops the first HT's start and repeats the last row.
combined = [pos for start_end in zip(ht_starts, ht_ends) for pos in start_end]

combined

[6, 7, 22, 23, 40, 41, 55, 56, 56, -1]

In [267]:
# Rows at the positions listed in `combined` (HT starts and ends)
df_indiv.iloc[combined]

Unnamed: 0,person_index,timestamp,timediff,face_yaw
6,46,17392,417.0,68.354904
7,46,31990,14598.0,54.677425
22,46,35201,417.0,-63.021053
23,46,54429,19228.0,21.048765
40,46,59225,208.0,24.894844
41,46,73031,13806.0,52.025948
55,46,76242,417.0,-53.926182
56,46,123665,47423.0,38.229702
56,46,123665,47423.0,38.229702
56,46,123665,47423.0,38.229702


In [268]:
# Dwell time if all of this person's records formed a single HT:
# last timestamp minus first timestamp.
# The column is addressed by name rather than by position so the result does
# not depend on the column order of df_indiv.
df_indiv['timestamp'].iloc[-1] - df_indiv.loc[0, 'timestamp']

107900

In [270]:
# Generate array of dwells: for each HT, timestamp of its last record minus
# timestamp of its first record (same unit as the timestamp column).
# The single-HT case needs no special branch: with one start at 0 and one end
# at -1 the general formula already yields last minus first.
timestamps = np.asarray(df_indiv['timestamp'])  # addressed by name, not column position

# Pair starts with ends one-to-one; integer casts keep indexing valid even
# when there are no HTs at all.
n_hts = min(len(ht_starts), len(ht_ends))
start_pos = np.asarray(ht_starts[:n_hts], dtype=np.intp)
end_pos = np.asarray(ht_ends[:n_hts], dtype=np.intp)

dwells = timestamps[end_pos] - timestamps[start_pos]
dwells

array([1627, 3211, 4796, 3211,    0])

In [271]:
# Construct HT table for individual: one row per HT holding the person, the
# timestamp of the HT's first record, and the dwell divided by 1000
# (milliseconds -> seconds, given that timestamps are in milliseconds).
cols = ['person_index', 'timestamp', 'dwell']
cols_renamed = ['person_index', 'HT_start', 'HT_dwell']

# Built as one chain of new frames: no column is written onto the .iloc
# slice, so nothing here depends on SettingWithCopyWarning being silenced.
df_HTs = (
    df_indiv.iloc[ht_starts]
    .assign(dwell=dwells / 1000.)
    .loc[:, cols]
    .rename(columns=dict(zip(cols, cols_renamed)))
)

df_HTs

Unnamed: 0,person_index,HT_start,HT_dwell
0,46,15765,1.627
7,46,31990,3.211
23,46,54429,4.796
41,46,73031,3.211
56,46,123665,0.0


In [None]:
# next steps: 
# Filter by yaw
# Iterate this through each individual
#     Filter for dwell minimum
# append dataframes
# sort by HT start
# reindex
# Calculate stats: HT counts, HT distinct viewers, HTR
# put into a function