In [None]:
from labda import Subject
from pathlib import Path
from prepare_data import (
    randomly_remove_data,
    get_origin_geodataframe,
    get_interpolate_geodataframe,
    get_location_deviation,
)
from labda.spatial.interpolate import interpolate
import pandas as pd

# Per-subject parquet files to evaluate; every match of the glob is processed.
files = Path("../../temp/Carlson/Processed").glob("*.parquet")

# Settings forwarded positionally to `randomly_remove_data`.
# `max_duration` is additionally used as the `limit` of every interpolation.
max_missing_bout = "1m"
min_continous_bout = "2h"
max_duration = "15m"
samples = (2, 10)

# Window passed to the window-based interpolation methods.
window = "5m"

# Interpolation methods under comparison, mapped to the extra keyword
# arguments each one receives on top of `limit` and `method`.
# "linear" is called without a window; "meseck" and "hwang" both get `window`.
methods = {
    "linear": {},
    "meseck": {"window": window},
    "hwang": {"window": window},
}

# One frame per successfully processed subject; concatenated in a later cell.
results = []

for file in files:
    sbj = Subject.from_parquet(file)
    print(sbj.id)
    crs = sbj.metadata.crs
    sampling_frequency = sbj.metadata.sampling_frequency
    try:
        df = randomly_remove_data(
            sbj.df,
            sampling_frequency,
            max_missing_bout,
            min_continous_bout,
            max_duration,
            samples,
        )
    except Exception as e:
        # Best effort: a subject that cannot be prepared is reported and
        # skipped so the remaining subjects are still evaluated.
        print(f"Error processing {sbj.id}: {e}")
        continue

    # Keep only the rows whose "remove" flag is not set; this is the input
    # handed to every interpolation method below.
    df_removed = df.loc[~df["remove"]]
    # Reference geodataframe built from the full frame. It gets its own name
    # instead of rebinding `df`, so `df` keeps referring to the flagged frame.
    origin = get_origin_geodataframe(df, sampling_frequency, crs)

    # Run the identical pipeline for each method:
    # interpolate -> geodataframe -> deviation from the reference.
    deviations = []
    for method, extra_kwargs in methods.items():
        interpolated = interpolate(
            df_removed,
            sampling_frequency,
            crs,
            limit=max_duration,
            method=method,
            **extra_kwargs,
        )
        interpolated = get_interpolate_geodataframe(
            interpolated, sampling_frequency, crs
        )
        deviation = get_location_deviation(origin, interpolated, crs)
        # The name becomes the column label after the column-wise concat
        # (presumably `deviation` is a pandas Series -- confirm in prepare_data).
        deviation.name = method
        deviations.append(deviation)

    # One column per method, plus the subject id for later grouping.
    result = pd.concat(deviations, axis=1)
    result["subject"] = sbj.id
    results.append(result)

116
108
89
94
86
109
80
111
88
91
85
114
122
Error processing 122: DataFrame is empty.
93
115
117
102
103
105
123
97
96
101
78
79
112
99
82
113
118
92
124
119
121
98
120
87
90
100
104


In [None]:
from labda import Subject
from prepare_data import (
    randomly_remove_data,
)
from labda.spatial.interpolate import interpolate
import pandas as pd

# Debug cell: replay the "hwang" interpolation for a single subject on a frame
# loaded from disk, then tally the values of the resulting "impute" column.

# Same settings as the removal step. Only `max_duration` and `window` are
# consumed by the live code below; the others serve the disabled recipe.
max_missing_bout, min_continous_bout, max_duration = "1m", "2h", "15m"
samples = (2, 10)
window = "5m"

sbj = Subject.from_parquet("../../temp/Carlson/Processed/90.parquet")
print(sbj.id)
crs, sampling_frequency = sbj.metadata.crs, sbj.metadata.sampling_frequency

# Disabled recipe (presumably how the input frame was produced -- confirm):
# df = randomly_remove_data(
#     sbj.df, sampling_frequency, max_missing_bout,
#     min_continous_bout, max_duration, samples,
# )
# df_removed = df.loc[~df["remove"]]
# df = get_origin_geodataframe(df, sampling_frequency, crs)

# NOTE(review): "error.parquet" is written by a separate
# `df_removed.to_parquet("error.parquet")` cell further down, so this read
# only works when that file already exists on disk.
df_removed = pd.read_parquet("error.parquet")

hwang_options = {"limit": max_duration, "method": "hwang", "window": window}
hwang = interpolate(df_removed, sampling_frequency, crs, **hwang_options)

# Last expression of the cell: counts per "impute" value, NaN included.
hwang["impute"].value_counts(dropna=False)

90
DataFrame is empty or has only two rows. Future features cannot be calculated.
DataFrame is empty or has only two rows. Future features cannot be calculated.


impute
False            1785
stop              636
move              129
interpolate        30
stop-and-move      20
Name: count, dtype: int64

In [None]:
# Write the current `df_removed` frame to disk; the subject-90 debug cell
# reads this same file name back with `pd.read_parquet`.
snapshot_path = "error.parquet"
df_removed.to_parquet(snapshot_path)

In [14]:
# Stack the per-subject result frames row-wise into one table.
method_columns = ["linear", "meseck", "hwang"]
analysis = pd.concat(results, axis=0)

# Displayed output: mean of each method's column over all rows of all subjects.
analysis[method_columns].mean()

linear     82.106679
meseck    346.359766
hwang      84.202062
dtype: float64