In [82]:
import polars as pl
import numpy as np

In [83]:
# Read the CSV and drop three columns in a single expression.
data = pl.read_csv("../../data/animals.csv").drop(
    ["longitude_se", "latitude_se", "abbreviated_name"]
)

In [84]:
from datetime import datetime

# Parse the "date" string once with Polars' native parser instead of calling
# Python's strptime four times per row through map_elements.
# The original string column is left untouched; only the derived parts are added.
date_parsed = pl.col("date").str.to_datetime("%Y-%m-%d %H:%M:%S")

data = data.with_columns(
    # The .dt accessors return narrower integer types; cast to Int64 so the
    # resulting columns keep the same dtype the previous implementation produced.
    date_parsed.dt.year().cast(pl.Int64).alias("year"),
    date_parsed.dt.month().cast(pl.Int64).alias("month"),
    date_parsed.dt.day().cast(pl.Int64).alias("day"),
    date_parsed.dt.hour().cast(pl.Int64).alias("hour"),
)

In [85]:
# Keep one record per individual per calendar day: the last one of that day.
# Rows inside a group keep the order they had before grouping, so the sort
# performed first decides which row `last()` returns.
# Sorting by "hour" alone leaves rows that share an hour in an undefined
# order, so "date" is added as a tie-breaker to make the pick deterministic.
# NOTE(review): assumes the time part of "date" is zero-padded, so that string
# order matches chronological order within an hour - confirm against the data.
data_processed = (
    data
    .sort(by=["hour", "date"])
    .group_by(["individual_id", "year", "month", "day"])
    .agg(pl.all().last())
    # group_by does not promise any output order; restore a chronological,
    # per-individual ordering (ascending is the default for every key).
    .sort(by=["year", "month", "day", "hour", "individual_id"])
)

In [86]:
def haversine(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in decimal degrees.
    radius_km : float, optional
        Sphere radius in kilometres. Defaults to 6371 (Earth's radius in km).

    Returns
    -------
    float or numpy.ndarray
        Distance in the same unit as ``radius_km`` (kilometres by default).
    """
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) NaN; clamp it to the valid range.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius_km * c


In [87]:
# For every row, attach the coordinates of the preceding row of the same
# individual; the first row of each individual has no predecessor and gets null.
data_processed = data_processed.with_columns(
    prev_lat=pl.col("decimal_latitude").shift(1).over("individual_id"),
    prev_lon=pl.col("decimal_longitude").shift(1).over("individual_id"),
)

In [88]:
# Distance in km between each row's position and the previous position of the
# same individual, computed row by row with `haversine`.
data_processed = data_processed.with_columns(
    pl.struct(["decimal_latitude", "decimal_longitude", "prev_lat", "prev_lon"]).map_elements(
        lambda row: (
            haversine(row["prev_lat"], row["prev_lon"], row["decimal_latitude"], row["decimal_longitude"])
            # Emit null when ANY coordinate is missing: checking prev_lat alone
            # lets a null in another field reach np.radians and raise TypeError.
            if all(value is not None for value in row.values())
            else None
        ),
        return_dtype=pl.Float64,
    ).alias("distance_km")
)

In [89]:
# Coordinate differences (current minus previous position), in degrees.
data_processed = data_processed.with_columns(
    delta_lat=pl.col("decimal_latitude") - pl.col("prev_lat"),
    delta_lon=pl.col("decimal_longitude") - pl.col("prev_lon"),
)

# Direction of each step: arctan2(delta_lat, delta_lon) converted from radians
# to degrees, i.e. measured counter-clockwise from the +longitude axis.
# Rows with a missing delta get null.
# NOTE(review): this works on raw degree differences, so it is a planar angle
# and not a compass bearing - confirm that this is the intended quantity.
data_processed = data_processed.with_columns(
    pl.struct(["delta_lat", "delta_lon"])
    .map_elements(
        lambda d: (
            None
            if d["delta_lat"] is None or d["delta_lon"] is None
            else np.degrees(np.arctan2(d["delta_lat"], d["delta_lon"]))
        ),
        return_dtype=pl.Float64,
    )
    .alias("angle_degrees")
)
data_processed.filter(pl.col("individual_id") == "M30_44554").head()

individual_id,year,month,day,date,decimal_longitude,decimal_latitude,hour,prev_lat,prev_lon,distance_km,delta_lat,delta_lon,angle_degrees
str,i64,i64,i64,str,f64,f64,i64,f64,f64,f64,f64,f64,f64
"""M30_44554""",2003,12,25,"""2003-12-25 22:30:11""",74.210189,-53.305201,22,,,,,,
"""M30_44554""",2003,12,26,"""2003-12-26 22:30:11""",73.670844,-53.169558,22,-53.305201,74.210189,38.933636,0.135643,-0.539345,165.883131
"""M30_44554""",2003,12,27,"""2003-12-27 22:30:11""",73.828314,-53.228181,22,-53.169558,73.670844,12.349546,-0.058622,0.157469,-20.419189
"""M30_44554""",2003,12,28,"""2003-12-28 22:30:11""",73.56542,-53.162012,22,-53.228181,73.828314,18.995692,0.066169,-0.262893,165.87236
"""M30_44554""",2003,12,29,"""2003-12-29 22:30:11""",74.599275,-53.36752,22,-53.162012,73.56542,72.456286,-0.205508,1.033855,-11.242603
