In [26]:
import polars as pl
import numpy as np

In [27]:
# Read the raw animal records and drop the three columns listed below
# in a single chained expression.
data = (
    pl.read_csv("../../data/animals.csv")
    .drop(["longitude_se", "latitude_se", "abbreviated_name"])
)

In [28]:
from datetime import datetime

# Parse the timestamp string once with the native polars parser and derive the
# calendar parts from it, instead of re-parsing every row four times in Python
# through `map_elements`.
_date_parsed = pl.col("date").str.to_datetime("%Y-%m-%d %H:%M:%S")

data = data.with_columns(
    # The `.dt` accessors return narrow integer types; cast to Int64 so the
    # resulting columns keep the dtype the previous implementation produced.
    _date_parsed.dt.year().cast(pl.Int64).alias("year"),
    _date_parsed.dt.month().cast(pl.Int64).alias("month"),
    _date_parsed.dt.day().cast(pl.Int64).alias("day"),
    _date_parsed.dt.hour().cast(pl.Int64).alias("hour"),
)

In [29]:
# Keep one record per individual and calendar day: the last one after ordering
# by hour. Sorting by "hour" alone leaves the order of records that share the
# same hour unspecified, so the row picked by `.last()` could vary between
# runs. The full parsed timestamp is used as a tie-breaker and the sort is made
# stable so the selection is deterministic.
data_processed = (
    data
    .sort(
        by=["hour", pl.col("date").str.to_datetime("%Y-%m-%d %H:%M:%S")],
        maintain_order=True,
    )
    .group_by(["individual_id", "year", "month", "day"])
    # Row order inside each group follows the sorted input, so `.last()`
    # takes the latest record of the day for every column.
    .agg(pl.all().last())
    # Restore a chronological ordering; later cells rely on it when shifting
    # values within each individual.
    .sort(by=["year", "month", "day", "hour", "individual_id"])
)

In [30]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points on a sphere.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
            All four arguments may be scalars or NumPy arrays of matching
            (broadcastable) shape.

    Returns:
        The haversine distance in km, computed with a sphere radius of
        6371 km. Scalar for scalar inputs, array for array inputs.
    """
    R = 6371  # Earth radius in km
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c  # Distance in km


In [31]:
# For every row, attach the coordinates found one row earlier within the same
# individual; the first row of each individual gets nulls.
data_processed = data_processed.with_columns(
    prev_lat=pl.col("decimal_latitude").shift(1).over("individual_id"),
    prev_lon=pl.col("decimal_longitude").shift(1).over("individual_id"),
)

In [32]:
# Apply the haversine function row by row to get the distance between the
# previous and the current position.
def _step_distance_km(row):
    """Return the haversine distance for one struct row, or None if any coordinate is missing."""
    # Checking only `prev_lat` would let a null in any other coordinate reach
    # np.radians(None) and raise; require all four values instead.
    if any(value is None for value in row.values()):
        return None
    return haversine(
        row["prev_lat"], row["prev_lon"],
        row["decimal_latitude"], row["decimal_longitude"],
    )


data_processed = data_processed.with_columns(
    pl.struct(["decimal_latitude", "decimal_longitude", "prev_lat", "prev_lon"])
    .map_elements(_step_distance_km, return_dtype=pl.Float64)
    .alias("distance_km")
)

In [33]:
# Coordinate differences between the current and the previous position.
data_processed = data_processed.with_columns(
    (pl.col("decimal_latitude") - pl.col("prev_lat")).alias("delta_lat"),
    (pl.col("decimal_longitude") - pl.col("prev_lon")).alias("delta_lon"),
)

# Compute the angle in radians and convert it to degrees. This is
# arctan2(delta_lat, delta_lon) on the raw degree differences, i.e. the angle
# measured from the +longitude axis. Native expressions replace the per-row
# Python callback; a null in either delta still yields a null angle.
data_processed = data_processed.with_columns(
    pl.arctan2(pl.col("delta_lat"), pl.col("delta_lon"))
    .degrees()
    .cast(pl.Float64)
    .alias("angle_degrees")
)

In [34]:
from datetime import datetime

# Parse the date and re-format it so it can be joined with the Copernicus data.
# The time-of-day part is replaced by the literal "T00:00:00", leaving a
# day-level key. Native parsing/formatting replaces the per-row Python
# `strftime` callback.
data_processed = data_processed.with_columns(
    pl.col("date")
    .str.to_datetime("%Y-%m-%d %H:%M:%S")
    .dt.strftime("%Y-%m-%dT00:00:00")
    .alias("date_format")
)

In [36]:
# Persist the processed frame (all columns currently in `data_processed`) as CSV.
data_processed.write_csv("../../data/animals_processed.csv")