# Selección de animal a estudiar

In [24]:
import polars as pl

In [25]:
# Load the processed tracking CSV into a polars DataFrame (path is relative to the notebook).
tracking = pl.read_csv("../../data/foca_procesado.csv")

In [26]:
# Parse the textual "date" column into a Datetime and order the rows chronologically.
# Note: this cell reassigns `tracking`, so it expects "date" to still be a string column.
tracking = (
    tracking
    .with_columns(
        pl.col("date").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    )
    .sort(by="date")
)

Nos vamos a quedar únicamente con el tramo más largo de días consecutivos, porque el modelo necesita datos de días consecutivos.

In [27]:
def longest_consecutive_days(df):
    """Find the longest run of consecutive calendar days present in ``df["date"]``.

    Two observations belong to the same run when their calendar dates are the
    same day or adjacent days; the time-of-day component is ignored for that
    comparison. The run length is measured in calendar days, not in number of
    observations, so a single densely sampled day counts as one day.

    Parameters
    ----------
    df : DataFrame
        Frame with a ``date`` column holding ``datetime``/``date`` values.
        Null dates are ignored.

    Returns
    -------
    tuple
        ``(max_streak, best_start, best_end)`` where ``max_streak`` is the
        number of calendar days in the longest run, and ``best_start`` /
        ``best_end`` are the first and last ``date`` values (as stored in the
        column) that fall inside that run. On ties the earliest run wins.

    Raises
    ------
    ValueError
        If the ``date`` column contains no non-null values.
    """
    stamps = [
        stamp
        for stamp in df.select('date').unique().sort(by='date')['date'].to_list()
        if stamp is not None
    ]
    if not stamps:
        raise ValueError("the 'date' column has no non-null values")

    max_streak = 0
    best_start = best_end = stamps[0]

    run_start = previous = stamps[0]
    run_days = 1

    for stamp in stamps[1:]:
        # toordinal() works on the calendar date only, so intraday offsets
        # cannot hide a skipped day the way a timedelta's .days can.
        day_gap = stamp.toordinal() - previous.toordinal()
        if day_gap > 1:
            # A day was skipped: close the current run (strict '>' keeps the
            # earliest run on ties) and start a new one at this observation.
            if run_days > max_streak:
                max_streak, best_start, best_end = run_days, run_start, previous
            run_start, run_days = stamp, 1
        else:
            # day_gap is 0 (same day, no new day gained) or 1 (next day).
            run_days += day_gap
        previous = stamp

    # The final run is never closed inside the loop.
    if run_days > max_streak:
        max_streak, best_start, best_end = run_days, run_start, previous

    return max_streak, best_start, best_end

In [28]:
# Longest run of consecutive days in the tracking data, plus its first/last date values.
(max_streak, best_start, best_end) = longest_consecutive_days(df=tracking)

In [29]:
# Keep only the rows whose date lies inside the selected run (both bounds inclusive).
tracking_filtered = tracking.filter(
    (pl.col("date") >= best_start) & (pl.col("date") <= best_end)
)

In [30]:
# Extremes of the recorded coordinates, used as the bounding box.
# NOTE(review): these are taken from the full `tracking` frame, not from
# `tracking_filtered` — confirm that the unfiltered extent is intended.
max_latitude, min_latitude = (
    tracking.get_column("decimal_latitude").max(),
    tracking.get_column("decimal_latitude").min(),
)
max_longitude, min_longitude = (
    tracking.get_column("decimal_longitude").max(),
    tracking.get_column("decimal_longitude").min(),
)

In [31]:
# Single-row table with the bounding box, written to CSV.
box = pl.DataFrame(
    dict(
        max_latitude=max_latitude,
        min_latitude=min_latitude,
        max_longitude=max_longitude,
        min_longitude=min_longitude,
    )
)
box.write_csv("../../data/world_box.csv")

In [32]:
# Two-row table describing the selected period: the row with first == 1 holds
# the start date, the row with first == 0 holds the end date, each split into
# day / month / year columns. Written to CSV.
temp = pl.DataFrame({
    "first": [1, 0],
    "day": [bound.day for bound in (best_start, best_end)],
    "month": [bound.month for bound in (best_start, best_end)],
    "year": [bound.year for bound in (best_start, best_end)],
})

temp.write_csv("../../data/temporal_subset.csv")