# Process tracking data

In [None]:
import polars as pl

In [2]:
# Column dtypes used when reading the raw tracking CSV.
# `pl.String` is used for every text column (`pl.Utf8` is just an alias of
# the same dtype), so the schema reads consistently.
schema = {
    "individual_id": pl.String,
    "abbreviated_name": pl.String,
    "date": pl.Datetime,
    "decimal_longitude": pl.Float64,
    "decimal_latitude": pl.Float64,
    "longitude_se": pl.Float64,
    "latitude_se": pl.Float64,
}

In [3]:
# Load the raw tracking file with the explicit schema, then discard the
# columns that are not used further on (standard errors and the short name).
data = (
    pl.read_csv("../../data/foca.csv", schema=schema)
    .drop(["longitude_se", "latitude_se", "abbreviated_name"])
)

In [None]:
# Derive the calendar components of "date" as separate columns, appended in
# the order year, month, day, hour.
data_processed = data.with_columns(
    year=pl.col("date").dt.year(),
    month=pl.col("date").dt.month(),
    day=pl.col("date").dt.day(),
    hour=pl.col("date").dt.hour(),
)

## Format date

In [None]:
from datetime import datetime

# Format date according to the specification: keep the calendar day and emit
# a fixed midnight time component ("T00:00:00"), replacing the "date" column
# with the resulting string.
# The native `dt.strftime` expression is used instead of a per-row Python
# lambda through `map_elements`, so the formatting runs inside the Polars
# engine rather than calling back into Python for every value.
data_processed = data_processed.with_columns(
    pl.col("date").dt.strftime("%Y-%m-%dT00:00:00")
)

## Write

In [11]:
# Persist the processed table (formatted date plus year/month/day/hour columns).
data_processed.write_csv(file="../../data/foca_procesado.csv")

In [None]:
# Column dtypes used when reading the processed CSV back in; "date" is read
# as a plain string here.
schema = {
    "individual_id": pl.String,
    "date": pl.String,
    "decimal_longitude": pl.Float64,
    "decimal_latitude": pl.Float64,
    "year": pl.Int32,
    "month": pl.Int8,
    "day": pl.Int8,
    "hour": pl.Int8,
}

# Reload the processed file, remove the individual identifier, shorten the
# coordinate column names, and keep only rows with longitude <= 120.
data = (
    pl.read_csv("../../data/foca_procesado.csv", schema=schema)
    .drop("individual_id")
    .rename({"decimal_longitude": "longitude", "decimal_latitude": "latitude"})
    .filter(pl.col("longitude") <= 120)  # Outliers
)

In [7]:
# Persist the filtered table with the renamed coordinate columns.
data.write_csv(file="../../data/datos_foca.csv")