In [10]:
import polars as pl
import altair as alt

In [11]:
# Location of the 2024 bike-trip export (path is relative, not absolute)
recorridos_2024_path = "data/recorridos_realizados_2024.csv"

In [12]:
# Read the raw trips CSV via the PyArrow reader and let Polars try to parse date-like columns.
ecobici2024_df = pl.read_csv(
    recorridos_2024_path,
    try_parse_dates=True,
    use_pyarrow=True,
)

In [13]:
# Per-column summary (count, null_count, min/max, quantiles) to sanity-check the load.
# The coordinate columns come back as strings with a decimal comma and are converted in a later cell.
ecobici2024_df.describe()

statistic,Id_recorrido,duracion_recorrido,fecha_origen_recorrido,id_estacion_origen,nombre_estacion_origen,direccion_estacion_origen,long_estacion_origen,lat_estacion_origen,fecha_destino_recorrido,id_estacion_destino,nombre_estacion_destino,direccion_estacion_destino,long_estacion_destino,lat_estacion_destino,id_usuario,modelo_bicicleta,género
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""count""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598""","""1964598"""
"""null_count""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0""","""0"""
"""mean""",,,"""2024-04-25 15:27:57.674000""",,,,,,"""2024-04-25 15:51:45.310000""",,,,,,,,
"""std""",,,,,,,,,,,,,,,,,
"""min""","""20180969BAEcobici""","""1.000""","""2024-01-01 00:06:50""","""101BAEcobici""","""-CDO BARRACAS-""","""1019 Riglos""","""-58,3554654""","""-34,536691""","""2024-01-01 00:28:36""","""101BAEcobici""","""-- CDO Chacarita -- (Temporal)""","""1019 Riglos""","""-58,3554654""","""-34,536691""","""1000003BAEcobici""","""FIT""",""""""
"""25%""",,,"""2024-02-27 15:04:23""",,,,,,"""2024-02-27 15:25:59""",,,,,,,,
"""50%""",,,"""2024-04-19 12:25:47""",,,,,,"""2024-04-19 12:45:17""",,,,,,,,
"""75%""",,,"""2024-06-22 15:48:09""",,,,,,"""2024-06-22 16:18:11""",,,,,,,,
"""max""","""22823318BAEcobici""","""999""","""2024-08-31 23:54:32""","""9BAEcobici""","""399 - GARCIA DEL RIO""","""o´higins 1327""","""-58,527098""","""-34,687767""","""2024-09-03 01:00:43""","""9BAEcobici""","""399 - GARCIA DEL RIO""","""o´higins 1327""","""-58,527098""","""-34,687767""","""999994BAEcobici""","""ICONIC""","""OTHER"""


In [14]:
#ecobici2024_df.dtypes

In [15]:
# Not needed: the null check in the next cell finds no rows with missing dates.
#ecobici2024_df = ecobici2024_df.drop_nulls(subset=['fecha_origen_recorrido', 'fecha_destino_recorrido'])

In [16]:
# Number of rows where the start or the end timestamp is missing (despite the name, this is a count).
are_empty_dates = ecobici2024_df.filter(
    pl.any_horizontal(
        pl.col('fecha_origen_recorrido').is_null(),
        pl.col('fecha_destino_recorrido').is_null(),
    )
).height
are_empty_dates

0

In [17]:
# Station coordinates arrive as text with a decimal comma (e.g. "-58,3554654").
# Swap the comma for a dot and cast to Float64, storing the result under new column names
# so the original string columns are left untouched.
ecobici2024_df = ecobici2024_df.with_columns(
    [
        pl.col(raw_name).str.replace(',', '.').cast(pl.Float64).alias(numeric_name)
        for raw_name, numeric_name in [
            ('long_estacion_origen', 'estacion_origen_lon'),
            ('lat_estacion_origen', 'estacion_origen_lat'),
            ('long_estacion_destino', 'estacion_destino_lon'),
            ('lat_estacion_destino', 'estacion_destino_lat'),
        ]
    ]
)

In [33]:
# Derive calendar features and the trip length from the two timestamps.
# NOTE: month, day_name and date are taken from the trip's END timestamp
# (fecha_destino_recorrido), so a ride that crosses midnight is attributed to the day it ended.
ecobici2024_df = ecobici2024_df.with_columns(
    pl.col('fecha_destino_recorrido').dt.strftime("%B").alias("month"),
    pl.col('fecha_destino_recorrido').dt.strftime("%A").alias("day_name"),
    pl.col('fecha_destino_recorrido').dt.date().alias("date"),
    # Elapsed time between start and end, in whole minutes.
    (pl.col('fecha_destino_recorrido') - pl.col('fecha_origen_recorrido')).dt.total_minutes().alias('trip_duration_minutes')
).with_columns(
    # The weekday column is 'day_name' (created just above); the frame has no 'day' column,
    # so the flag must be built from 'day_name' for this cell to run on a fresh kernel.
    # 1 = Saturday/Sunday, 0 = any other day.
    pl.when(pl.col('day_name').is_in(['Saturday', 'Sunday'])).then(1).otherwise(0).alias('is_weekend')
)

Visualization

In [19]:
# Quick look: trip duration against start time for the first 100 rows only.
(
    ecobici2024_df
    .head(100)
    .plot
    .line(x='fecha_origen_recorrido', y='trip_duration_minutes')
)


In [26]:
# Count trips per weekday name; the row order of a group_by result is not guaranteed.
trips_by_day_df = (
    ecobici2024_df
    .group_by("day_name")
    .agg(pl.len().alias('trip_count'))
)
trips_by_day_df

day,trip_count
str,u32
"""Wednesday""",349664
"""Tuesday""",344135
"""Saturday""",124375
"""Monday""",322168
"""Thursday""",365129
"""Sunday""",121386
"""Friday""",337741


In [29]:
# Trips by day of week.
# Altair orders a nominal axis alphabetically by default (Friday, Monday, Saturday, ...),
# so pass the weekday order explicitly to make the bars read Monday -> Sunday.
(
    alt.Chart(trips_by_day_df).mark_bar().encode(
        x=alt.X(
            "day_name:N",
            sort=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
            title="Day of week",
        ),
        y=alt.Y("trip_count:Q", title="Trips"),
    ).properties(
        # Says "day of week" so it is not confused with the per-date chart further down.
        title="Trips by day of week in 2024"
    )
).show()

In [34]:
# Count trips per calendar date, then show how many distinct dates are present.
trips_by_date_df = (
    ecobici2024_df
    .group_by("date")
    .agg(pl.len().alias('trip_count_by_date'))
)
trips_by_date_df.height

246

In [40]:
# Daily trip counts as a line chart; title and width are set on the chart constructor.
(
    alt.Chart(trips_by_date_df, title="Trips by day in 2024", width=600)
    .mark_line()
    .encode(
        alt.X("date"),
        alt.Y("trip_count_by_date"),
    )
).show()