# In [44]:
import altair as alt
import polars as pl
from polars import col
from datetime import datetime

# glimpse headers
# pl.scan_parquet('strava.parquet').collect().glimpse(max_items_per_column=10)

# Lazy frame for the analysis: keeps only the columns used downstream and
# every activity whose local start date is on or after 2024-01-01.
# NOTE(review): the filter has no upper bound, so activities from later years
# are kept as well — confirm that is intended for a "2024" analysis.
lf = (
    pl.scan_parquet('strava.parquet')
    .select(
        "name",
        "type",
        "distance",
        "moving_time",
        "elapsed_time",
        "total_elevation_gain",
        "start_date_local",
        "kudos_count",
        "comment_count",
        "photo_count",
        "elev_high",
    )
    .with_columns(
        # Scale distance down by 1000 (presumably metres -> kilometres;
        # TODO confirm the source units).
        distance=col("distance").cast(pl.Float32) / 1000,
        # Drop the time-of-day component and keep a plain calendar date.
        start_date_local=col("start_date_local").cast(pl.Datetime).cast(pl.Date),
    )
    .filter(col("start_date_local") >= datetime(2024, 1, 1))
)

# Caption analysis across all sports: narrow the frame to the charted columns
# and add `default_caption_flag`, which is 1 when the activity name is one of
# the default titles listed below and 0 otherwise.
caption_analysis = (
    lf.select("name", "type", "kudos_count", "start_date_local", "distance")
    .with_columns(
        col("name")
        .is_in(
            [
                "Morning Run", "Afternoon Run", "Evening Run",
                "Morning Ride", "Afternoon Ride", "Evening Ride",
                "Morning Workout", "Afternoon Workout", "Evening Workout",
            ]
        )
        .cast(pl.Int32)
        .alias("default_caption_flag")
    )
)

# Execute the lazy query for runs only; the result is the eager DataFrame
# handed to the chart.
# NOTE(review): recent Polars releases deprecate `streaming=True` in favour of
# `engine="streaming"` — confirm against the installed version before changing.
caption_analysis_run = (
    caption_analysis
    .filter(col("type") == "Run")
    .collect(streaming=True)
)


# Scatter plot for runs: distance on x, kudos on y, colour showing whether a
# default caption was used; the tooltip shows the activity date.
chart_run = (
    alt.Chart(caption_analysis_run)
    .mark_point()
    .encode(
        x=alt.X("distance:Q", title="distance ran (km)"),
        y=alt.Y("kudos_count:Q", title="number of kudos"),
        color=alt.Color("default_caption_flag:N", title="default caption used?"),
        tooltip="start_date_local:T",
    )
    # Relabel the 0/1 flag so the legend reads Yes/No.
    .transform_calculate(
        default_caption_flag="datum.default_caption_flag == 1 ? 'Yes':'No'"
    )
    .configure_point(size=50, filled=True)
    .properties(width=400, height=400)
    # Enable pan/zoom on the axes.
    .interactive()
)

#     .properties(width=500, title="kudos")
#     .configure_axisX(tickCount=14)
#     )
# caption_analysis_chart_run.encoding.x.title = "distance (km)"
# caption_analysis_chart_run.encoding.y.title = "kudos"

# with pl.Config(float_precision=2, tbl_cols=11):
#     print(lf.collect(streaming=True))

# alt.Chart.configure_axisX?
# Bare expression as the final statement so a notebook cell displays the chart.
chart_run