# Estatísticas Strava

## Importação de Bibliotecas

In [1]:
import os
from datetime import date, datetime, timedelta

import ipywidgets as widgets
import pandas as pd
import pytz
from IPython.display import HTML, clear_output, display
from requests_oauthlib import OAuth2Session

## CONSTANTES e Métodos

In [2]:
first_day_year = datetime(2025, 1, 1, 0, 0)

EPOCH_TIMESTAMP = first_day_year.timestamp()

QUANTITY_PER_PAGE = 100

TRANSLATE_ACTIVITIES = {
    "Hike": "Trilha",
    "Crossfit": "Crossfit",
    "Walk": "Caminhada",
    "WeightTraining": "Levantamento de Peso",
    "Run": "Corrida",
    "Workout": "Treino",
    "Swim": "Natação",
    "Yoga": "Yoga",
    "MountainBikeRide": "Bicicleta",
}

TRANSLATE_WEEKDAYS = {
    "Sunday": "Domingo",
    "Monday": "Segunda-feira",
    "Tuesday": "Terça-feira",
    "Wednesday": "Quarta-feira",
    "Thursday": "Quinta-feira",
    "Friday": "Sexta-feira",
    "Saturday": "Sábado",
}


def login_strava():
    # Baseado na documentação presente em https://developers.strava.com/
    # Defina local id, secret, e redirect_url - para gerar esses dados é necessário criar uma app no Strava
    CLIENT_ID = os.environ.get("STRAVA_CLIENT_ID")
    CLIENT_SECRET = os.environ.get("STRAVA_CLIENT_SECRET")
    REDIRECT_URL = "https://developers.strava.com"

    # Cria sessão
    session = OAuth2Session(client_id=CLIENT_ID, redirect_uri=REDIRECT_URL)

    # Define base auth url e escopo
    AUTH_BASE_URL = "https://www.strava.com/oauth/authorize"
    session.scope = ["activity:read"]

    # Gerar link de autorização
    auth_link = session.authorization_url(AUTH_BASE_URL)
    print(f"Clique aqui: {auth_link[0]}")
    redirect_response = input("Cole a redirect url aqui e pressione a tecla ENTER: ")

    # Obter token de sessão
    TOKEN_URL = "https://www.strava.com/api/v3/oauth/token"
    session.fetch_token(
        token_url=TOKEN_URL,
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        authorization_response=redirect_response,
        include_client_id=True,
    )

    return session


def generate_df_analysis_by_df(df, show_total):
    activity_counts_dict = df["sport_type"].value_counts().to_dict()
    df_activity_counts = pd.DataFrame.from_dict(
        activity_counts_dict, orient="index", columns=["Count"]
    )

    elapsed_time_by_activity_dict = (
        df.groupby("sport_type")["elapsed_time"].sum().to_dict()
    )

    elapsed_time_dict = {
        index: str(timedelta(seconds=value))
        for index, value in elapsed_time_by_activity_dict.items()
    }
    df_elapsed_time = pd.DataFrame.from_dict(
        elapsed_time_dict, orient="index", columns=["Elapsed Time"]
    )

    distance_by_activity_dict = df.groupby("sport_type")["distance"].sum().to_dict()
    distance_dict = {
        index: value / 1000 for index, value in distance_by_activity_dict.items()
    }
    df_distance = pd.DataFrame.from_dict(
        distance_dict, orient="index", columns=["Distance (km)"]
    )

    total_elevation_gain_by_activity_dict = (
        df.groupby("sport_type")["total_elevation_gain"].sum().to_dict()
    )
    df_elevation_gain = pd.DataFrame.from_dict(
        total_elevation_gain_by_activity_dict,
        orient="index",
        columns=["Elevation Gain (m)"],
    )

    concat_df = pd.concat(
        [df_activity_counts, df_elapsed_time, df_distance, df_elevation_gain], axis=1
    )
    concat_df.rename(index=TRANSLATE_ACTIVITIES, inplace=True)

    concat_df["Elapsed Time"] = pd.to_timedelta(concat_df["Elapsed Time"])

    if show_total:
        concat_df.loc["Total"] = concat_df.sum(numeric_only=False)
        total_seconds = concat_df.loc["Total", "Elapsed Time"].total_seconds()
        concat_df.loc["Total", "Elapsed Time"] = pd.to_timedelta(
            total_seconds, unit="s"
        )

    return concat_df


def get_number_days_year():
    TIMEZONE = pytz.timezone("America/Sao_Paulo")
    today = datetime.now(TIMEZONE).date()
    first_day_year = datetime(today.year, 1, 1).replace(tzinfo=TIMEZONE).date()
    return (today - first_day_year).days + 1


def format_time(seconds):
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = int(seconds % 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

## Login

In [3]:
session = login_strava()
response = session.get("https://www.strava.com/api/v3/athlete")

Clique aqui: https://www.strava.com/oauth/authorize?response_type=code&client_id=145648&redirect_uri=https%3A%2F%2Fdevelopers.strava.com&scope=activity%3Aread&state=NKS7RkINQe5f6y4r2LmpotxuHjkyk1


Cole a redirect url aqui e pressione a tecla ENTER:  https://developers.strava.com/?state=NKS7RkINQe5f6y4r2LmpotxuHjkyk1&code=6867a8c0036332e2bd2463e79d7dad6a3d3a7e68&scope=read,activity:read


In [4]:
data = response.json()
print(response.status_code, response.reason, data["username"])

200 OK rodrigomaria


## Consumo da API REST

In [5]:
all_activities = []
page = 1

while True:
    response = session.get(
        f"https://www.strava.com/api/v3/athlete/activities?after={EPOCH_TIMESTAMP}&page={page}&per_page={QUANTITY_PER_PAGE}"
    )

    if response.status_code != 200 or not response.json():
        print(response.status_code, response.json(), response.reason)
        break

    print(f"Dados Página {page}")
    all_activities.extend(response.json())
    page += 1

Dados Página 1
Dados Página 2
Dados Página 3
Dados Página 4
Dados Página 5
Dados Página 6
Dados Página 7
200 [] OK


## Manipulação dos Dados

In [6]:
# Criação do Dataframe
df = pd.DataFrame.from_dict(all_activities, orient="columns")

# Limpeza/ajuste das colunas
df.drop("type", axis=1, inplace=True)
df.drop("resource_state", axis=1, inplace=True)
df.drop("athlete", axis=1, inplace=True)
df.start_date_local = pd.to_datetime(df.start_date_local)
df.start_date = pd.to_datetime(df.start_date)

df.tail(5)

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,sport_type,device_name,id,start_date,start_date_local,...,has_kudoed,suffer_score,workout_type,average_cadence,average_watts,max_watts,weighted_average_watts,device_watts,kilojoules,average_temp
650,Caminhada vespertina,665.9,707,753,0.0,Walk,Apple Watch Ultra 2,16734473133,2025-12-13 20:24:59+00:00,2025-12-13 17:24:59+00:00,...,False,2.0,,,,,,,,
651,Caminhada ao entardecer,510.2,559,592,0.0,Walk,Apple Watch Ultra 2,16734660327,2025-12-13 21:22:10+00:00,2025-12-13 18:22:10+00:00,...,False,1.0,,,,,,,,
652,Caminhada vespertina,1221.9,1000,1189,3.4,Walk,Apple Watch Ultra 2,16743802580,2025-12-14 19:39:05+00:00,2025-12-14 16:39:05+00:00,...,False,2.0,,,,,,,,
653,Caminhada matinal,1129.8,966,1004,0.0,Walk,Apple Watch Ultra 2,16748305481,2025-12-15 12:24:09+00:00,2025-12-15 09:24:09+00:00,...,False,2.0,,,,,,,,
654,5km,5000.0,2408,2408,9.0,Run,COROS PACE 3,16750272487,2025-12-15 16:28:29+00:00,2025-12-15 13:28:29+00:00,...,False,69.0,3.0,78.8,159.3,219.0,166.0,True,383.5,


## Filtro por Esporte

In [7]:
def statistics_by_activity(df_filtered):
    generated_df = generate_df_analysis_by_df(df_filtered, False)
    display(df_filtered)
    display(generated_df)


def on_change(change):
    with output:
        clear_output(wait=True)
        if change["type"] == "change" and change["name"] == "value":
            df_filtered = df[df["sport_type"] == change["new"]]
            statistics_by_activity(df_filtered)


select = widgets.Select(
    options=df.sport_type.unique(),
    value=df.sport_type.unique()[0],
    rows=9,
    description="Selecione:",
)

output = widgets.Output()

on_change({"type": "change", "name": "value", "new": select.value})
select.observe(on_change, names="value")
display(select, output)

Select(description='Selecione:', options=('Hike', 'Crossfit', 'Walk', 'WeightTraining', 'Run', 'Workout', 'Yog…

Output()

## Informações Anuais

In [8]:
total_activities = len(df)
total_time = format_time(df["elapsed_time"].sum())
total_distance = df["distance"].sum() / 1000
total_elevation = df["total_elevation_gain"].sum()

total_activity_sequence_days = len(df["start_date_local"].dt.date.unique())

df["day_of_week"] = df["start_date_local"].dt.day_name()
best_week_day = df.groupby("day_of_week").size().sort_values(ascending=False).index[0]

df["hour_of_day"] = df["start_date_local"].dt.hour
best_active_hour = (
    df.groupby("hour_of_day").size().sort_values(ascending=False).index[0]
)

median_activity_seconds = df["elapsed_time"].sum() / total_activities

df["month_year"] = df["start_date_local"].dt.strftime("%Y-%m")
monthly_stats = (
    df.groupby("month_year")
    .agg({"elapsed_time": "sum", "start_date_local": "count"})
    .reset_index()
)
monthly_stats["Tempo Total"] = monthly_stats["elapsed_time"].apply(format_time)
monthly_stats.drop("elapsed_time", axis=1, inplace=True)
monthly_time_df = monthly_stats.rename(
    columns={
        "month_year": "Ano/Mês",
        "elapsed_time": "Tempo Total",
        "start_date_local": "Atividades",
    }
)

general_informations = {
    "Total de Atividades": f"{total_activities}",
    "Tempo Total": f"{total_time}",
    "Distância Total": f"{total_distance:.1f}km",
    "Elevação Total": f"{total_elevation}m",
    "Dias em sequência em atividade": f"{total_activity_sequence_days}/{get_number_days_year()}",
    "Melhor dia da semana": f"{TRANSLATE_WEEKDAYS.get(best_week_day)}",
    "Hora mais ativa": f"{best_active_hour}:00",
    "Tempo médio em atividade": f"{format_time(median_activity_seconds)}",
}

general_informations_df = pd.DataFrame(general_informations, index=[0])

In [9]:
display(HTML(general_informations_df.to_html(index=False)))
display(HTML(monthly_time_df.to_html(index=False)))
display(HTML(generate_df_analysis_by_df(df, True).to_html(index=True)))

Total de Atividades,Tempo Total,Distância Total,Elevação Total,Dias em sequência em atividade,Melhor dia da semana,Hora mais ativa,Tempo médio em atividade
655,211:49:04,667.5km,3690.8m,283/349,Segunda-feira,17:00,00:19:24


Ano/Mês,Atividades,Tempo Total
2025-01,89,34:46:05
2025-02,84,32:58:37
2025-03,101,27:11:14
2025-04,59,18:18:53
2025-05,42,14:08:24
2025-06,37,11:07:14
2025-07,21,07:25:56
2025-08,40,12:25:28
2025-09,37,12:23:33
2025-10,57,15:10:46


Unnamed: 0,Count,Elapsed Time,Distance (km),Elevation Gain (m)
Caminhada,465,6 days 12:54:50,492.2959,3213.2
Crossfit,69,0 days 16:38:51,24.1937,53.0
Treino,37,0 days 18:25:31,0.139,0.0
Corrida,34,0 days 11:01:21,88.336,292.6
Yoga,15,0 days 02:26:39,0.0,0.0
Ride,12,0 days 02:28:33,29.951,61.0
Bicicleta,10,0 days 02:07:02,31.7234,22.0
Levantamento de Peso,6,0 days 00:39:57,0.0,0.0
Soccer,3,0 days 00:24:49,0.0,0.0
Trilha,2,0 days 00:38:47,0.7954,49.0


## Informações Semanais

In [10]:
# Geração do dataframe semanal
def generate_this_week_df(WEEK):
    start_date = pd.to_datetime(f"{WEEKS[WEEK]['start_date']}T00:00:00-00:00")
    end_date = pd.to_datetime(f"{WEEKS[WEEK]['end_date']}T23:59:00-00:00")
    this_week_df = df[
        (df["start_date_local"] >= start_date) & (df["start_date_local"] <= end_date)
    ]
    return this_week_df


# Retorna todas as segundas-feiras a partir de uma data
def anual_mondays():
    d = date(2025, 4, 14)
    d += timedelta(days=(7 - d.weekday()) % 7)
    mondays = []
    while d.year == 2025:
        mondays.append(d)
        d += timedelta(days=7)
    return mondays


# Cria o dicionário das semanas para uso no filtro
def generate_WEEKS_dict():
    WEEKS = {}

    for i, monday in enumerate(anual_mondays()):
        WEEKS[f"Week {i + 1}"] = {
            "start_date": str(monday),
            "end_date": str(monday + timedelta(days=6)),
        }

    return WEEKS


WEEKS = generate_WEEKS_dict()

In [11]:
# Informações Semanais
WEEK = "Week 32"

this_week_df = generate_this_week_df(WEEK)

# Tratamento dos dados
this_week_grouped = this_week_df.groupby("sport_type").agg(
    {"distance": "sum", "elapsed_time": "sum"}
)

this_week_grouped = this_week_grouped.rename(
    columns={"distance": "Distância", "elapsed_time": "Tempo Total"}
)

this_week_grouped.index.name = "Esporte"

display_df = this_week_grouped.copy()

display_df["Distância"] = display_df["Distância"].apply(
    lambda x: f"{round(x / 1000, 2)}km"
)
display_df["Tempo Total"] = display_df["Tempo Total"].apply(
    lambda x: f"{timedelta(seconds=int(x))}"
)

display_df.loc["Total"] = [
    f"{round(this_week_grouped['Distância'].sum() / 1000, 2)}km",
    f"{timedelta(seconds=int(this_week_grouped['Tempo Total'].sum()))}",
]

styled_df = display_df.style.set_caption(
    f"{WEEK} - {WEEKS[WEEK]['start_date']} - {WEEKS[WEEK]['end_date']}"
)

display(this_week_df)
display(styled_df)

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,sport_type,device_name,id,start_date,start_date_local,...,average_cadence,average_watts,max_watts,weighted_average_watts,device_watts,kilojoules,average_temp,day_of_week,hour_of_day,month_year
593,Caminhada na hora do almoço,802.7,584,584,0.0,Walk,Apple Watch Ultra 2,16485133030,2025-11-17 14:38:16+00:00,2025-11-17 11:38:16+00:00,...,,,,,,,,Monday,11,2025-11
594,Caminhada na hora do almoço,740.4,587,696,21.2,Walk,Apple Watch Ultra 2,16485592670,2025-11-17 15:34:13+00:00,2025-11-17 12:34:13+00:00,...,,,,,,,,Monday,12,2025-11
595,Caminhada ao entardecer,1262.8,1162,1207,3.2,Walk,Apple Watch Ultra 2,16488741558,2025-11-17 21:10:03+00:00,2025-11-17 18:10:03+00:00,...,,,,,,,,Monday,18,2025-11
596,Caminhada ao entardecer,490.3,496,618,0.0,Walk,Apple Watch Ultra 2,16488910957,2025-11-17 21:39:45+00:00,2025-11-17 18:39:45+00:00,...,,,,,,,,Monday,18,2025-11
597,Caminhada vespertina,1302.9,1226,1293,0.0,Walk,Apple Watch Ultra 2,16498889166,2025-11-18 20:51:51+00:00,2025-11-18 17:51:51+00:00,...,,,,,,,,Tuesday,17,2025-11
598,Caminhada ao entardecer,502.5,610,610,0.0,Walk,Apple Watch Ultra 2,16499809162,2025-11-18 23:00:56+00:00,2025-11-18 20:00:56+00:00,...,,,,,,,,Tuesday,20,2025-11
599,Caminhada na hora do almoço,557.7,466,466,7.8,Walk,Apple Watch Ultra 2,16505572697,2025-11-19 14:50:24+00:00,2025-11-19 11:50:24+00:00,...,,,,,,,,Wednesday,11,2025-11
600,Caminhada na hora do almoço,436.9,400,400,2.4,Walk,Apple Watch Ultra 2,16506068981,2025-11-19 15:47:12+00:00,2025-11-19 12:47:12+00:00,...,,,,,,,,Wednesday,12,2025-11
601,Caminhada vespertina,1282.1,1135,1890,7.8,Walk,Apple Watch Ultra 2,16516360632,2025-11-20 16:16:11+00:00,2025-11-20 13:16:11+00:00,...,,,,,,,,Thursday,13,2025-11
602,Caminhada na hora do almoço,497.7,468,468,0.0,Walk,Apple Watch Ultra 2,16533936323,2025-11-22 14:07:00+00:00,2025-11-22 11:07:00+00:00,...,,,,,,,,Saturday,11,2025-11


Unnamed: 0_level_0,Distância,Tempo Total
Esporte,Unnamed: 1_level_1,Unnamed: 2_level_1
Walk,10.06km,2:59:17
Total,10.06km,2:59:17
