In [1]:
import pandas as pd
import numpy as np


def load_stats(path: str, c: str) -> pd.DataFrame:
    """Read a daily mobility-stats parquet file into the shared long format.

    Args:
        path: Location of the parquet file to read.
        c: Campaign label written to the ``campaign`` column of every row.

    Returns:
        A frame restricted to the columns ``playerId``, ``campaign``, ``ts``,
        ``counter`` and ``score``. ``counter`` is the source ``modeType``,
        ``ts`` is the source ``stat_date`` and ``score`` is the source
        ``distance`` divided by 1000.
    """
    output_columns = ['playerId', 'campaign', 'ts', 'counter', 'score']
    new_names = {'modeType': 'counter', 'distance': 'score', 'stat_date': 'ts'}

    stats = pd.read_parquet(path)
    # convert to km, and tag every row with the campaign it belongs to
    stats = stats.assign(distance=stats['distance'] / 1000, campaign=c)
    return stats.rename(columns=new_names)[output_columns]


def load_scores(path: str, c: str) -> pd.DataFrame:
    """Read a daily mobility-scores parquet file into the shared long format.

    Args:
        path: Location of the parquet file to read.
        c: Campaign label written to the ``campaign`` column of every row.

    Returns:
        A frame restricted to the columns ``playerId``, ``campaign``, ``ts``,
        ``counter`` and ``score``. ``playerId`` is the source ``player_id``,
        ``ts`` is the source ``day``, ``score`` is the source
        ``mobilityScore`` and ``counter`` is the constant string ``'score'``.
    """
    output_columns = ['playerId', 'campaign', 'ts', 'counter', 'score']
    new_names = {'player_id': 'playerId', 'mobilityScore': 'score', 'day': 'ts'}

    scores = pd.read_parquet(path).rename(columns=new_names)
    # Every row of this source is labelled with the same counter name.
    scores = scores.assign(counter='score', campaign=c)
    return scores[output_columns]


# Single source of truth for the data location and the campaigns to load.
# Both file maps below are derived from these, so their keys cannot drift apart.
DATA_DIR = '../data'
CAMPAIGNS = ('HSC_Lecco_2023', 'HSC_Lecco_2024', 'HSC_Ferrara_2023')

# Campaign name -> parquet file holding that campaign's daily mobility stats.
STATS_FILES = {
    campaign: f'{DATA_DIR}/{campaign}-Players_Daily_Mobility_Stats.parquet'
    for campaign in CAMPAIGNS
}

# Campaign name -> parquet file holding that campaign's daily mobility scores.
SCORES_FILES = {
    campaign: f'{DATA_DIR}/{campaign}-Players_Daily_Mobility_Scores.parquet'
    for campaign in CAMPAIGNS
}

# Load every campaign's files and stack them into one frame per source.
df_stats = pd.concat(
    [load_stats(file_path, campaign) for campaign, file_path in STATS_FILES.items()]
)
df_scores = pd.concat(
    [load_scores(file_path, campaign) for campaign, file_path in SCORES_FILES.items()]
)

df = (
    pd.concat([df_stats, df_scores])
    .assign(ts=lambda frame: pd.to_datetime(frame['ts']))
    # Weekly ('W') totals of the numeric columns per player / campaign / counter.
    .groupby(['playerId', 'campaign', 'counter'])
    .resample('W', on='ts')
    .sum(numeric_only=True)
    .reset_index()
    .assign(
        # Round each weekly total up to a whole number.
        score=lambda frame: np.ceil(frame['score']).astype(int),
        # Full month name of each weekly bin's timestamp.
        month=lambda frame: frame['ts'].dt.strftime('%B'),
    )
    .sort_values('ts', ascending=True, ignore_index=True)
)

df

Unnamed: 0,playerId,campaign,counter,ts,score,month
0,u_80f670db5858452ea13da68b1fa207c9,HSC_Lecco_2023,score,2023-02-26,0,February
1,u_3600b217e6044fa3974b097319fbd57b,HSC_Lecco_2023,score,2023-02-26,0,February
2,u_8c9493203b1c4421abdbf43b687a6b8e,HSC_Lecco_2023,score,2023-02-26,0,February
3,u_038630b46b5d420796318493a484ce8c,HSC_Lecco_2023,score,2023-02-26,0,February
4,u_1a125c7cf4cb4abdaa9d5b8f1d866fdf,HSC_Lecco_2023,score,2023-02-26,0,February
...,...,...,...,...,...,...
20654,u_7013d0bc1ea84fed833053cd101804da,HSC_Lecco_2024,bus,2024-05-26,78,May
20655,u_d07da7a08bf74dc3b17956476be069f1,HSC_Lecco_2024,score,2024-05-26,48,May
20656,u_8f899fc0a3614cf8a5093d817d30a5fc,HSC_Lecco_2024,score,2024-05-26,25,May
20657,u_c05bd6bb4a73422b81fd92debd1a5e30,HSC_Lecco_2024,walk,2024-05-26,2,May


In [2]:
# Per-counter (min, max) bounds used for min-max scaling of the weekly score.
# Values are not clipped, so a score outside its range scales outside [0, 1].
ranges = {
    'bike':     (0, 200),
    'bus':      (0, 500),
    'car':      (0, 1000),
    'score':    (0, 2000),
    'train':    (0, 1000),
    'walk':     (0, 100),
}

# Fail loudly (as a per-row dict lookup would) if a counter has no range,
# instead of letting the vectorised look-up below silently produce NaN.
unknown_counters = set(df['counter'].unique()) - set(ranges)
if unknown_counters:
    raise KeyError(
        f'no scaling range defined for counters: {sorted(map(str, unknown_counters))}'
    )

# Vectorised min-max scaling: map each row's counter to its bounds once,
# rather than running a Python lambda with three dict look-ups per row.
range_min = df['counter'].map({name: bounds[0] for name, bounds in ranges.items()})
range_max = df['counter'].map({name: bounds[1] for name, bounds in ranges.items()})
df['scaled_score'] = (df['score'] - range_min) / (range_max - range_min)

df

Unnamed: 0,playerId,campaign,counter,ts,score,month,scaled_score
0,u_80f670db5858452ea13da68b1fa207c9,HSC_Lecco_2023,score,2023-02-26,0,February,0.0000
1,u_3600b217e6044fa3974b097319fbd57b,HSC_Lecco_2023,score,2023-02-26,0,February,0.0000
2,u_8c9493203b1c4421abdbf43b687a6b8e,HSC_Lecco_2023,score,2023-02-26,0,February,0.0000
3,u_038630b46b5d420796318493a484ce8c,HSC_Lecco_2023,score,2023-02-26,0,February,0.0000
4,u_1a125c7cf4cb4abdaa9d5b8f1d866fdf,HSC_Lecco_2023,score,2023-02-26,0,February,0.0000
...,...,...,...,...,...,...,...
20654,u_7013d0bc1ea84fed833053cd101804da,HSC_Lecco_2024,bus,2024-05-26,78,May,0.1560
20655,u_d07da7a08bf74dc3b17956476be069f1,HSC_Lecco_2024,score,2024-05-26,48,May,0.0240
20656,u_8f899fc0a3614cf8a5093d817d30a5fc,HSC_Lecco_2024,score,2024-05-26,25,May,0.0125
20657,u_c05bd6bb4a73422b81fd92debd1a5e30,HSC_Lecco_2024,walk,2024-05-26,2,May,0.0200
