## Generate Mobility Time Series as Dictionary

### Imports

In [1]:
import pandas as pd
import numpy as np

### Load Data

In [None]:
# Location of the CSV holding the samples joined with mobility data
# (the file produced by the intersection notebook(s)).
mobility_csv_path = '/Users/kelseydoerksen/Desktop/Giga/SchoolMapping/BWA/Mobility/sample_df_mobility_300m_buffer.csv'
sample_df_with_mobility = pd.read_csv(mobility_csv_path)

### Generate Time Series Features

In [38]:
def generate_mobility_timeseries_feature(aoi_samples_df, sample_id, day_period, month, stat):
    """
    Calculate one mobility time series feature for a single sample.

    The dataframe is narrowed to the rows whose ``UID``, ``agg_day_period`` and
    ``start_date`` match the request, and the requested statistic is computed
    from the ``activity_index_total`` column of those rows.

    :param aoi_samples_df: df of samples to filter and process; the columns
        ``UID``, ``agg_day_period``, ``start_date``, ``agg_time_period`` and
        ``activity_index_total`` are read from it
    :param sample_id: id of the sample we are querying (matched against ``UID``)
    :param day_period: value matched against the ``agg_day_period`` column
        (the caller in this file passes 1 for weekend and 0 for weekday)
    :param month: month to query, one of 'oct', 'nov', 'dec'
    :param stat: statistic to return, one of:
        'var' (variance of the activity index),
        'peak_activity' (``agg_time_period`` of the row with the highest activity index),
        'measurement_count' (number of matching rows),
        'max' / 'min' (largest / smallest activity index)
    :return: the requested statistic, or ``np.nan`` when no rows match (or, for
        'peak_activity', when every matching activity value is missing)
    :raises ValueError: if ``stat`` is not one of the supported statistics
    :raises KeyError: if ``month`` is not one of the supported months
    """

    month_dict = {
        'oct': '2023-10-01',
        'nov': '2023-11-01',
        'dec': '2023-12-01'
    }
    supported_stats = ('var', 'peak_activity', 'measurement_count', 'max', 'min')

    # Fail loudly on a typo instead of silently returning None, which would
    # otherwise end up as a column full of None in the feature table.
    if stat not in supported_stats:
        raise ValueError(
            "Unknown stat {!r}; expected one of {}".format(stat, supported_stats)
        )

    mask = (
        (aoi_samples_df['UID'] == sample_id)
        & (aoi_samples_df['agg_day_period'] == day_period)
        & (aoi_samples_df['start_date'] == month_dict[month])
    )
    filtered_df = aoi_samples_df[mask]

    if filtered_df.empty:
        return np.nan

    if stat == 'measurement_count':
        return len(filtered_df)

    activity = filtered_df['activity_index_total']
    if stat == 'var':
        return activity.var()
    if stat == 'max':
        return activity.max()
    if stat == 'min':
        return activity.min()

    # stat == 'peak_activity'
    # With no valid activity value there is no peak to locate; idxmax would
    # not give a usable row label, so report the feature as missing instead.
    if activity.isna().all():
        return np.nan

    # Sort by time period first so that, when several rows share the maximum,
    # idxmax (first occurrence) resolves to the earliest time period.
    ordered_df = filtered_df.sort_values(by='agg_time_period')
    peak_row = ordered_df['activity_index_total'].idxmax()
    return ordered_df.loc[peak_row, 'agg_time_period']

In [39]:
def generate_mobility_timeseries_df(sample_df, month):
    """
    Generate a dataframe of features extracted from the
    mobility timeseries per UID

    For every unique ``UID`` in ``sample_df`` the variance, peak hour and
    measurement count are computed separately for weekend and weekday rows
    via ``generate_mobility_timeseries_feature``.

    :param sample_df: df of samples with mobility data; must contain a ``UID``
        column plus the columns read by ``generate_mobility_timeseries_feature``
    :param month: month key forwarded to ``generate_mobility_timeseries_feature``
        and used as the suffix of every feature column name
    :return: dataframe with one row per UID and the columns ``uid``,
        ``weekend_var_<month>``, ``weekend_peak_hour_<month>``,
        ``weekend_measurement_count_<month>``, ``weekday_var_<month>``,
        ``weekday_peak_hour_<month>``, ``weekday_measurement_count_<month>``
    """
    # (column prefix, agg_day_period value): weekend rows are queried with 1,
    # weekday rows with 0.
    day_periods = (('weekend', 1), ('weekday', 0))
    # (column label, stat name understood by generate_mobility_timeseries_feature)
    features = (
        ('var', 'var'),
        ('peak_hour', 'peak_activity'),
        ('measurement_count', 'measurement_count'),
    )

    # Use the dataframe that was passed in rather than a notebook-level global,
    # so the function works on whatever samples the caller provides.
    uids = sample_df['UID'].unique()

    mobile_timeseries_dict = {'uid': list(uids)}
    for day_label, day_period in day_periods:
        for column_label, stat in features:
            column_name = '{}_{}_{}'.format(day_label, column_label, month)
            mobile_timeseries_dict[column_name] = [
                generate_mobility_timeseries_feature(sample_df, u, day_period, month, stat)
                for u in uids
            ]

    timeseries_feature_df = pd.DataFrame.from_dict(mobile_timeseries_dict)

    return timeseries_feature_df

In [None]:
# Build one feature dataframe per month covered by the mobility data.
df_oct, df_nov, df_dec = (
    generate_mobility_timeseries_df(sample_df_with_mobility, month_key)
    for month_key in ('oct', 'nov', 'dec')
)