In [1]:
import pandas as pd

In [2]:
dataset = pd.read_csv('processed/studentlife_2014.csv')

In [3]:
dataset

Unnamed: 0,user_id,date,stress_level,environmental_temperature_mean,environmental_temperature_max,environmental_temperature_min,environmental_humidity_mean,environmental_humidity_max,environmental_humidity_min,environmental_precipitation,...,individual_minutes_running,individual_minutes_unknown,environmental_minutes_silence,environmental_minutes_voice,environmental_minutes_noise,environmental_minutes_unknown,organizational_work_hours,deadlines,days_until_next_deadline,weekday
0,4,2013-04-02,1,2.0,1.0,2.0,1.0,1.0,0.0,0.0,...,28.0,4.0,518.0,195.0,176.0,0.0,4.0,0.0,6.0,1
1,4,2013-03-27,0,2.0,2.0,2.0,2.0,3.0,1.0,0.0,...,19.0,5.0,352.0,179.0,277.0,0.0,5.0,0.0,12.0,2
2,4,2013-04-03,2,2.0,1.0,2.0,1.0,1.0,0.0,0.0,...,23.0,2.0,387.0,300.0,269.0,0.0,3.0,0.0,5.0,2
3,4,2013-03-28,0,2.0,2.0,2.0,3.0,4.0,1.0,0.0,...,29.0,3.0,410.0,268.0,255.0,0.0,3.0,0.0,11.0,3
4,4,2013-03-29,1,2.0,2.0,2.0,3.0,4.0,2.0,0.0,...,42.0,10.0,368.0,293.0,288.0,0.0,3.0,0.0,10.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
643,59,2013-05-21,1,3.0,3.0,4.0,4.0,4.0,3.0,0.0,...,28.0,11.0,468.0,189.0,783.0,0.0,3.0,0.0,3.0,1
644,59,2013-05-22,1,3.0,3.0,3.0,4.0,4.0,2.0,0.0,...,14.0,16.0,462.0,124.0,849.0,0.0,1.0,0.0,2.0,2
645,59,2013-05-23,1,3.0,3.0,4.0,4.0,4.0,3.0,0.0,...,7.0,5.0,203.0,47.0,370.0,0.0,2.0,0.0,1.0,3
646,59,2013-05-24,2,3.0,3.0,3.0,4.0,4.0,4.0,0.0,...,12.0,24.0,399.0,178.0,836.0,0.0,2.0,1.0,5.0,4


In [5]:
def augment_environmental_temperature_mean(df, N):
    """
    Agrega columnas con métricas de los últimos N días para la columna environmental_temperature_mean.
    
    Parámetros:
        df (pd.DataFrame): El DataFrame original.
        N (int): Número de días hacia atrás para calcular las métricas.
        
    Retorna:
        pd.DataFrame: El DataFrame con columnas adicionales.
    """
    # Aseguramos que el DataFrame esté ordenado por user_id y date
    df = df.sort_values(by=['user_id', 'date']).reset_index(drop=True)
    
    # Creamos las columnas de métricas basadas en la ventana de N días
    df[f'env_temp_mean_last_{N}_days'] = (
        df.groupby('user_id')['environmental_temperature_mean']
        .transform(lambda x: x.rolling(window=N, min_periods=1).mean())
    )
    df[f'env_temp_min_last_{N}_days'] = (
        df.groupby('user_id')['environmental_temperature_mean']
        .transform(lambda x: x.rolling(window=N, min_periods=1).min())
    )
    df[f'env_temp_max_last_{N}_days'] = (
        df.groupby('user_id')['environmental_temperature_mean']
        .transform(lambda x: x.rolling(window=N, min_periods=1).max())
    )
    df[f'env_temp_std_last_{N}_days'] = (
        df.groupby('user_id')['environmental_temperature_mean']
        .transform(lambda x: x.rolling(window=N, min_periods=1).std())
    )
    
    return df


In [6]:
df_augmented = augment_environmental_temperature_mean(dataset, 2)

In [7]:
df_augmented

Unnamed: 0,user_id,date,stress_level,environmental_temperature_mean,environmental_temperature_max,environmental_temperature_min,environmental_humidity_mean,environmental_humidity_max,environmental_humidity_min,environmental_precipitation,...,environmental_minutes_noise,environmental_minutes_unknown,organizational_work_hours,deadlines,days_until_next_deadline,weekday,env_temp_mean_last_2_days,env_temp_min_last_2_days,env_temp_max_last_2_days,env_temp_std_last_2_days
0,4,2013-03-27,0,2.0,2.0,2.0,2.0,3.0,1.0,0.0,...,277.0,0.0,5.0,0.0,12.0,2,2.0,2.0,2.0,
1,4,2013-03-28,0,2.0,2.0,2.0,3.0,4.0,1.0,0.0,...,255.0,0.0,3.0,0.0,11.0,3,2.0,2.0,2.0,0.0
2,4,2013-03-29,1,2.0,2.0,2.0,3.0,4.0,2.0,0.0,...,288.0,0.0,3.0,0.0,10.0,4,2.0,2.0,2.0,0.0
3,4,2013-04-02,1,2.0,1.0,2.0,1.0,1.0,0.0,0.0,...,176.0,0.0,4.0,0.0,6.0,1,2.0,2.0,2.0,0.0
4,4,2013-04-03,2,2.0,1.0,2.0,1.0,1.0,0.0,0.0,...,269.0,0.0,3.0,0.0,5.0,2,2.0,2.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
643,59,2013-05-21,1,3.0,3.0,4.0,4.0,4.0,3.0,0.0,...,783.0,0.0,3.0,0.0,3.0,1,3.0,3.0,3.0,0.0
644,59,2013-05-22,1,3.0,3.0,3.0,4.0,4.0,2.0,0.0,...,849.0,0.0,1.0,0.0,2.0,2,3.0,3.0,3.0,0.0
645,59,2013-05-23,1,3.0,3.0,4.0,4.0,4.0,3.0,0.0,...,370.0,0.0,2.0,0.0,1.0,3,3.0,3.0,3.0,0.0
646,59,2013-05-24,2,3.0,3.0,3.0,4.0,4.0,4.0,0.0,...,836.0,0.0,2.0,1.0,5.0,4,3.0,3.0,3.0,0.0
