In [39]:
import pandas as pd
import numpy as np
from datetime import datetime
from zoneinfo import ZoneInfo
from statsmodels.tsa.seasonal import seasonal_decompose

In [45]:
# ---------------------------------------------------------------------------
# Load the raw weather export and derive calendar, daylight, scaled, lagged
# and rolling-mean features. Everything is written into `df`, and the column
# names / insertion order are exactly the ones later cells rely on.
# ---------------------------------------------------------------------------

# Prefix used in generated column names -> scaled source column.
# The dict order fixes the order in which lag / moving-average columns are added.
SCALED_FEATURES = {
    'temp': 'temp_stan',
    'pressure': 'pressure_norm',
    'humidity': 'humidity_stan',
    'dewpoint': 'dewpoint_stan',
}


def min_max_scale(series):
    """Rescale `series` as (x - min) / (max - min), using its own min and max.

    A constant series makes the denominator zero, so the result is not
    finite in that case; no guard is applied here.
    """
    lowest = series.min()
    highest = series.max()
    return (series - lowest) / (highest - lowest)


# Drop fully identical rows, then order by the epoch-seconds `dt` column.
# The index is deliberately NOT reset, so row labels stay those of the CSV.
# NOTE(review): drop_duplicates() compares every column. The displayed header
# starts with 'Unnamed: 0'; if that is a saved index column, rows repeating the
# same `dt` with a different index value are not removed - confirm whether
# de-duplicating on `dt` alone was intended.
df = (
    pd.read_csv('../data_csv_raw/raw_data.csv')
    .drop_duplicates()
    .sort_values(by='dt')
)

# --- Calendar / clock features, taken from the local (Asia/Kuala_Lumpur) time ---
local_time = pd.to_datetime(df['dt'], unit='s', utc=True).dt.tz_convert('Asia/Kuala_Lumpur')
df['utc8'] = local_time
df['year'] = local_time.dt.year
df['month'] = local_time.dt.month
df['day'] = local_time.dt.day
df['hour'] = local_time.dt.hour
df['hour_stan'] = min_max_scale(df['hour'])
df['minute'] = local_time.dt.minute
df['second'] = local_time.dt.second
df['date'] = local_time.dt.date
df['time'] = local_time.dt.time
df['weekday'] = local_time.dt.day_name()     # e.g., 'Monday'
df['week'] = local_time.dt.isocalendar().week
df['day_of_year'] = local_time.dt.dayofyear
df['quarter'] = local_time.dt.quarter

# --- Daylight flags -------------------------------------------------------------
# np.select takes the first matching condition:
#   sunrise_sunset: 0 = at/before sunrise, 1 = after sunrise up to sunset, 2 = after sunset
#   sun_on:         1 = after sunrise up to sunset, 0 = otherwise
conditions = [
    df['dt'] <= df['sunrise'],
    (df['dt'] > df['sunrise']) & (df['dt'] <= df['sunset']),
]
choices = [0, 1]
df['sunrise_sunset'] = np.select(conditions, choices, default=2)
df['sun_on'] = np.select(conditions, choices, default=0)

# --- Scaled measurements --------------------------------------------------------
# Pressure is z-scored (mean / std); the other three are min-max scaled.
# The statistics come from the whole loaded frame.
df['pressure_norm'] = (df['pressure'] - df['pressure'].mean()) / df['pressure'].std()
df['temp_stan'] = min_max_scale(df['temp'])
df['humidity_stan'] = min_max_scale(df['humidity'])
df['dewpoint_stan'] = min_max_scale(df['dew_point'])

# --- Lag features: <prefix>lag_1 .. <prefix>lag_6 -------------------------------
# shift(k) moves values down by k ROWS. That equals k hours only when the rows
# are consecutive hourly samples with no gaps (the displayed rows are 3600 s
# apart, but gaps elsewhere are not checked here).
for lag in range(1, 7):
    for prefix, scaled_col in SCALED_FEATURES.items():
        df[f'{prefix}lag_{lag}'] = df[scaled_col].shift(lag)

# --- Moving averages: <prefix>MA_1, _3, _5, _7, _9 ------------------------------
# The suffix is NOT the window length: the rolling window is suffix + 1 rows
# (2, 4, 6, 8, 10), i.e. the current row plus `suffix` preceding rows.
# NOTE(review): kept as in the original so existing column names and values do
# not change - confirm that a window of suffix + 1 is what was intended.
for suffix in range(1, 11, 2):
    window = suffix + 1
    for prefix, scaled_col in SCALED_FEATURES.items():
        df[f'{prefix}MA_{suffix}'] = df[scaled_col].rolling(window=window).mean()

df

Unnamed: 0,lat,lon,timezone,timezone_offset,dt,sunrise,sunset,temp,feels_like,pressure,...,humidityMA_5,dewpointMA_5,tempMA_7,pressureMA_7,humidityMA_7,dewpointMA_7,tempMA_9,pressureMA_9,humidityMA_9,dewpointMA_9
549,3.033,101.45,Asia/Kuala_Lumpur,28800,1735660800,1735687143,1735730158,25.78,26.87,1012,...,,,,,,,,,,
1655,3.033,101.45,Asia/Kuala_Lumpur,28800,1735664400,1735687143,1735730158,25.53,26.59,1012,...,,,,,,,,,,
1560,3.033,101.45,Asia/Kuala_Lumpur,28800,1735668000,1735687143,1735730158,25.45,26.51,1011,...,,,,,,,,,,
1307,3.033,101.45,Asia/Kuala_Lumpur,28800,1735671600,1735687143,1735730158,25.15,26.18,1011,...,,,,,,,,,,
314,3.033,101.45,Asia/Kuala_Lumpur,28800,1735675200,1735687143,1735730158,24.98,25.99,1010,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1869,3.033,101.45,Asia/Kuala_Lumpur,28800,1742295600,1742253553,1742297127,29.50,31.39,1009,...,0.244444,0.439510,0.731970,-0.174797,0.220833,0.443015,0.697801,0.062130,0.256667,0.460353
67,3.033,101.45,Asia/Kuala_Lumpur,28800,1742299200,1742253553,1742297127,28.81,34.85,1010,...,0.350000,0.490196,0.686236,-0.314166,0.287500,0.473456,0.685664,-0.049365,0.286667,0.476529
263,3.033,101.45,Asia/Kuala_Lumpur,28800,1742302800,1742253553,1742297127,28.37,32.69,1011,...,0.430556,0.530882,0.623461,-0.314166,0.352083,0.495441,0.669657,-0.105112,0.311667,0.487059
1385,3.033,101.45,Asia/Kuala_Lumpur,28800,1742306400,1742253553,1742297127,28.23,33.20,1012,...,0.541667,0.598922,0.559147,-0.244481,0.431250,0.529338,0.627968,-0.105112,0.365000,0.508118


In [46]:
# Additive seasonal decomposition (period = 24 rows) of the four scaled series.
# The four result objects keep their original names so they stay available
# to any later cell.
decomp_temp, decomp_pressure, decomp_humidity, decomp_dewpoint = (
    seasonal_decompose(df[scaled_col], model='additive', period=24)
    for scaled_col in ('temp_stan', 'pressure_norm', 'humidity_stan', 'dewpoint_stan')
)

# (column prefix, scaled source column, decomposition result)
decomposed = [
    ('temp', 'temp_stan', decomp_temp),
    ('pressure', 'pressure_norm', decomp_pressure),
    ('humidity', 'humidity_stan', decomp_humidity),
    ('dewpoint', 'dewpoint_stan', decomp_dewpoint),
]

# Store trend / seasonal / residual per variable. In the displayed output the
# trend and residual columns are NaN in the first and last rows.
for prefix, _, result in decomposed:
    df[f'{prefix}_trend'] = result.trend
    df[f'{prefix}_seasonal'] = result.seasonal
    df[f'{prefix}_residual'] = result.resid

# "<prefix>_w_seas" = scaled value * (1 + seasonal component).
# NOTE(review): the decomposition above is additive while this combination is
# multiplicative - confirm this is the intended feature.
for prefix, scaled_col, _ in decomposed:
    df[f'{prefix}_w_seas'] = df[scaled_col] * (1 + df[f'{prefix}_seasonal'])

df

Unnamed: 0,lat,lon,timezone,timezone_offset,dt,sunrise,sunset,temp,feels_like,pressure,...,humidity_trend,humidity_seasonal,humidity_residual,dewpoint_trend,dewpoint_seasonal,dewpoint_residual,temp_w_seas,pressure_w_seas,humidity_w_seas,dewpoint_w_seas
549,3.033,101.45,Asia/Kuala_Lumpur,28800,1735660800,1735687143,1735730158,25.78,26.87,1012,...,,0.000544,,,0.002068,,0.172788,1.232540,0.900489,0.655471
1655,3.033,101.45,Asia/Kuala_Lumpur,28800,1735664400,1735687143,1735730158,25.53,26.59,1012,...,,0.015666,,,0.003501,,0.149379,1.160516,0.914099,0.641650
1560,3.033,101.45,Asia/Kuala_Lumpur,28800,1735668000,1735687143,1735730158,25.45,26.51,1011,...,,0.027427,,,0.008849,,0.141333,0.464897,0.924684,0.640322
1307,3.033,101.45,Asia/Kuala_Lumpur,28800,1735671600,1735687143,1735730158,25.15,26.18,1011,...,,0.041850,,,0.017501,,0.114947,0.403401,0.937665,0.627858
314,3.033,101.45,Asia/Kuala_Lumpur,28800,1735675200,1735687143,1735730158,24.98,25.99,1010,...,,0.051804,,,0.009953,,0.098154,-0.081653,0.946623,0.613695
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1869,3.033,101.45,Asia/Kuala_Lumpur,28800,1742295600,1742253553,1742297127,29.50,31.39,1009,...,,0.041850,,,0.017501,,0.488098,-0.590871,0.295191,0.387249
67,3.033,101.45,Asia/Kuala_Lumpur,28800,1742299200,1742253553,1742297127,28.81,34.85,1010,...,,0.051804,,,0.009953,,0.419462,-0.081653,0.753793,0.713502
263,3.033,101.45,Asia/Kuala_Lumpur,28800,1742302800,1742253553,1742297127,28.37,32.69,1011,...,,0.039584,,,-0.005303,,0.379217,0.341231,0.658403,0.616712
1385,3.033,101.45,Asia/Kuala_Lumpur,28800,1742306400,1742253553,1742297127,28.23,33.20,1012,...,,0.040724,,,-0.004752,,0.366002,0.832573,0.745852,0.670329


In [30]:
# Scratch check: list the values produced by range(1, 11, 2), one per line.
odd_steps = range(1, 11, 2)
for i in odd_steps:
    print(i)

1
3
5
7
9
