In [1]:
from pathlib import Path

import pandas as pd

# Load and preprocess
# Both input CSVs sit in the same dataset folder, so the folder is named once
# here; it is the only value to edit when the data is moved.
DATA_DIR = Path(r'D:\kowsi\project_works\Advanced_Forecasting_Anomaly_Detection\logic_leap_horizon_datasets')

# ops_daily: one row per site and date (see the 'date' / 'site_id' columns used below).
ops_daily = pd.read_csv(DATA_DIR / 'operations_daily_365d.csv')
# site_meta: per-site attributes, joined onto ops_daily via 'site_id' later on.
site_meta = pd.read_csv(DATA_DIR / 'site_meta.csv')


In [2]:

# Convert the raw 'date' strings into datetime64 values so the .dt accessor
# and chronological sorting used further down work as expected.
ops_daily = ops_daily.assign(date=pd.to_datetime(ops_daily['date']))


In [3]:

# Keep only operational rows: a row counts as operational when both the
# production count and the power draw are strictly positive. Rows where either
# value is zero, negative or missing fail the comparison and are dropped.
has_production = ops_daily['units_produced'].gt(0)
has_power = ops_daily['power_kwh'].gt(0)
ops_daily = ops_daily.loc[has_production & has_power]


In [4]:

# Merge site meta data
# Left join: every operations row is kept, and sites missing from site_meta get
# NaN in the metadata columns.
# validate='many_to_one' makes pandas raise MergeError if site_meta contains a
# repeated site_id; without it such a duplicate would silently multiply the
# daily rows and distort the per-site lag and rolling features built below.
df = ops_daily.merge(site_meta, on='site_id', how='left', validate='many_to_one')


In [5]:

# Create time-based features
# Calendar attributes are read off each row's timestamp:
#   day_of_week  - 0 (Monday) through 6 (Sunday)
#   month        - 1 through 12
#   week_of_year - ISO 8601 week number
date_parts = df['date'].dt
df = df.assign(
    day_of_week=date_parts.dayofweek,
    month=date_parts.month,
    week_of_year=date_parts.isocalendar().week,
)


In [6]:

# Lag features (value from the preceding row of the same site)
# Rows are ordered by site and then by date so that a one-row shift inside each
# site group picks up that site's previous record. Because non-operational rows
# were removed earlier, the previous record is not always the previous calendar
# day. The first row of every site has no predecessor and stays NaN here.
df = df.sort_values(by=['site_id', 'date'])
lag_targets = {
    'units_produced_lag1': 'units_produced',
    'power_kwh_lag1': 'power_kwh',
}
for lag_col, source_col in lag_targets.items():
    df[lag_col] = df.groupby('site_id')[source_col].shift(1)


In [7]:

# Rolling mean features (window of 7 rows per site, current row included)
# The first six rows of each site do not have a full window and come out as NaN.
rolling_targets = {
    'units_produced_roll7': 'units_produced',
    'power_kwh_roll7': 'power_kwh',
}
for roll_col, source_col in rolling_targets.items():
    per_site_mean = df.groupby('site_id')[source_col].rolling(window=7).mean()
    # The grouped rolling result carries site_id as an extra outer index level;
    # dropping it leaves the original row labels so the assignment aligns row by row.
    df[roll_col] = per_site_mean.droplevel(0)


In [8]:

# Fill lag and rolling nulls
# Only the engineered columns are zero-filled. A frame-wide fillna(0) would also
# overwrite genuine gaps elsewhere (e.g. metadata columns left empty by the left
# join, or missing raw measurements) with a misleading 0.
engineered_cols = [
    'units_produced_lag1',
    'power_kwh_lag1',
    'units_produced_roll7',
    'power_kwh_roll7',
]
df[engineered_cols] = df[engineered_cols].fillna(0)


In [11]:

# Save engineered features for modeling
output_path = Path(r'D:\kowsi\project_works\Advanced_Forecasting_Anomaly_Detection\outputs\engineered_features.csv')
# to_csv does not create missing folders, so make sure the target folder exists
# before writing (no-op when it is already there).
output_path.parent.mkdir(parents=True, exist_ok=True)
# index=False: the row labels carry no information and are left out of the file.
df.to_csv(output_path, index=False)

# Quick look at the first rows of the saved frame.
print(df.head())


        date site_id  units_produced  downtime_minutes  power_kwh  \
0 2025-01-01      S1            1280                34       4211   
1 2025-01-02      S1            1249               193       5471   
2 2025-01-03      S1            1163                43       4178   
3 2025-01-04      S1            1348                32       4554   
4 2025-01-06      S1            1367                85       4440   

   rework_units  defects_ppm  staff_count  material_cost_per_unit  \
0            27          453           56                   71.94   
1            40          480           55                   71.87   
2            33          454           53                   72.04   
3            35          387           54                   72.26   
4            37          562           57                   72.28   

   price_per_unit  ...  region  commissioned_year  shift_hours_per_day  \
0           96.04  ...   South               2018                   20   
1           95.90  ...