In [50]:
%reload_ext autoreload


In [51]:
import pandas as pd
from src.train import train_loop
from src.model import CNNLSTMModel
from src.data_loader import InverterTimeSeriesDataset
import torch

# Read the inverter parquet data and parse its 'event_local_time' column
# into pandas datetimes in a single chained expression.
inverter_data = pd.read_parquet('data/inverter_data/').assign(
    event_local_time=lambda frame: pd.to_datetime(frame['event_local_time'])
)

# Failure sessions come from CSV: the first column becomes the index and the
# two session-boundary columns are parsed as datetimes while reading.
failure_sessions = pd.read_csv(
    'data/failure_sessions.csv',
    index_col=0,
    parse_dates=['start_time', 'end_time'],
)

In [65]:
## TO BE REPLACED
import sys

# Make the modules inside src/ importable by bare name. The membership check
# keeps this cell idempotent: re-running it no longer appends a duplicate
# 'src' entry to sys.path each time.
if 'src' not in sys.path:
    sys.path.append('src')        # point at your src/ folder
from preprocess import load_parquet_data, load_failure_sessions, prepare_dataset

# Load both inputs through the preprocess helpers, then build the labeled
# frame. Note that `failure_sessions` is (re)bound here to whatever
# load_failure_sessions returns.
inverter_df      = load_parquet_data('data/inverter_data')
failure_sessions = load_failure_sessions('data/failure_sessions.csv')
labeled_df       = prepare_dataset(inverter_df, failure_sessions, pre_days=5)



Loaded 1 parquet files → 185922 rows
Kept 61 sessions longer than 3 days
Total pre-failure rows: 1440
Total rows: 184802


In [69]:
from preprocess import prepare_dataset

# Rebuild the labeled frame and expose its 'failure_label' column under the
# name 'label', which is what the downstream dataset cell refers to.
labeled_df = (
    prepare_dataset(inverter_df, failure_sessions, pre_days=5)
    .rename(columns={'failure_label':'label'})
)

# Show the resulting column names as a sanity check.
print(labeled_df.columns.tolist())


Total pre-failure rows: 1440
Total rows: 184802


In [73]:
# Input feature columns passed to both the dataset and the model.
feature_cols = ['metric.AC_CURRENT_A.MEASURED', 'metric.AC_POWER.MEASURED']

# Columns handed to the Dataset: 'device_name', 'event_local_time',
# the feature columns above, and the target 'label'.
cols_needed = ['device_name', 'event_local_time'] + feature_cols + ['label']

# Keep only those columns, discard every row that has a NaN in any of them,
# and renumber the index so it is contiguous again.
df_small = (
    labeled_df[cols_needed]
    .dropna()
    .reset_index(drop=True)
)

# Build the windowed dataset from the NaN-free frame.
dataset = InverterTimeSeriesDataset(
    df_small,
    feature_cols,
    label_col='label',
    window_size=30,
    stride=1,
)



Processing devices: 100%|██████████| 16/16 [00:19<00:00,  1.23s/it]


In [74]:
# Seed torch's global RNG before anything random is created, so the shuffle
# order of the loader and any torch-based parameter initialisation are
# repeatable across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Mini-batches of 32 windows, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

# The model's input width follows the number of selected feature columns.
model = CNNLSTMModel(num_features=len(feature_cols))
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): the target column was renamed from 'failure_label'; if it is a
# binary 0/1 flag, a classification loss (e.g. BCEWithLogitsLoss) may be a
# better fit than MSE -- confirm against CNNLSTMModel's output before changing.
criterion = torch.nn.MSELoss()

In [57]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118


In [75]:
# Run training: 10 epochs on the CPU with the optimizer and criterion
# configured in the setup cell.
train_loop(
    model,
    train_loader,
    num_epochs=10,
    optimizer=optimizer,
    device='cpu',
    criterion=criterion,
)

[Epoch 1/10] Step 0/5157 - Loss: 0.2527
[Epoch 1/10] Step 100/5157 - Loss: 0.0004
[Epoch 1/10] Step 200/5157 - Loss: 0.0001
[Epoch 1/10] Step 300/5157 - Loss: 0.0311
[Epoch 1/10] Step 400/5157 - Loss: 0.0000
[Epoch 1/10] Step 500/5157 - Loss: 0.0001
[Epoch 1/10] Step 600/5157 - Loss: 0.0000
[Epoch 1/10] Step 700/5157 - Loss: 0.0001
[Epoch 1/10] Step 800/5157 - Loss: 0.0000
[Epoch 1/10] Step 900/5157 - Loss: 0.0000
[Epoch 1/10] Step 1000/5157 - Loss: 0.0000
[Epoch 1/10] Step 1100/5157 - Loss: 0.0000
[Epoch 1/10] Step 1200/5157 - Loss: 0.0000
[Epoch 1/10] Step 1300/5157 - Loss: 0.0000
[Epoch 1/10] Step 1400/5157 - Loss: 0.0000
[Epoch 1/10] Step 1500/5157 - Loss: 0.0000


KeyboardInterrupt: 