In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score

# ----- Step 1: Load and Prepare Weekly Data -----
# Read the weekly training table, parse the raw 'week' column into a datetime
# 'Week' column, and order the rows by feeder and then chronologically.
df_weekly = pd.read_csv("/content/weekly_train.csv")
df_weekly = (
    df_weekly
    .assign(Week=lambda frame: pd.to_datetime(frame['week']))
    .sort_values(by=['fdr_Id', 'Week'])
)

# Label every row with the Fault_Occurred value of the same feeder's following
# row. The last row of each feeder has no successor, so it is dropped before
# the label is cast to an integer.
df_weekly = (
    df_weekly
    .assign(target=lambda frame: frame.groupby('fdr_Id')['Fault_Occurred'].shift(-1))
    .dropna(subset=['target'])
    .astype({'target': int})
)

# ----- Step 2: Define Features and Normalize -----
# Every column that is not an identifier, a timestamp, or a label is a feature.
exclude_cols = ['fdr_Id', 'Week', 'Fault_Occurred', 'target', 'week']
features = df_weekly.columns[~df_weekly.columns.isin(exclude_cols)].tolist()

# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later in this cell - confirm that this is intended.
scaler = MinMaxScaler()
scaler.fit(df_weekly[features])
df_weekly[features] = scaler.transform(df_weekly[features])

# ----- Step 3: Create Feeder-Wise Sequences -----
def create_sequences_by_feeder(df, seq_length=3, feature_cols=None):
    """Build fixed-length sliding windows per feeder.

    For every feeder (rows sharing one ``fdr_Id``) each run of ``seq_length``
    consecutive rows becomes one sample. The label of a sample is the
    ``target`` value of the row immediately after the window, so windows never
    span two feeders.

    NOTE(review): the label is read from the row *after* the window. If
    ``target`` is itself already a next-step label, the effective horizon is
    two steps past the last window row - confirm this is intended.

    Args:
        df: DataFrame with an ``fdr_Id`` column, a ``target`` column and the
            feature columns. Rows are used in the order given, so the caller
            must sort them chronologically within each feeder.
        seq_length: Number of consecutive rows per window.
        feature_cols: Feature column names. Defaults to the module-level
            ``features`` list when omitted.

    Returns:
        ``(X, y)`` as float32 tensors, ``X`` of shape
        ``(n_samples, seq_length, n_features)`` and ``y`` of shape
        ``(n_samples,)``. If no feeder has more than ``seq_length`` rows,
        correctly shaped empty tensors are returned.
    """
    cols = features if feature_cols is None else list(feature_cols)

    windows, labels = [], []
    # One pass over the frame; sort=False keeps feeders in first-appearance
    # order, matching iteration over df['fdr_Id'].unique().
    for _, feeder_df in df.groupby('fdr_Id', sort=False):
        # Convert once per feeder instead of slicing the DataFrame per window.
        feature_matrix = feeder_df[cols].to_numpy()
        target_values = feeder_df['target'].to_numpy()
        for start in range(len(feeder_df) - seq_length):
            stop = start + seq_length
            windows.append(feature_matrix[start:stop])
            labels.append(target_values[stop])

    if not windows:
        # Keep the 3-D layout so that callers reading X.shape[2] still work.
        return (
            torch.empty((0, seq_length, len(cols)), dtype=torch.float32),
            torch.empty((0,), dtype=torch.float32),
        )

    return (
        torch.tensor(np.array(windows), dtype=torch.float32),
        torch.tensor(np.array(labels), dtype=torch.float32),
    )

# Window length (in weekly rows) used for every sample.
seq_length = 10
X, y = create_sequences_by_feeder(df_weekly, seq_length)

# Prefer the GPU when one is visible; both tensors are moved there up front.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X = X.to(device)
y = y.to(device)

# ----- Step 4: Split Data and Create DataLoaders -----
# The first 80% of the samples (in the order produced above) are used for
# training and the remaining 20% for testing.
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# ----- Step 5: Define the LSTM Model -----
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            It is ignored when ``num_layers`` is 1, because there is no
            layer boundary to apply it to.
    """

    def __init__(self, input_dim, hidden_dim=128, num_layers=2, dropout=0.3):
        super().__init__()
        # nn.LSTM applies dropout only between layers and emits a UserWarning
        # when a non-zero value is passed together with a single layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one probability per sequence.

        Args:
            x: Tensor laid out as ``(batch, seq_len, input_dim)`` (the LSTM is
                built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``(0, 1)``.
        """
        _, (hidden, _) = self.lstm(x)
        # hidden[-1] is the final hidden state of the top LSTM layer.
        last_layer_state = hidden[-1]
        return torch.sigmoid(self.fc(last_layer_state))

# Instantiate model
# The input width is read from the last axis of the sequence tensor X.
model = LSTMModel(input_dim=X.shape[2]).to(device)
# The model already ends in a sigmoid, so plain binary cross-entropy is used.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ----- Step 6: Train the Model -----
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # Drop only the trailing size-1 axis. A bare squeeze() would also
        # remove the batch axis when the final batch holds a single sample,
        # and BCELoss would then fail on the shape mismatch with batch_y.
        output = model(batch_X).squeeze(-1)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}")

# ----- Step 7: Evaluate the Model -----
# Collect the predicted probabilities and true labels for the held-out split,
# then score them at a 0.5 decision threshold.
model.eval()
y_pred_list, y_test_list = [], []
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        # Drop only the trailing size-1 axis. A bare squeeze() turns a
        # single-sample batch into a 0-d tensor, which extend() cannot iterate.
        output = model(batch_X).squeeze(-1)
        y_pred_list.extend(output.cpu().numpy())
        y_test_list.extend(batch_y.cpu().numpy())

y_pred_binary = (np.array(y_pred_list) > 0.5).astype(int)
y_test_cpu = np.array(y_test_list)

precision = precision_score(y_test_cpu, y_pred_binary)
recall = recall_score(y_test_cpu, y_pred_binary)
f1 = f1_score(y_test_cpu, y_pred_binary)
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

# ----- Step 8: Predict Feeders Going Down in Target Week -----
# Only rows dated strictly before the target week are used as history.
target_week = pd.to_datetime("2025-01-06")
df_filtered = df_weekly.loc[df_weekly['Week'] < target_week]

feeder_predictions = []
feeders = df_filtered['fdr_Id'].unique()

with torch.no_grad():
    for feeder in feeders:
        feeder_df = df_filtered.loc[df_filtered['fdr_Id'] == feeder].reset_index(drop=True)
        # Feeders with fewer than seq_length rows cannot form a full window.
        if len(feeder_df) < seq_length:
            continue
        # The most recent seq_length rows form the single input window.
        seq_data = feeder_df[features].to_numpy()[-seq_length:]
        seq_tensor = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0).to(device)
        pred_prob = model(seq_tensor).item()
        feeder_predictions.append((feeder, pred_prob))

# Feeders whose predicted probability exceeds the threshold are flagged.
threshold = 0.3
down_feeders = {pair for pair in feeder_predictions if pair[1] > threshold}

# ----- Step 9: Compare Predictions with Actual Data -----
actual_data = pd.read_csv("/content/weekly_test.csv")
actual_data['Week'] = pd.to_datetime(actual_data['week'])
# NOTE(review): 'Week' is parsed but not used below, so every row of the test
# file with Fault_Occurred == 1 counts as an actual failure regardless of its
# week - confirm the file only covers the target week.
actual_failures = set(actual_data[actual_data['Fault_Occurred'] == 1]['fdr_Id'])

# Feeder-level confusion sets.
predicted_failures = {f for f, _ in down_feeders}
true_positives = actual_failures & predicted_failures
false_negatives = actual_failures - predicted_failures
false_positives = predicted_failures - actual_failures

# The small epsilon keeps the ratios defined when a denominator is zero.
precision = len(true_positives) / (len(true_positives) + len(false_positives) + 1e-9)
recall = len(true_positives) / (len(true_positives) + len(false_negatives) + 1e-9)
# Stored under its own name so the sklearn `f1_score` function imported at the
# top of the cell is not overwritten by a float.
feeder_f1 = 2 * (precision * recall) / (precision + recall + 1e-9)

print(f"Correct Predictions: {len(true_positives)}")
print(f"Missed Failures: {len(false_negatives)}")
print(f"Incorrect Predictions: {len(false_positives)}")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {feeder_f1:.4f}")


Epoch 1/10, Loss: 0.6151
Epoch 2/10, Loss: 0.5717
Epoch 3/10, Loss: 0.5628
Epoch 4/10, Loss: 0.5613
Epoch 5/10, Loss: 0.5576
Epoch 6/10, Loss: 0.5564
Epoch 7/10, Loss: 0.5534
Epoch 8/10, Loss: 0.5553
Epoch 9/10, Loss: 0.5534
Epoch 10/10, Loss: 0.5540
Precision: 0.6019, Recall: 0.5004, F1 Score: 0.5465
Correct Predictions: 78
Missed Failures: 51
Incorrect Predictions: 7
Precision: 0.9176, Recall: 0.6047, F1 Score: 0.7290


In [None]:
import pandas as pd
import numpy as np
import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----- Step 1: Load the Processed Monthly Data -----
df_monthly = pd.read_csv("/content/feeder_monthly_data_upto_2024.csv")

# Parse 'Month' as a datetime, derive the calendar year and month number from
# it, and order the rows by feeder and then by month.
df_monthly['Month'] = pd.to_datetime(df_monthly['Month'])
df_monthly = (
    df_monthly
    .assign(
        Year=lambda frame: frame['Month'].dt.year,
        Month_Num=lambda frame: frame['Month'].dt.month,
    )
    .sort_values(by=['fdr_Id', 'Month'])
)

# ----- Step 2: Define the Feature Set -----
# Every column that is not an identifier, a timestamp, or a label is a feature.
exclude_cols = ['fdr_Id', 'Month', 'Fault_Label', 'target']
features = df_monthly.columns[~df_monthly.columns.isin(exclude_cols)].tolist()
print("Features used for prediction:", features)

# Nothing is loaded from disk here: `model` must already exist in the session.
# It is only switched to evaluation mode.
model.eval()

# ----- Step 3: Define the Sequence Length -----
seq_length = 12  # Use past 12 months to predict the target month

# ----- Step 4: Select the Target Month -----
# Change the date string to predict a different month.
target_month = pd.to_datetime("2025-01-01")

# Keep only the months strictly before the target month.
df_filtered = df_monthly.loc[df_monthly['Month'] < target_month]

# ----- Step 5: Create Predictions for Each Feeder -----
feeder_predictions = []
feeders = df_filtered['fdr_Id'].unique()

with torch.no_grad():
    for feeder in feeders:
        feeder_df = df_filtered.loc[df_filtered['fdr_Id'] == feeder].reset_index(drop=True)

        # Report and skip feeders with too little history for a full window.
        if len(feeder_df) < seq_length:
            print(f"Not enough data for feeder {feeder}")
            continue

        # The last 'seq_length' rows form the input sequence, with a leading
        # batch axis of size 1.
        seq_data = feeder_df[features].to_numpy()[-seq_length:]
        seq_tensor = torch.tensor(seq_data, dtype=torch.float32).unsqueeze(0).to(device)

        # Store the scalar model output for this feeder.
        pred_prob = model(seq_tensor).item()
        feeder_predictions.append((feeder, pred_prob))

# ----- Step 6: Filter Feeders Predicted to Go Down -----
# A feeder is flagged when its predicted probability exceeds the threshold.
threshold = 0.5
down_feeders = [pair for pair in feeder_predictions if pair[1] > threshold]

# ----- Step 7: Display the Predictions -----
print(f"Feeders predicted to fail in {target_month.strftime('%B %Y')}:")
for f, prob in down_feeders:
    print(f"Feeder {f}: Probability = {prob:.4f}")

# Save the probabilities of all scored feeders (not only the flagged ones).
pred_df = pd.DataFrame(
    data=feeder_predictions,
    columns=["fdr_Id", "predicted_probability"],
)
pred_df.to_csv(f"feeder_predictions_{target_month.strftime('%Y_%m')}.csv", index=False)
print(f"Predictions saved as 'feeder_predictions_{target_month.strftime('%Y_%m')}.csv'")


Features used for prediction: ['Fault_Count', 'Total_Duration', 'NoFault_Count', 'temp', 'humidity', 'dew', 'windspeed', 'windgust', 'sealevelpressure', 'solarradiation', 'precip', 'reason_Bird / Animal Electrocuted', 'reason_Breaker Fault', 'reason_Bus Bar Broken', 'reason_Cable End Damage', 'reason_Cambric Lead', 'reason_Consumer Side Fault', 'reason_D/O Faulty', 'reason_Fire/Flash/Sparking', 'reason_HT Cable Lead', 'reason_HT IPC', 'reason_HT Jumper', 'reason_Insulator Damage', 'reason_KS Faults', 'reason_LBS Faults', 'reason_LT ABC Broken', 'reason_LT Cable Lead', 'reason_LT DB Fault', 'reason_LT Panel fault', 'reason_LT Wire Broken', 'reason_MISC', 'reason_Misc.', 'reason_PMT Link', 'reason_Relay Fault', 'reason_Tree Snapping', 'reason_Trolley Issue', 'reason_VIR / Wire / Cloth etc Grounding', 'reason_VIR / Wire / Cloth etc. grounding', 'Year', 'Month_Num']


RuntimeError: The size of tensor a (12) must match the size of tensor b (3) at non-singleton dimension 1

In [None]:
import pandas as pd
import numpy as np
import torch

# Switch the model to evaluation mode. Nothing is loaded from disk here:
# `model` is not defined in this cell and must already exist in the session.
model.eval()

def predict_feeder_failures(df, seq_length=12, feature_cols=None,
                            predictor=None, run_device=None):
    """Score every feeder that has at least ``seq_length`` rows of history.

    For each feeder (rows sharing one ``fdr_Id``) the last ``seq_length`` rows
    of the feature columns are turned into a float32 tensor with a leading
    batch axis of size 1 and passed to the predictor. Rows are used in the
    order given, so the caller must sort them chronologically per feeder.

    Args:
        df: DataFrame with an ``fdr_Id`` column and the feature columns.
        seq_length: Number of trailing rows fed to the predictor.
        feature_cols: Feature column names. Defaults to the module-level
            ``features`` list when omitted.
        predictor: Callable applied to the input tensor; its result must
            support ``.item()``. Defaults to the module-level ``model``.
        run_device: Device the input tensor is moved to. Defaults to the
            module-level ``device``.

    Returns:
        List of ``(feeder_id, predicted_value)`` tuples in order of first
        appearance of each feeder. Feeders with too little history are
        omitted.

    Raises:
        KeyError: If any requested feature column is missing from ``df``.
    """
    cols = features if feature_cols is None else list(feature_cols)
    net = model if predictor is None else predictor
    target_device = device if run_device is None else run_device

    # Fail early with an explicit message instead of inside the loop.
    missing = [col for col in cols if col not in df.columns]
    if missing:
        raise KeyError(f"Feature columns missing from df: {missing}")

    predictions = []
    with torch.no_grad():
        # One pass over the frame instead of one boolean scan per feeder.
        for feeder, feeder_df in df.groupby('fdr_Id', sort=False):
            if len(feeder_df) < seq_length:
                continue
            seq_X = feeder_df[cols].to_numpy()[-seq_length:]
            seq_tensor = torch.tensor(seq_X, dtype=torch.float32).unsqueeze(0).to(target_device)
            predictions.append((feeder, net(seq_tensor).item()))
    return predictions

# Load monthly data and filter for past months
df_monthly = pd.read_csv("/content/feeder_monthly_data_upto_2024.csv")
df_monthly['Month'] = pd.to_datetime(df_monthly['Month'])
# Derive the same calendar columns as the earlier monthly cell. The `features`
# list built there contains 'Year' and 'Month_Num', so selecting `features`
# from a frame without them raises a KeyError.
df_monthly['Year'] = df_monthly['Month'].dt.year
df_monthly['Month_Num'] = df_monthly['Month'].dt.month
# predict_feeder_failures uses the LAST rows of each feeder, so the rows must
# be in chronological order per feeder for that to mean "most recent months".
df_monthly = df_monthly.sort_values(by=['fdr_Id', 'Month'])

# Kept as a string because the output filename below slices it ("YYYY-MM").
target_month = "2025-01-01"
df_filtered = df_monthly[df_monthly['Month'] < pd.to_datetime(target_month)]

# Get predictions
feeder_predictions = predict_feeder_failures(df_filtered)

# Apply threshold
threshold = 0.5
down_feeders = [(f, prob) for f, prob in feeder_predictions if prob > threshold]

# Save predictions (only the feeders above the threshold)
pred_df = pd.DataFrame(down_feeders, columns=["fdr_Id", "predicted_probability"])
pred_df.to_csv(f"LSTM_predictions_{target_month[:7]}.csv", index=False)

# ----- Compare Predictions with Actual Data -----
# Feeders with Fault_Label == 1 in the actuals file are the ground truth.
actual_data = pd.read_csv("/content/feeder_monthly_data_2025_01.csv")
actual_data = actual_data[actual_data['Fault_Label'] == 1]
actual_feeders = set(actual_data['fdr_Id'])
predicted_feeders = set(pred_df['fdr_Id'])

# True Positives (Correct Predictions)
true_positives = actual_feeders & predicted_feeders

# False Negatives (Missed Failures)
false_negatives = actual_feeders - predicted_feeders

# False Positives (Incorrect Predictions)
false_positives = predicted_feeders - actual_feeders

# Print Results
print(f"Correct Predictions: {len(true_positives)}")
print(f"Missed Failures: {len(false_negatives)}")
print(f"Incorrect Predictions: {len(false_positives)}")

# Save Comparison Results
# Set iteration order is not reproducible across runs (string hashes are
# randomised), so the ids are sorted to make the CSV deterministic. key=str
# keeps the sort well-defined even if the ids are of mixed types.
comparison_results = {
    "True Positives": sorted(true_positives, key=str),
    "False Negatives": sorted(false_negatives, key=str),
    "False Positives": sorted(false_positives, key=str),
}
# Wrapping each list in a Series lets columns of different lengths share one
# frame; shorter columns are padded with NaN.
comparison_df = pd.DataFrame({label: pd.Series(ids) for label, ids in comparison_results.items()})
comparison_df.to_csv("prediction_comparison.csv", index=False)


NameError: name 'seq_X' is not defined