In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score

In [10]:
# Load data
data = pd.read_csv('AIA_cleaned_data.csv')  # Update the path as needed
data['AGE'] = 2024 - data['YEAR']
data['Customer_Density'] = data['NO_CUSTOMERS'] / data['XFMR_KVA_RATING']

# Define failure condition based on age and load (you can adjust thresholds as needed)
age_threshold = 20  # Transformers older than 20 years
load_threshold = 1.5  # Customer density indicating high load (arbitrary, adjust as needed)

# Create a binary 'Failure' column
data['Failure'] = ((data['AGE'] > age_threshold) | (data['Customer_Density'] > load_threshold)).astype(int)

# Check the distribution of the new 'Failure' column
print(data['Failure'].value_counts())



Failure
0    1352
1     147
Name: count, dtype: int64


In [11]:
# Define features and target
features = ['XFMR_KVA_RATING', 'PRIMARY_VOLTAGE', 'CLASSIFICATION', 'PCB', 'PHASE', 'AGE', 'Customer_Density', 'LOWER_VOLTAGE', 'UPPER_VOLTAGE']
target = 'Failure'  # Binary column indicating failure (1 for failure, 0 for no failure)

X = data[features]
y = data[target]

In [12]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: Scaling and Encoding
categorical_features = ['PRIMARY_VOLTAGE', 'CLASSIFICATION', 'PCB', 'PHASE']
numerical_features = ['XFMR_KVA_RATING', 'AGE', 'Customer_Density', 'LOWER_VOLTAGE', 'UPPER_VOLTAGE']

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(), categorical_features)
])

X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [13]:
# Neural Network Model
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Train model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

# Evaluate model
y_pred = (model.predict(X_test) > 0.5).astype(int)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

Epoch 1/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9070 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 2/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9097 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 3/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9113 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 4/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9058 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 5/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9016 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 6/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9018 - loss: nan - val_accuracy: 0.8967 - val_loss: nan
Epoch 7/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
