<h1>Smart Grid Stability Prediction </h1>

In [1]:
# importing library
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier, XGBRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC, SVR
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, confusion_matrix, precision_score, recall_score, fbeta_score

<h2>Exploratory Data Analysis </h2>

In [2]:
# loading the dataset
# file_path = "/kaggle/input/smart-grid-stability/smart_grid_stability_augmented.csv"
file_path = "smart_grid_stability_augmented.csv"
df = pd.read_csv(file_path)

In [3]:
# getting preliminary information
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tau1    60000 non-null  float64
 1   tau2    60000 non-null  float64
 2   tau3    60000 non-null  float64
 3   tau4    60000 non-null  float64
 4   p1      60000 non-null  float64
 5   p2      60000 non-null  float64
 6   p3      60000 non-null  float64
 7   p4      60000 non-null  float64
 8   g1      60000 non-null  float64
 9   g2      60000 non-null  float64
 10  g3      60000 non-null  float64
 11  g4      60000 non-null  float64
 12  stab    60000 non-null  float64
 13  stabf   60000 non-null  object 
dtypes: float64(13), object(1)
memory usage: 6.4+ MB


In [4]:
# check for null values
df.isnull().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

In [5]:
df.columns.to_list()

['tau1',
 'tau2',
 'tau3',
 'tau4',
 'p1',
 'p2',
 'p3',
 'p4',
 'g1',
 'g2',
 'g3',
 'g4',
 'stab',
 'stabf']

In [6]:
columns = df.columns.to_list()
numerical_columns = columns[:-1]
categorical_columns = columns[-1]



<h2>Preprocessing the data</h2>

In [7]:
def replace_labels(x: str):
    if x == "unstable":
        return 0
    else:
        return 1


def preprocess_inputs(df, task="classification"):
    df = df.copy()
    if task == "classification":
        df = df.drop("stab", axis=1)
        y = df["stabf"].apply(replace_labels)
        x = df.drop("stabf", axis=1).copy()
    elif task == "regression":
        df = df.drop("stabf", axis=1)
        y = df["stab"]
        x = df.drop("stab", axis=1).copy()
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, train_size=0.7, shuffle=True, random_state=123
    )
    return x_train, x_test, y_train, y_test

In [8]:
x_train, x_test, y_train, y_test = preprocess_inputs(df, task="classification")
x_train

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
50049,9.818128,8.588987,0.511597,5.260072,3.395636,-1.478677,-1.107857,-0.809102,0.931287,0.291643,0.599963,0.384295
42117,2.988689,0.532388,1.456258,9.804672,4.992851,-1.899322,-1.336375,-1.757154,0.945929,0.407395,0.236469,0.947458
20378,5.936029,9.336081,1.988640,1.907732,4.433782,-1.324150,-1.417504,-1.692128,0.452766,0.882208,0.159555,0.243988
17121,5.001385,3.720948,3.111653,8.465773,3.239639,-0.903370,-0.836304,-1.499965,0.129207,0.500248,0.863418,0.075551
22484,2.284157,4.881570,5.095673,8.647201,3.375693,-1.920184,-0.630081,-0.825428,0.477917,0.446620,0.773091,0.424167
...,...,...,...,...,...,...,...,...,...,...,...,...
54370,5.701267,6.684273,9.601239,1.441566,2.855926,-0.966439,-0.772487,-1.117001,0.767515,0.141450,0.825274,0.681713
17730,6.962724,5.720939,3.331487,3.904860,4.653462,-1.254036,-1.608298,-1.791128,0.207566,0.925921,0.999739,0.635747
28030,6.763179,4.443784,5.772680,8.344345,4.390494,-1.586663,-0.817729,-1.986103,0.449443,0.276032,0.990190,0.785749
15725,4.542570,5.302279,1.487013,6.115917,3.317675,-1.656348,-0.568968,-1.092359,0.279099,0.486512,0.411925,0.963123


In [9]:
y_train

50049    0
42117    1
20378    0
17121    1
22484    0
        ..
54370    0
17730    0
28030    0
15725    0
52734    0
Name: stabf, Length: 42000, dtype: int64

In [10]:
y_test

29561    0
26640    0
24498    0
24594    0
24249    0
        ..
59393    1
21497    1
18948    1
24384    0
48300    1
Name: stabf, Length: 18000, dtype: int64

In [11]:
clf = SVC()
clf.fit(x_train, y_train)
print("Classifier Trained")

Classifier Trained


In [13]:
print("The Classification Score", clf.score(x_test, y_test))
y_pred = clf.predict(x_test)
print("Confusion matrix : ", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Precisioin : ", precision_score(y_test, y_pred))
print("Recall : ", recall_score(y_test, y_pred))

The Classification Score 0.9290555555555555
Confusion matrix :  [[11007   493]
 [  784  5716]]
Precisioin :  0.9205991302947335
Recall :  0.8793846153846154


In [10]:
x_train, x_test, y_train, y_test = preprocess_inputs(df, task="regression")
x_train

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
50049,9.818128,8.588987,0.511597,5.260072,3.395636,-1.478677,-1.107857,-0.809102,0.931287,0.291643,0.599963,0.384295
42117,2.988689,0.532388,1.456258,9.804672,4.992851,-1.899322,-1.336375,-1.757154,0.945929,0.407395,0.236469,0.947458
20378,5.936029,9.336081,1.988640,1.907732,4.433782,-1.324150,-1.417504,-1.692128,0.452766,0.882208,0.159555,0.243988
17121,5.001385,3.720948,3.111653,8.465773,3.239639,-0.903370,-0.836304,-1.499965,0.129207,0.500248,0.863418,0.075551
22484,2.284157,4.881570,5.095673,8.647201,3.375693,-1.920184,-0.630081,-0.825428,0.477917,0.446620,0.773091,0.424167
...,...,...,...,...,...,...,...,...,...,...,...,...
54370,5.701267,6.684273,9.601239,1.441566,2.855926,-0.966439,-0.772487,-1.117001,0.767515,0.141450,0.825274,0.681713
17730,6.962724,5.720939,3.331487,3.904860,4.653462,-1.254036,-1.608298,-1.791128,0.207566,0.925921,0.999739,0.635747
28030,6.763179,4.443784,5.772680,8.344345,4.390494,-1.586663,-0.817729,-1.986103,0.449443,0.276032,0.990190,0.785749
15725,4.542570,5.302279,1.487013,6.115917,3.317675,-1.656348,-0.568968,-1.092359,0.279099,0.486512,0.411925,0.963123


In [11]:
y_train

50049    0.011189
42117   -0.023532
20378    0.014560
17121   -0.025091
22484    0.027030
           ...   
54370    0.022260
17730    0.072665
28030    0.072647
15725    0.034689
52734    0.026553
Name: stab, Length: 42000, dtype: float64

In [15]:
x_train, x_test, y_train, y_test = preprocess_inputs(df, task="regression")
reg = SVR()
reg.fit(x_train, y_train)
print("model Trained")

model Trained


In [17]:
print("Regression Score", reg.score(x_test, y_test))
y_pred = reg.predict(x_test)
print(f"RMSE : {root_mean_squared_error(y_test, y_pred)}")
print(f"MAE  : {mean_absolute_error(y_test, y_pred)}")

Regression Score -0.0015794571310954186
RMSE : 0.036963855199827866
MAE  : 0.031349403371748755
