<a href="https://colab.research.google.com/github/prajeshatm/hello/blob/main/HamoyeStageCTapAlongProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A binary classification model to predict if a grid is stable or unstable using the UCI Electrical Grid Stability Simulated dataset.
Predictive features:

'tau1' to 'tau4': the reaction time of each network participant, a real value within the range 0.5 to 10 ('tau1' corresponds to the supplier node, 'tau2' to 'tau4' to the consumer nodes);
'p1' to 'p4': nominal power produced (positive) or consumed (negative) by each network participant, a real value within the range -2.0 to -0.5 for consumers ('p2' to 'p4'). As the total power consumed equals the total power generated, p1 (supplier node) = - (p2 + p3 + p4);
'g1' to 'g4': price elasticity coefficient for each network participant, a real value within the range 0.05 to 1.00 ('g1' corresponds to the supplier node, 'g2' to 'g4' to the consumer nodes; 'g' stands for 'gamma');
Dependent variables:

'stab': the maximum real part of the characteristic differential equation root (if positive, the system is linearly unstable; if negative, linearly stable);
'stabf': a categorical (binary) label ('stable' or 'unstable').

In [1]:
import pandas as pd
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00471/Data_for_UCI_named.csv')
df.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [2]:
df['stabf'].value_counts()

unstable    6380
stable      3620
Name: stabf, dtype: int64

In [3]:
df.isna().sum()

tau1     0
tau2     0
tau3     0
tau4     0
p1       0
p2       0
p3       0
p4       0
g1       0
g2       0
g3       0
g4       0
stab     0
stabf    0
dtype: int64

In [4]:

X = df.drop(columns=['stab','stabf'])
y = df['stabf']
X.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923


In [5]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=1)


Scaling using Min_max scaler

In [6]:
from sklearn.preprocessing import MinMaxScaler
minmax= MinMaxScaler()
x_trainscaled = minmax.fit_transform(x_train)
x_testscaled=minmax.fit_transform(x_test)
x_trainscaled

array([[0.60589517, 0.21489752, 0.68682574, ..., 0.59833149, 0.66936   ,
        0.64223413],
       [0.48109731, 0.52530525, 0.79747025, ..., 0.04968025, 0.91282399,
        0.08340342],
       [0.07572417, 0.87424452, 0.35515473, ..., 0.91962228, 0.1988299 ,
        0.64230625],
       ...,
       [0.68975576, 0.29088764, 0.09529355, ..., 0.58481743, 0.05321433,
        0.73406711],
       [0.48264079, 0.1356732 , 0.20904037, ..., 0.38789543, 0.29083034,
        0.98163769],
       [0.07417463, 0.68375924, 0.57166704, ..., 0.16077134, 0.84060903,
        0.72635408]])

#Classification using RandomForestClassifier

In [28]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
RFC=RandomForestClassifier()
classifier1=RFC.fit(x_trainscaled,y_train)
y_pred1=classifier1.predict(x_test)
print("Classification report \n",metrics.classification_report(y_test,y_pred1))
y_pred1

Classification report 
               precision    recall  f1-score   support

      stable       1.00      0.03      0.05       712
    unstable       0.65      1.00      0.79      1288

    accuracy                           0.65      2000
   macro avg       0.82      0.51      0.42      2000
weighted avg       0.77      0.65      0.52      2000



  f"X has feature names, but {self.__class__.__name__} was fitted without"


array(['unstable', 'unstable', 'unstable', ..., 'unstable', 'unstable',
       'unstable'], dtype=object)

In [39]:
from sklearn.metrics import accuracy_score,precision_score
accuracy = accuracy_score(y_test, y_pred1)
#print(round(accuracy,4))
print('accuracy: %.4f' % accuracy)

accuracy: 0.6530


#Classification using ExtraTreeClassifier


In [50]:
from sklearn.ensemble import ExtraTreesClassifier
ETC=ExtraTreesClassifier()
classifier2=ETC.fit(x_trainscaled,y_train)
y_pred2=classifier2.predict(x_testscaled)
print("Classification Report\n",metrics.classification_report(y_test,y_pred2))

Classification Report
               precision    recall  f1-score   support

      stable       0.95      0.85      0.90       712
    unstable       0.92      0.97      0.95      1288

    accuracy                           0.93      2000
   macro avg       0.93      0.91      0.92      2000
weighted avg       0.93      0.93      0.93      2000



In [52]:
from sklearn.metrics import accuracy_score,precision_score
accuracy = accuracy_score(y_test, y_pred2)
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 4 )) 


Accuracy: 93


Moel creation using Randomized SearchCV

In [49]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
param_dist = {"max_depth": [3, None],"max_features":randint(1, 9),"min_samples_leaf": randint(1, 9),"criterion": ["gini", "entropy"]}
ETC_cv = RandomizedSearchCV(ETC,param_dist)

# Fit it to the data
ETC_cv.fit(x_trainscaled,y_train)

# Print the tuned parameters and score
print("Tuned Decision Tree Parameters: {}".format(ETC_cv.best_params_))
print("Best score is {}".format(ETC_cv.best_score_))

Tuned Decision Tree Parameters: {'criterion': 'entropy', 'max_depth': None, 'max_features': 6, 'min_samples_leaf': 2}
Best score is 0.9315000000000001


Find the feature importances

In [42]:

# Computing the importance of each feature
feature_importance = ETC.feature_importances_
print(feature_importance)

[0.11636655 0.11729554 0.11453511 0.11597967 0.0391222  0.0409483
 0.04031762 0.03981034 0.08879915 0.09457419 0.09692254 0.09532878]


#Classification using XGBoosting

In [20]:
from xgboost import XGBClassifier
XGB=XGBClassifier()
classifier3=XGB.fit(x_trainscaled,y_train)
y_pred3=classifier3.predict(x_testscaled)
print("Classification Report \n",metrics.classification_report(y_test,y_pred3))

Classification Report 
               precision    recall  f1-score   support

      stable       0.92      0.85      0.88       712
    unstable       0.92      0.96      0.94      1288

    accuracy                           0.92      2000
   macro avg       0.92      0.90      0.91      2000
weighted avg       0.92      0.92      0.92      2000



In [38]:
from sklearn.metrics import accuracy_score,precision_score
accuracy = accuracy_score(y_test, y_pred3)
#print(round(accuracy,4))
print('accuracy: %.4f' % accuracy)

accuracy: 0.9190


#Classificaton using LGBMClassifier

In [34]:
from lightgbm import LGBMClassifier
LGBM=LGBMClassifier()
classifier4=LGBM.fit(x_trainscaled,y_train)
y_pred4=classifier4.predict(x_testscaled)
print("classification Report\n",metrics.classification_report(y_test,y_pred4))

classification Report
               precision    recall  f1-score   support

      stable       0.93      0.90      0.92       712
    unstable       0.95      0.97      0.96      1288

    accuracy                           0.94      2000
   macro avg       0.94      0.93      0.94      2000
weighted avg       0.94      0.94      0.94      2000



In [37]:
from sklearn.metrics import accuracy_score,precision_score
accuracy = accuracy_score(y_test, y_pred4)
#print(round(accuracy,4))
print('accuracy: %.4f' % accuracy)

accuracy: 0.9430
