In [6]:
import pandas as pd
df = pd.read_csv('Electrical.csv')

It has 12 primary predictive features and two dependent variables.

Predictive features:

'tau1' to 'tau4': the reaction time of each network participant, a real value within the range 0.5 to 10 ('tau1' corresponds to the supplier node, 'tau2' to 'tau4' to the consumer nodes);

'p1' to 'p4': nominal power produced (positive) or consumed (negative) by each network participant, a real value within the range -2.0 to -0.5 for consumers ('p2' to 'p4'). As the total power consumed equals the total power generated, p1 (supplier node) = - (p2 + p3 + p4);

'g1' to 'g4': price elasticity coefficient for each network participant, a real value within the range 0.05 to 1.00 ('g1' corresponds to the supplier node, 'g2' to 'g4' to the consumer nodes; 'g' stands for 'gamma');

Dependent variables:


'stab': the maximum real part of the characteristic differential equation root (if positive, the system is linearly unstable; if negative, linearly stable);

'stabf': a categorical (binary) label ('stable' or 'unstable').

In [7]:
df.head()
df = df.drop('stab',axis=1)
df.head()

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,unstable


In [8]:
from sklearn.model_selection import train_test_split
X = df.drop('stabf',axis=1)
y = df['stabf']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

In [9]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(random_state = 1)
from pprint import pprint
# Look at parameters used by our current forest
print('Parameters currently in use:\n')
pprint(rf.get_params())

Parameters currently in use:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 1,
 'verbose': 0,
 'warm_start': False}


In [10]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
n_estimators = [50, 100, 300, 500, 1000]

min_samples_split = [2, 3, 5, 7, 9]

min_samples_leaf = [1, 2, 4, 6, 8]

max_features = ['auto', 'sqrt', 'log2', None] 

In [11]:
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'max_features':max_features}
pprint(random_grid)

{'max_features': ['auto', 'sqrt', 'log2', None],
 'min_samples_leaf': [1, 2, 4, 6, 8],
 'min_samples_split': [2, 3, 5, 7, 9],
 'n_estimators': [50, 100, 300, 500, 1000]}


In [12]:
from sklearn.ensemble import RandomForestClassifier
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier(random_state=1)
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, cv = 3,random_state=1,n_jobs=-1)
# Fit the random search model
rf_random.fit(X_train,y_train)

RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [13]:
rf_random.best_params_

{'max_features': 'log2',
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 1000}

In [14]:
base_model = RandomForestClassifier(random_state =1)
base_model.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [15]:
  base_pred = base_model.predict(X_test)

In [16]:
y_test.unique()

array(['unstable', 'stable'], dtype=object)

In [2]:
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score,confusion_matrix 

In [18]:
cnf_mat = confusion_matrix(y_true=y_test, y_pred=base_pred, labels=['unstable', 'stable']) 

In [19]:
cnf_mat

array([[1233,   55],
       [  87,  625]])

In [20]:
accuracy = accuracy_score(y_true=y_test, y_pred=base_pred)
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 2 ))

Accuracy: 93.0


In [23]:
precision = precision_score(y_true=y_test, y_pred=base_pred, pos_label= 'stable' )
print( 'Precision: {}' .format(round(precision* 100 ), 2 ))

Precision: 92.0


In [25]:
recall = recall_score(y_true=y_test, y_pred=base_pred, pos_label= 'stable' )
print( 'Recall: {}' .format(round(recall* 100 ), 2 ))

Recall: 88.0


In [27]:
f1 = f1_score(y_true=y_test, y_pred=base_pred, pos_label= 'stable' )
print( 'F1: {}' .format(round(f1* 100 ), 2 ))

F1: 90.0


In [36]:
tunedrf_model = RandomForestClassifier(max_features= 'log2',min_samples_leaf=4,min_samples_split=2,n_estimators=1000,random_state=1)
tunedrf_model.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=4, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [37]:
tunedrf_pred = tunedrf_model.predict(X_test)

In [38]:
cnf_mat = confusion_matrix(y_true=y_test, y_pred=tunedrf_pred, labels=['unstable', 'stable']) 

In [39]:
cnf_mat

array([[1227,   61],
       [ 100,  612]])

In [40]:
accuracy = accuracy_score(y_true=y_test, y_pred=tunedrf_pred)
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 2 ))

Accuracy: 92.0


In [42]:
from sklearn.ensemble import ExtraTreesClassifier
  etc = ExtraTreesClassifier(random_state=1)
  extra_grid = {'n_estimators': n_estimators,
                'max_features': max_features,
                'min_samples_split': min_samples_split,
                'min_samples_leaf': min_samples_leaf,
                'max_features':max_features}
  pprint(extra_grid)

{'max_features': ['auto', 'sqrt', 'log2', None],
 'min_samples_leaf': [1, 2, 4, 6, 8],
 'min_samples_split': [2, 3, 5, 7, 9],
 'n_estimators': [50, 100, 300, 500, 1000]}


In [44]:
etc_random = RandomizedSearchCV(estimator = etc, param_distributions = extra_grid, cv = 3,random_state=1,n_jobs=-1)
# Fit the random search model
etc_random.fit(X_train,y_train)

RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=ExtraTreesClassifier(bootstrap=False,
                                                  ccp_alpha=0.0,
                                                  class_weight=None,
                                                  criterion='gini',
                                                  max_depth=None,
                                                  max_features='auto',
                                                  max_leaf_nodes=None,
                                                  max_samples=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                        

In [45]:
etc_random.best_params_

{'max_features': None,
 'min_samples_leaf': 8,
 'min_samples_split': 2,
 'n_estimators': 1000}

In [47]:
etc.fit(X_train,y_train)
base_etc = etc.predict(X_test)
cnf_mat = confusion_matrix(y_true=y_test, y_pred=base_etc, labels=['unstable', 'stable']) 
cnf_mat

array([[1250,   38],
       [ 106,  606]])

In [48]:
accuracy = accuracy_score(y_true=y_test, y_pred=base_etc)
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 2 ))

Accuracy: 93.0


In [50]:
etc_tuned = ExtraTreesClassifier(max_features=None,min_samples_leaf= 8,min_samples_split=2,n_estimators=1000)
etc_tuned.fit(X_train,y_train)
tuned_etc = etc_tuned.predict(X_test)
cnf_mat = confusion_matrix(y_true=y_test, y_pred=tuned_etc, labels=['unstable', 'stable']) 
cnf_mat

array([[1238,   50],
       [  94,  618]])

In [51]:
accuracy = accuracy_score(y_true=y_test, y_pred=tuned_etc)
print( 'Accuracy: {}' .format(round(accuracy* 100 ), 2 ))

Accuracy: 93.0
