In [None]:
#@title
# Upload the dataset to the notebook's working directory before running:
# electrical_grid_stability_simulated_data.csv

# 14.1. Combinatory Pattern Recognition Using Supercomputers

In [None]:
#@title 14.1.1. One main cell

#############################################################
# Import some necessary packages
#############################################################

import numpy as np
import pandas as pd
import matplotlib
import itertools # new

from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error as mse

# ml models
from sklearn.svm import SVC as svc
from sklearn.neighbors import KNeighborsClassifier as knc
from sklearn.tree import DecisionTreeClassifier as dtc
from sklearn.ensemble import RandomForestClassifier as rfc

import warnings
warnings.filterwarnings("ignore")

from multiprocessing import Pool
import multiprocessing
import time

#############################################################
# Data processing
#############################################################

data = pd.read_csv('electrical_grid_stability_simulated_data.csv')

# inputs: the first 12 columns; output: the last column (class label)
datain = data.iloc[:,0:12].values

# Encode the labels straight to float instead of assigning into a slice of
# `data` (chained assignment) and then copying 1-element arrays into scalar
# slots, which newer NumPy versions deprecate.
# Values other than 'stable'/'unstable' are passed through unchanged, so
# already-numeric labels still work and non-numeric ones fail in astype(float).
label_codes = {'stable': 1.0, 'unstable': 0.0}
dataou = (
  data.iloc[:,-1]
  .map(lambda label: label_codes.get(label, label))
  .astype(float)
  .values
  .reshape(-1, 1)  # keep the column-vector shape (num_data, 1)
)

# define some simple keys: one generic 'feature NN' label per CSV column
data_keys = list(data.keys())
keys = ['feature {:02d}'.format(i0+1) for i0 in range(len(data_keys))]

# remove the outputs
# NOTE(review): this keeps 13 labels although only 12 columns are used as
# inputs above -- confirm whether keys[:12] was intended.
keys = keys[:13]

# necessary information for combinatory model
num_data      = datain.shape[0]
num_variables = datain.shape[1]

#############################################################
# Some necessary functions
#############################################################

def fun_combinations(num_data, num_variables):
  """Enumerate every non-empty subset of the variable indices.

  Parameters
  ----------
  num_data : unused; accepted for interface compatibility.
  num_variables : number of variables to combine.

  Returns
  -------
  comb_bin : bool array of shape (2**num_variables - 1, num_variables);
    row r is True in the columns belonging to the r-th subset. Rows are
    ordered by subset size, then in itertools.combinations order.
  comb_ind : list with one entry per subset size (1..num_variables), each
    entry being the list of index tuples of that size.
  num_comb : total number of subsets, 2**num_variables - 1.
  """
  num_comb = 2**num_variables - 1

  # group the index tuples by subset size
  comb_ind = [
    list(itertools.combinations(range(num_variables), size))
    for size in range(1, num_variables + 1)
  ]

  # one mask row per subset, filled in the same order as comb_ind
  comb_bin = np.zeros((num_comb, num_variables), dtype = bool)
  row = 0
  for same_size in comb_ind:
    for subset in same_size:
      comb_bin[row, list(subset)] = True
      row += 1

  return comb_bin, comb_ind, num_comb

def fun_split(RTT, RRS, num_data):
  """Build repeated random train/test index splits for several test ratios.

  The global NumPy RNG is re-seeded with 42 on every call, so the splits are
  reproducible. For each ratio in RTT, RRS independent permutations of
  range(num_data) are drawn; the first floor(ratio*num_data) entries of a
  permutation are the test indices and the remainder the train indices.

  Parameters
  ----------
  RTT : iterable of test ratios.
  RRS : number of random splits per ratio.
  num_data : number of samples to split.

  Returns
  -------
  index_tr, index_te : nested lists indexed as [ratio][repetition], each leaf
    being a list of integer sample indices.
  """
  np.random.seed(42)

  index_tr, index_te = [], []

  for ratio in RTT:
    # size of the test part for this ratio
    cut = int(np.floor(ratio*num_data))

    # draw all repetitions for this ratio, in order, from the seeded RNG
    shuffles = [
      np.random.permutation(list(range(num_data))).tolist()
      for _ in range(RRS)
    ]

    index_te.append([order[:cut] for order in shuffles])
    index_tr.append([order[cut:] for order in shuffles])

  return index_tr, index_te

def fun_prep(datain, dataou, comb_bin, index_tr, index_te, lb, ub):
  """Select the rows/columns of one split and min-max scale the inputs.

  Parameters
  ----------
  datain : 2-D array of inputs (rows are samples).
  dataou : 2-D array of outputs, row-aligned with datain.
  comb_bin : column selector applied to datain (callers in this file pass one
    boolean row of the combination mask).
  index_tr, index_te : row indices of the train and test samples.
  lb, ub : lower/upper bound passed to MinMaxScaler as feature_range.

  Returns
  -------
  datain_tr_calibrated, dataou_tr, datain_te_calibrated, dataou_te
    Inputs are scaled with a scaler fitted on the training rows only, so no
    test information leaks into the scaling. Outputs are returned unscaled.
  """
  # index rows first and columns second; doing both in a single subscript
  # raises a shape "mismatch" error
  datain_tr = datain[index_tr, :][:, comb_bin]
  datain_te = datain[index_te, :][:, comb_bin]
  dataou_tr = dataou[index_tr,:]
  dataou_te = dataou[index_te,:]

  # make sure everything is 2-D (samples x columns)
  if len(datain_tr.shape) == 1:
    datain_tr = np.expand_dims(datain_tr, axis = 1)
    datain_te = np.expand_dims(datain_te, axis = 1)

  if len(dataou_tr.shape) == 1:
    dataou_tr = np.expand_dims(dataou_tr, axis = 1)
    dataou_te = np.expand_dims(dataou_te, axis = 1)

  # Only the inputs are scaled. The output scaler that used to be fitted here
  # was never applied, so fitting it on every call was wasted work.
  scalerin = MinMaxScaler(feature_range=(lb,ub))
  scalerin.fit(datain_tr)

  datain_tr_calibrated = scalerin.transform(datain_tr)
  datain_te_calibrated = scalerin.transform(datain_te)

  return datain_tr_calibrated, dataou_tr, datain_te_calibrated, dataou_te


def fun_ml(datain_tr, dataou_tr, datain_te, dataou_te, model_type):
  """Fit one classifier with default settings and report its accuracies.

  Parameters
  ----------
  datain_tr, dataou_tr : training inputs and outputs.
  datain_te, dataou_te : test inputs and outputs.
  model_type : one of 'SVC', 'KNC', 'DTC', 'RFC'.

  Returns
  -------
  dict with keys 'model_type', 'acc_tr' and 'acc_te'.

  Raises
  ------
  ValueError
    If model_type is not one of the supported names. Previously this case
    only printed a message and then failed on the undefined model variable.
  """
  if model_type == 'SVC':
    mdl = svc()
  elif model_type == 'KNC':
    mdl = knc()
  elif model_type == 'DTC':
    mdl = dtc()
  elif model_type == 'RFC':
    mdl = rfc()
  else:
    raise ValueError(
      "Model type has not been defined! Got {!r}; expected one of "
      "'SVC', 'KNC', 'DTC', 'RFC'.".format(model_type)
    )

  mdl.fit(datain_tr, dataou_tr)
  dataes_tr = mdl.predict(datain_tr)
  dataes_te = mdl.predict(datain_te)

  # predictions may come back 1-D; make them column vectors so that the
  # element-wise comparison in fun_accuracy does not broadcast
  if len(dataes_tr.shape) == 1:
    dataes_tr = np.expand_dims(dataes_tr, axis = 1)

  if len(dataes_te.shape) == 1:
    dataes_te = np.expand_dims(dataes_te, axis = 1)

  result = {}
  result['model_type'] = model_type
  result['acc_tr']     = fun_accuracy(dataes_tr, dataou_tr)
  result['acc_te']     = fun_accuracy(dataes_te, dataou_te)

  return result

def fun_accuracy(dataes, dataou):
  """Return 1 minus the share of mismatching entries.

  Counts the non-zero elements of (dataes - dataou) and divides by the number
  of rows of dataou. Both arguments are expected to be arrays of the same
  shape; differently shaped arrays would be broadcast by the subtraction.
  """
  residual    = dataes - dataou
  num_rows    = dataou.shape[0]
  error_share = np.count_nonzero(residual)/num_rows
  return 1 - error_share

def fun_rttrrs(i0):
  """Evaluate every split and every model for one variable combination.

  Reads the module-level RTT, RRS, model_type, datain, dataou, comb_bin,
  index_tr, index_te and num_comb.

  Parameters
  ----------
  i0 : row index into comb_bin selecting the variable combination.

  Returns
  -------
  Object array of shape (len(RTT), RRS, 1, len(model_type)) whose entries are
  the result dicts returned by fun_ml.
  """
  num_rtt = len(RTT)
  num_mdl = len(model_type)
  result  = np.empty((num_rtt, RRS, 1, num_mdl), dtype = dict)

  for rtt_id in range(num_rtt):
    for rrs_id in range(RRS):
      # the prepared split is shared by all models
      prepared = fun_prep(
        datain, dataou, comb_bin[i0,:],
        index_tr[rtt_id][rrs_id], index_te[rtt_id][rrs_id],
        0, 1,
      )
      for mdl_id, mdl_name in enumerate(model_type):
        result[rtt_id,rrs_id,0,mdl_id] = fun_ml(*prepared, mdl_name)
        print("Comb #: {:05d}/{} | RTT #: {:05d}/{} | RRS #: {:05d}/{} | Model: {}".format(i0+1, num_comb, rtt_id+1, num_rtt, rrs_id+1, RRS, mdl_name))

  return result

def fun_parallel(pool_size):
  """Run fun_rttrrs for every combination on a pool of worker processes.

  Reads the module-level RTT, RRS, num_comb and model_type.

  Parameters
  ----------
  pool_size : number of worker processes.

  Returns
  -------
  Object array of shape (len(RTT), RRS, num_comb, len(model_type)) holding
  the result dicts; index i0 on the third axis is combination i0.
  """
  result = np.empty((len(RTT), RRS, num_comb, len(model_type)), dtype = dict)

  # one argument tuple per combination
  par_loop = [(i0,) for i0 in range(num_comb)]

  # The 'fork' start method lets the workers inherit the module-level data
  # that fun_rttrrs reads. The with-block shuts the pool down once starmap has
  # returned, so worker processes are not leaked across repeated runs.
  with multiprocessing.get_context('fork').Pool(pool_size) as my_pool:
    result_list = my_pool.starmap(fun_rttrrs, par_loop)

  # gather results: each worker returns a block with a length-1 third axis
  for i0, block in enumerate(result_list):
    result[:,:,i0:i0+1,:] = block

  return result

#############################################################
# Main run
#############################################################

# --- run configuration ---
model_type = ['SVC', 'KNC', 'DTC', 'RFC']   # classifiers evaluated per split
RTT        = [0.1,0.2,0.3,0.4,0.5]          # test ratios
RRS        = 100                            # random splits per ratio
pool_size  = 2                              # worker processes

# half-open range [start, end) of combination rows handled by this run
comb_index_start = 110
comb_index_end   = 115

# --- full combination table and the train/test splits ---
comb_bin, _, num_comb = fun_combinations(num_data, num_variables)
index_tr, index_te    = fun_split(RTT, RRS, num_data)

# --- restrict the globals to the requested slice of combinations ---
range_list = list(range(comb_index_start, comb_index_end))
comb_bin   = comb_bin[range_list,:]
num_comb   = len(range_list)

# --- timed parallel evaluation ---
time_start = time.time()
result     = fun_parallel(pool_size)
run_time   = time.time() - time_start

file_dict = {
  'comb_bin': comb_bin,
  'range': [comb_index_start, comb_index_end],
  'run_time': run_time,
  'result': result,
}

print(run_time)

# the dict is stored by np.save as a pickled object array
filename = "result_{:07d}_{:07d}.npy".format(comb_index_start, comb_index_end)
np.save(filename, file_dict)


In [None]:
#@title 14.1.2. Run as a .py file
#!python module14_a.py