In [1]:
import copy
import logging
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# approximation methods
from scipy.interpolate import LinearNDInterpolator
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR

import pypsa
from n_dimensional_datasets import *
from plotter import *

warnings.filterwarnings('ignore')
logger = logging.getLogger("pypsa")
logger.setLevel("WARNING")


## user defines

In [6]:
start = 0
stop = 1
n_buses = 3
n_samples = 1000
train_percentage = 80
n_train_samples = int(n_samples*(train_percentage/100))

In [7]:
def setup_network():
    network = pypsa.Network()

    network.set_snapshots(range(n_samples))

    #add buses
    for i in range(n_buses):
        network.add("Bus","My bus {}".format(i), v_nom=20.)

    # add radial lines
    branches = 3
    for i in range(0, int(n_buses/branches)):
        for j in range((i*branches)+1, (i*branches)+1+branches):
            if j < n_buses:
                network.add("Line",
                            "My line {}{}".format(i,j), 
                            bus0="My bus {}".format(i), 
                            bus1="My bus {}".format(j), 
                            x=0.01, r=1, s_nom=1)


    #add a generator at bus 0
    network.add("Generator","My gen 0", bus="My bus 0", p_nom=2, marginal_cost=1)
    network.add("Generator","My gen 1", bus="My bus 1", p_nom=0.25, marginal_cost=1)
    
    return network

In [8]:
sim_data = {"start": start, 
            "stop": stop, 
            "samples": n_samples, 
            "train samples": n_train_samples, 
            "seed": []}
stats = {"score": [], 
         "error": [], 
         "scorevar": [],
         "time": []}
approx_type = {"rf": copy.deepcopy(stats), 
               "interp": copy.deepcopy(stats), 
               "svr": copy.deepcopy(stats)}

seeds = range(10)
for seed in seeds:
    # create data
    X = stochastic_mesh(n_buses, start, stop, n_samples, seed=seed)
    X_train = X.T[:(n_train_samples)]
    X_test = X.T[(n_train_samples):]
    for i in range(n_buses):
        network = setup_network()
        network.add("Load", "My load {}".format(i), bus="My bus {}".format(i), p_set=X[i])
    network.pf()
    y = network.buses_t.v_mag_pu.values
    y_train = y[:(n_train_samples)]
    y_test = y[(n_train_samples):]
    
    
    # setup approximators
    time_start = time.time()
    forest = RandomForestRegressor()
    forest.fit(X_train, y_train)
    time_forest = time.time()
    
    interp = LinearNDInterpolator(X_train, y_train)
    time_interp = time.time()
    
    svr = copy.deepcopy(stats)
    for idx in range(n_buses):
        clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
        clf.fit(X_train, y_train.T[idx].T)
        svr_xval_score = cross_val_score(clf, X_train, y_train.T[idx].T, cv=5)
        svr["error"].append(clf.predict(X_test)-y_test.T[idx])
        svr["score"].append(svr_xval_score.mean())
        svr["scorevar"].append(svr_xval_score.std())
    time_svr = time.time()
    
    # score approximators
    forest_xval_score = cross_val_score(forest, X_train, y_train, cv=5)
    approx_type["rf"]["error"].append(np.mean(forest.predict(X_test)-y_test))
    approx_type["rf"]["score"].append(forest_xval_score.mean())
    approx_type["rf"]["scorevar"].append(forest_xval_score.std())
    approx_type["rf"]["time"].append(time_forest-time_start)
    
    interp_y_hat = interp(X_test)
    not_nan_idx = [i for i, val in enumerate(interp_y_hat) if not np.isnan(val[0])]
    interp_score = r2_score(y_test[not_nan_idx], interp_y_hat[not_nan_idx])
    approx_type["interp"]["error"].append(np.mean(interp_y_hat[not_nan_idx] - y_test[not_nan_idx]))
    approx_type["interp"]["score"].append(interp_score)
    approx_type["interp"]["scorevar"].append(np.nan)
    approx_type["interp"]["time"].append(time_interp-time_forest)  
    
    approx_type["svr"]["error"].append(np.mean(svr["error"]))
    approx_type["svr"]["score"].append(np.mean(svr["score"]))
    approx_type["svr"]["scorevar"].append(svr_xval_score.std())
    approx_type["svr"]["time"].append(time_svr-time_interp)
    
    sim_data["seed"].append(seed)
    
    print("Seed {} complete with an interpolation score {}".format(seed, interp_score))

for t in approx_type:
    print("\n", t + " Stats: \n{}".format(pd.DataFrame(approx_type[t])))

Seed 0 complete with an interpolation score 0.3333333297070315
Seed 1 complete with an interpolation score 0.33333333213043564
Seed 2 complete with an interpolation score 0.3333333320148383
Seed 3 complete with an interpolation score 0.3333333310284215
Seed 4 complete with an interpolation score 0.33333333260305625
Seed 5 complete with an interpolation score 0.3333333322710807
Seed 6 complete with an interpolation score 0.3333333327852838
Seed 7 complete with an interpolation score 0.33333333082222144
Seed 8 complete with an interpolation score 0.3333333270223414
Seed 9 complete with an interpolation score 0.33333333211200294

 rf Stats: 
      score         error  scorevar      time
0  0.758167  1.780994e-05  0.011503  0.009306
1  0.752802  1.278122e-05  0.030502  0.007873
2  0.758595 -1.210594e-07  0.020379  0.009710
3  0.772863  1.192383e-06  0.011449  0.010403
4  0.760306 -2.893307e-06  0.013525  0.009726
5  0.756168 -4.463928e-06  0.017424  0.009519
6  0.759252  9.708441e-06  0.00

## plotting
.
.
.
.
.
.

In [None]:
# plotEnabled = True
# if plotEnabled:
#     fig = plt.figure()
#     ax_l = fig.add_subplot(1,1,1)
#     ax_l.set_xticks(frequency_coeff)
#     ax_l.set_xticklabels([round(coeff,3) for coeff in frequency_coeff], rotation = 60, ha="right")
    
#     left, = ax_l.plot(,
#                       results_avg_dataframe["r2_adj"].values, 
#                       'x', 
#                       label="$R^2$ Adjusted",
#                       markersize=10)
#     left2, = ax_l.plot(results_avg_dataframe["coeff"].values, 
#                        fit_fn(results_avg_dataframe["coeff"].values), 
#                        "-",
#                        label="$R^2$ Regression")  
#     ax_l.set_xlabel("Decaying Sinewave C oeffici       
#     ax_l.set_ylabel("$R^2$ Adjusted")
    
#     ax_r = fig.add_subplot(1,1,1, sharex=ax_l, frameon=False)
#     ax_r.yaxis.tick_right()
#     ax_r.yaxis.set_label_position("right")
#     ax_r.set_xticklabels([round(coeff,3) for coeff in frequency_coeff], rotation = 60, ha="right")
#     right, = ax_r.plot(results_avg_dataframe["coeff"].values, 
#                        results_avg_dataframe["variance"].values, 
#                        'o',
#                        label="Variance")
#     ax_r.set_ylabel("Variance")
    
#     plt.title("Approximation Performance With $n=M+1$ for a Family\n of Datasets With the Same $M$")
#     plt.legend(handles=[left, left2, right], loc=3) # loc moves legend to bottom right
#     plt.tight_layout()
# #     plt.savefig("r2adj_vs_sinecoeff.pdf")
#     plt.show()