### Linear regression and logistic regression
#### Can we predict the power generation for the next couple of days? This would allow for better grid management.
While analyzing the data, do we see any patterns in power generation?

In [59]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import utils
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from calculate_model_statistics import calculate_RMSE_MSE
#from matplotlib import pyplot
import scipy.stats

#### Data

In [60]:
# Plant 1 -- engineered features (train / test)
train1, test1 = (
    pd.read_csv("ML2020-p26-main/Data/Train_Plant1_added_features.csv"),
    pd.read_csv("ML2020-p26-main/Data/Test_Plant1_added_features.csv"),
)
# Plant 1 -- standardized variant
train1s, test1s = (
    pd.read_csv("ML2020-p26-main/Data/Train_Plant1_added_features_standardized.csv"),
    pd.read_csv("ML2020-p26-main/Data/Test_Plant1_added_features_standardized.csv"),
)
# Plant 1 -- min_max variant
train1m, test1m = (
    pd.read_csv("ML2020-p26-main/Data/Train_Plant1_added_features_min_max.csv"),
    pd.read_csv("ML2020-p26-main/Data/Test_Plant1_added_features_min_max.csv"),
)

# Plant 2 -- engineered features (train / test)
train2, test2 = (
    pd.read_csv("ML2020-p26-main/Data/Train_Plant2_added_features.csv"),
    pd.read_csv("ML2020-p26-main/Data/Test_Plant2_added_features.csv"),
)
# Plant 2 -- standardized variant
train2s, test2s = (
    pd.read_csv("ML2020-p26-main/Data/Train_Plant2_added_features_standardized.csv"),
    pd.read_csv("ML2020-p26-main/Data/Test_Plant2_added_features_standardized.csv"),
)

In [61]:
def create_features(in_data):
    """Add calendar columns derived from ``DATE_TIME`` to ``in_data`` in place.

    ``DATE_TIME`` is first converted with ``pd.to_datetime``; the columns
    DATE, HOUR, MINUTE, DAY_OF_YEAR, DAY_OF_WEEK, MONTH and DAY_OF_MONTH are
    then filled from its ``.dt`` accessor.  The frame is mutated and nothing
    is returned.
    """
    in_data['DATE_TIME'] = pd.to_datetime(in_data['DATE_TIME'])

    # (new column, attribute of the .dt accessor), in insertion order
    derived_columns = (
        ("DATE", "date"),
        ("HOUR", "hour"),
        ("MINUTE", "minute"),
        ("DAY_OF_YEAR", "dayofyear"),
        ("DAY_OF_WEEK", "dayofweek"),
        ("MONTH", "month"),
        ("DAY_OF_MONTH", "day"),
    )
    for column, attribute in derived_columns:
        in_data[column] = getattr(in_data['DATE_TIME'].dt, attribute)
    
# Derive the calendar columns on every loaded frame; each frame is mutated
# in place by create_features.
for frame in (train1, test1, train1s, test1s, train1m, test1m,
              train2, test2, train2s, test2s):
    create_features(frame)

In [None]:
# Column names of train1s: skip the first two columns, then leave out the
# identifier / timestamp columns.
columns = [
    name
    for name in list(train1s.columns)[2:]
    if name not in ['DATE_TIME', 'SOURCE_KEY', 'DATE', 'TIME']
]

##### Function to find the best features

In [None]:
import itertools
#all_features does not include 'DATE_TIME' and 'SOURCE_KEY'

# Base candidate columns; the prediction target is filtered out below.
alll_features = [
    'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'PLANT_ID',
    'DC_POWER', 'AC_POWER', 'DAILY_YIELD',
    'DC_AVG_DIF_PER_DAY', 'AC_AVG_DIF_PER_DAY',
    'DC_DIF_AVG_PER_MOMENT', 'AC_DIF_AVG_PER_MOMENT',
    'HOUR', 'MINUTE', 'DAY_OF_YEAR', 'DAY_OF_WEEK', 'MONTH', 'DAY_OF_MONTH',
]
predict_column = 'DC_POWER'
all_features = [name for name in alll_features if name != predict_column]

def feature_combinations(all_features):
    """Return every non-empty subset of ``all_features`` as a list of lists.

    Subsets are ordered by size (1 up to ``len(all_features)``) and, within
    one size, in the order produced by ``itertools.combinations``.
    """
    subset_sizes = range(1, len(all_features) + 1)
    return [
        list(subset)
        for size in subset_sizes
        for subset in itertools.combinations(all_features, size)
    ]

# feature_combinations returns a list of all non-empty combinations of the
# given features; build it here for the candidate feature list.
all_combinations = feature_combinations(all_features)

#### Linear regression

In [62]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [None]:
# Exhaustive feature-subset search. It stays disabled inside a string literal
# because it fits one model per entry of all_combinations.
"""
train = train1s
test = test1s
predict_column = 'DC_POWER'

np.random.seed(1111)

# The best-so-far trackers are created once, before the loop, so they
# accumulate over every feature combination instead of being reset each time.
best_rmse = float('inf')
best_rmse_features = []
best_mae = float('inf')
best_mae_features = []
best_r2 = float('-inf')  # R^2 can be negative, so 0 is not a safe floor
best_r2_features = []

for train_features in all_combinations:
    # THE SPECIFIC MODEL
    lr = LinearRegression()
    lr.fit(train[train_features], train[[predict_column]])
    test_predictions = lr.predict(test[train_features])
    #

    # rmse (sqrt of the MSE; does not rely on the `squared` keyword)
    rmse = np.sqrt(mean_squared_error( test[[predict_column]] , test_predictions ))
    if rmse < best_rmse:
        best_rmse = rmse
        best_rmse_features = train_features

    # mae
    mae = mean_absolute_error( test[[predict_column]] , test_predictions )
    if mae < best_mae:
        best_mae = mae
        best_mae_features = train_features

    # r^2
    r2 = r2_score( test[[predict_column]] , test_predictions )
    if r2 > best_r2:
        best_r2 = r2
        best_r2_features = train_features

"""

In [64]:
# Disabled: prints the best_* results of the (also disabled) feature-subset
# search. Kept inside a string literal so it is not executed.
"""
print(best_r2_features)
print(best_r2)
print(best_rmse_features)
print(best_rmse)
print(best_mae_features)
print(best_mae)
"""
# Show the distinct DATE values present in the Plant 1 standardized training frame.
print(train1s['DATE'].unique())

[datetime.date(2020, 5, 15) datetime.date(2020, 5, 16)
 datetime.date(2020, 5, 17) datetime.date(2020, 5, 18)
 datetime.date(2020, 5, 19) datetime.date(2020, 5, 20)
 datetime.date(2020, 5, 21) datetime.date(2020, 5, 22)
 datetime.date(2020, 5, 23) datetime.date(2020, 5, 24)
 datetime.date(2020, 5, 25) datetime.date(2020, 5, 26)
 datetime.date(2020, 5, 27) datetime.date(2020, 5, 28)
 datetime.date(2020, 5, 29) datetime.date(2020, 5, 30)
 datetime.date(2020, 5, 31) datetime.date(2020, 6, 1)
 datetime.date(2020, 6, 2) datetime.date(2020, 6, 3)
 datetime.date(2020, 6, 4) datetime.date(2020, 6, 5)
 datetime.date(2020, 6, 6) datetime.date(2020, 6, 7)
 datetime.date(2020, 6, 8) datetime.date(2020, 6, 9)]


In [None]:
# Wider candidate column list. This rebinds `all_features`; note that the list
# itself contains 'DC_POWER', so the prediction target has to be filtered out
# before it is used as model input.
all_features = ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'PLANT_ID', 'DC_POWER', 'AC_POWER', 
                'DAILY_YIELD', 'TOTAL_YIELD', 'ARTIFICIAL', 'DC_POWER_DIF_AVG_PER_ALL', 'AC_POWER_DIF_AVG_PER_ALL', 
                'DC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'DAILY_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'DAILY_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'TOTAL_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'TOTAL_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'AMBIENT_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'MODULE_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'IRRADIATION_PER_INVERTER_MOMENT_SHIFT_MINUS_1', 'IRRADIATION_PER_INVERTER_DAY_SHIFT_MINUS_1', 
                'DC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'DAILY_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'DAILY_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'TOTAL_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'TOTAL_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'AMBIENT_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'MODULE_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'IRRADIATION_PER_INVERTER_MOMENT_SHIFT_MINUS_2', 'IRRADIATION_PER_INVERTER_DAY_SHIFT_MINUS_2', 
                'DC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'DAILY_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'DAILY_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'TOTAL_YIELD_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'TOTAL_YIELD_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'AMBIENT_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'MODULE_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'IRRADIATION_PER_INVERTER_MOMENT_SHIFT_MINUS_3', 'IRRADIATION_PER_INVERTER_DAY_SHIFT_MINUS_3', 
                'DC_DIF_AVG_PER_INVERTER_WHOLE_PERIOD', 'AC_DIF_AVG_PER_INVERTER_WHOLE_PERIOD', 'DC_DIF_AVG_PER_INVERTER_PER_DAY', 
                'AC_DIF_AVG_PER_INVERTER_PER_DAY', 'DC_AVG_DIF_PER_DAY', 'AC_AVG_DIF_PER_DAY', 'DC_DIF_AVG_PER_MOMENT', 
                'AC_DIF_AVG_PER_MOMENT', 'HOUR', 'MINUTE', 'DAY_OF_YEAR', 'DAY_OF_WEEK', 'MONTH', 'DAY_OF_MONTH']
# Bare expression: the number of entries is shown as the cell output.
len(all_features)

In [96]:
# *_PER_INVERTER_DAY_SHIFT_MINUS_<k> columns used as inputs of the Plant 1
# linear model, grouped by shift k.
all_features1 = (
    [   # shift 1
        'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
        'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
        'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
        'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
    ]
    + [  # shift 2 (power columns only)
        'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
        'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
    ]
    + [  # shift 3
        'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3',
        'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3',
        'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3',
        'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3',
    ]
)

# Bare expression: the number of entries is shown as the cell output.
len(all_features1)

10

In [97]:
# Plant 1, standardized train / test frames
train, test = train1s, test1s

# History of the feature sets tried for this model:
#   beginning: ['HOUR','MINUTE','DAY_OF_YEAR','DAY_OF_WEEK','MONTH','DAY_OF_MONTH',
#               'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']
#   before:    ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION', 'PLANT_ID',
#               'DC_POWER', 'AC_POWER', 'DAILY_YIELD', 'DC_AVG_DIF_PER_DAY',
#               'AC_AVG_DIF_PER_DAY', 'DC_DIF_AVG_PER_MOMENT', 'AC_DIF_AVG_PER_MOMENT',
#               'HOUR', 'MINUTE', 'DAY_OF_YEAR', 'DAY_OF_WEEK', 'MONTH', 'DAY_OF_MONTH']
#   then:      67 features minus the predicted feature
#   current:   all_features1

predict_column = 'DC_POWER'
# The target must never appear among the inputs.
train_features = [name for name in all_features1 if name != predict_column]

np.random.seed(1111)
lr = LinearRegression()  # normalize=True

# fit the model
lr.fit(train[train_features], train[[predict_column]])
#print(summary(lr.fit(train[train_features], train[[predict_column]])))

# learnt weights: one line per feature, then the intercept
for weight, feature_name in zip(lr.coef_[0], train_features):
    print(f'[ {weight},     {feature_name} ]')
print(f'[{lr.intercept_}]')

[ -85979.5483602746,     DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1 ]
[ 87260.93768781114,     AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1 ]
[ -988.9513171053977,     AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1 ]
[ 1493.4974491693922,     MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1 ]
[ -45301.48082234248,     DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2 ]
[ 45909.25543677684,     AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2 ]
[ 31462.014524111353,     DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3 ]
[ -31154.62646826987,     AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3 ]
[ -1320.3988593347,     AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3 ]
[ 1527.694588688621,     MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3 ]
[[3087.33100413]]


In [95]:
# Score the fitted linear model on the test frame chosen in the fitting cell.
test_predictions = lr.predict(test[train_features])

# R^2 score
r2 = r2_score( test[[predict_column]] , test_predictions )
print(r2)

# RMSE -- taken as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` keyword is deprecated and no longer accepted by recent
# scikit-learn releases, while sqrt(MSE) works on every version.
rmse = np.sqrt(mean_squared_error( test[[predict_column]] , test_predictions ))
print(rmse)
# MAE
mae = mean_absolute_error( test[[predict_column]] , test_predictions )
print(mae)

0.8087136159533445
1603.832153877536
894.8372324475326


##### Daily and 15 min scores

In [None]:
# Scratch cell: a bare tuple holding the same ten names as all_features1.
# It is not assigned to anything; its only effect is being shown as cell output.
'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3'

In [98]:
# Plant 1: fit on the standardized training frame, then report daily and
# per-measurement scores via
# calculate_RMSE_MSE(test_data, predict_column, test_predictions).
train = train1s

train_features = ['DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3',
                'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3',
                'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_3']


# Alternative feature set based on the MOMENT shifts (not used):
#train_features = ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE',
#                'DC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_1',
#                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_1',
#                'AMBIENT_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_1',
#                'MODULE_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_1',
#                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_2',
#                'DC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_3',
#                'AC_POWER_PER_INVERTER_MOMENT_SHIFT_MINUS_3']

predict_column = 'DC_POWER'

lr = LinearRegression()
lr.fit(train[train_features], train[[predict_column]])

test_predictions = lr.predict(test1s[train_features])
# The two-column evaluation copy gets its own name. Rebinding `test1` here
# would replace the Plant 1 test frame loaded at the top of the notebook with
# this slice, and later cells that read other columns from `test1`
# (e.g. HOUR, AC_POWER) would then fail on a top-to-bottom run.
test1_eval = test1s[['DAY_OF_YEAR','DC_POWER']].copy()

calculate_RMSE_MSE(test1_eval, 'DC_POWER', test_predictions)

   DAY_OF_YEAR  DC_POWER_SUM  PREDICTED_DC_POWER_SUM
0          162  5.784040e+06            7.181942e+06
1          163  5.045679e+06            6.476474e+06
2          164  5.222354e+06            5.877386e+06
3          165  7.593776e+06            5.489943e+06
4          166  6.350495e+06            6.269846e+06
5          167  6.055536e+06            6.213408e+06
6          168  5.572501e+06            6.680883e+06
7          169  5.317780e+06            6.059745e+06
daily predictions:
 RMSE:1154715.0; MAE:959554.0; R2:-1.25209
individual measurements:
 RMSE:1583.0; MAE:879.0; R2:0.8137


In [None]:
# Scratch cell: a bare tuple holding the ten feature names used for the Plant 2 model.
# It is not assigned to anything; its only effect is being shown as cell output.
'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2', 'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2', 'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2', 'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3', 'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3'

In [49]:
# Plant 2: fit on the standardized training frame, then report daily and
# per-measurement scores via calculate_RMSE_MSE.
train = train2s

#'AMBIENT_TEMPERATURE','AMBIENT_TEMPERATURE_PER_INVERTER_MOMENT_SHIFT_MINUS_1'
train_features = ['DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_1',
                'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'AMBIENT_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'MODULE_TEMPERATURE_PER_INVERTER_DAY_SHIFT_MINUS_2',
                'DC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3',
                'AC_POWER_PER_INVERTER_DAY_SHIFT_MINUS_3']

predict_column = 'DC_POWER'

lr = LinearRegression()
lr.fit(train[train_features], train[[predict_column]])

test_predictions = lr.predict(test2s[train_features])
# Evaluate against a copy of the same frame the predictions were made from
# (test2s), mirroring the Plant 1 cell. Previously a Plant 1 slice was built
# here but never used, and the raw `test2` frame was passed instead.
test2_eval = test2s[['DAY_OF_YEAR','DC_POWER']].copy()

calculate_RMSE_MSE(test2_eval, 'DC_POWER', test_predictions)

   DAY_OF_YEAR   DC_POWER_SUM  PREDICTED_DC_POWER_SUM
0          162  393589.770390           547289.878305
1          163  342752.854139           475413.596101
2          164  411233.862857           388800.203925
3          165  455305.790476           410535.787869
4          166  505911.451905           476915.730874
5          167  410998.909524           513485.802099
6          168  480810.068571           471319.237131
7          169  380211.240476           471052.421460
daily predictions:
 RMSE:89034.0; MAE:73172.0; R2:-2.06707
individual measurements:
 RMSE:186.0; MAE:111.0; R2:0.58398


#### Plant 1  (15min)

Used features: DC_POWER_DIF_AVG_PER_ALL, DAY_OF_YEAR, MONTH, DAY_OF_MONTH (AC_POWER,AC_POWER_DIF_AVG_PER_ALL)

RMSE: 767.48207281644

 MAE: 767.4820728164401

 R^2:   0.9477802249173171


#### Plant 2  (15min)

Used features: DC_POWER_DIF_AVG_PER_ALL, DAY_OF_YEAR, MONTH, DAY_OF_MONTH

RMSE: 26.82104543432361

MAE: 26.821045434323608

R^2:   0.991384737097839

##### Results before (faulty)

RMSE when predicting AC POWER in Plant 1 : 335.74 -> 48.61 ---> RMSE: 0.355   MAE: 0.267

RMSE when predicting DC POWER in Plant 1 : 3433.54 -> 495.67 ---> RMSE: 3.625   MAE: 2.725

RMSE when predicting AC POWER in Plant 2 : 289.39 -> 177.05 ---> RMSE: 0.251   MAE: 0.234

RMSE when predicting DC POWER in Plant 2 : 295.81 -> 180.86 ---> RMSE: 0.257   MAE: 0.240

#### Logistic regression

**Not valid:** logistic regression is a classifier, whereas we need to predict a variable with a continuous value.

In [None]:
from sklearn.linear_model import LogisticRegression 
# Label encoder used further down to turn the scaled target values into
# integer class labels for LogisticRegression.
lab_enc = preprocessing.LabelEncoder()

In [None]:
# Trial dataset

#train22 = train2[:5000]
#test22 = test2[:1000]

#train22 = preprocessing.scale(train22)
#test22 = preprocessing.scale(test22)

In [None]:
# Logistic-regression trial on Plant 1 (the markdown above flags this approach
# as not valid for a continuous target).
# Only the first 9000 training rows are used.
train = train1[:9000]
# Requires test1 to still contain the calendar columns and AC_POWER.
test = test1

train_features = ['HOUR','MINUTE','DAY_OF_YEAR','DAY_OF_WEEK','MONTH','DAY_OF_MONTH']
predict_column = 'AC_POWER'

logr = LogisticRegression(solver='lbfgs', max_iter=1000)

# The scaled target is label-encoded so the classifier receives integer class ids.
train_pred = lab_enc.fit_transform(preprocessing.scale( train[predict_column] ))

# FIT
logr.fit(preprocessing.scale( train[train_features] ), train_pred)
# test22.loc['logr'] = logr.predict(test22[train_features])

# NOTE(review): fit_transform refits lab_enc on the test target, and scale() is
# applied to the train and test data separately, so the test class ids are not
# guaranteed to correspond to the training class ids -- confirm before relying
# on the accuracy printed below.
test_pred = lab_enc.fit_transform(preprocessing.scale( test[predict_column] ))

print(f"Accuracy of LOG {logr.score(preprocessing.scale( test[train_features] ), test_pred )*100}%")

In [None]:
# RMSE
# NOTE(review): test_pred is the label-encoded test target built in the cell
# above, not the output of logr.predict, so this compares the raw target
# column with its own encoding -- confirm whether model predictions were intended.
print(np.sqrt(metrics.mean_squared_error( test[[predict_column]] , test_pred )))

Accuracy of predicting AC POWER in Plant 1 : 69.34046345811052 % (with training data length 9000)

RMSE : 1339.85

Accuracy of predicting DC POWER in Plant 1 : 69.34046345811052 % (with training data length 9000)

RMSE : 2011.80

Accuracy of predicting AC POWER in Plant 2 : 50.32308377896613 % (with training data length 9000)

RMSE : 3193.82

Accuracy of predicting DC POWER in Plant 2 : 50.32308377896613 % (with training data length 9000)

RMSE : 3194.76